In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-movie data set; the new stats below are derived from it
mdata = pd.read_csv('kc-movie-data.csv')
mdata.head(5)

Unnamed: 0,title,date,rt_score,kc,length
0,Friday the 13th,05/09/1980,0.64,10,95
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90


In [5]:
# Kills per minute: the kc column divided element-wise by the length column
mdata['kpm'] = mdata['kc'] / mdata['length']
mdata.head(5)

Unnamed: 0,title,date,rt_score,kc,length,kpm
0,Friday the 13th,05/09/1980,0.64,10,95,0.105263
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87,0.103448
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95,0.126316
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91,0.153846
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90,0.244444


In [6]:
# Show the row(s) whose kpm equals the column maximum
mdata[mdata['kpm'] == mdata['kpm'].max()]

Unnamed: 0,title,date,rt_score,kc,length,kpm
39,Killer Clowns From Outer Space,05/27/1988,0.75,40,86,0.465116


In [11]:
# Rank the movies from highest to lowest kc
mdata.sort_values('kc', ascending=False)

Unnamed: 0,title,date,rt_score,kc,length,kpm
39,Killer Clowns From Outer Space,05/27/1988,0.75,40,86,0.465116
29,The Mist,11/21/2007,0.71,34,126,0.269841
48,My Bloody Valentine 3D (2009),01/16/2009,0.61,30,101,0.29703
35,Alien Resurrection,11/26/1997,0.54,30,109,0.275229
33,Alien 3,05/22/1992,0.44,29,115,0.252174
37,Black Christmas (2006 Remake),12/25/2006,0.16,27,92,0.293478
30,Trick r' Treat,10/04/2009,0.83,27,82,0.329268
9,Jason X,04/26/2002,0.2,25,91,0.274725
8,Jason Goes To Hell: The Final Friday,08/13/1993,0.2,24,90,0.266667
10,Freddy vs. Jason,08/13/2003,0.41,22,97,0.226804


In [14]:
# Add a categorical popularity bin for the Rotten Tomatoes scores, which are
# stored as 0-1 fractions: low (< 0.30), medium (0.30 to < 0.70), high (>= 0.70).
# right=False closes every bin on its left edge so 0.0 is 'low', 0.30 is
# 'medium' and 0.70 is 'high'. The top edge is the next float above 1.0 so a
# perfect score of 1.0 still falls in 'high'; out-of-range scores stay NaN.
mdata['pop_cat'] = pd.cut(
    mdata.rt_score,
    bins=[0, 0.30, 0.70, np.nextafter(1.0, 2.0)],
    right=False,
    labels=['low', 'medium', 'high'],
)
mdata.head()

Unnamed: 0,title,date,rt_score,kc,length,kpm,pop_cat
0,Friday the 13th,05/09/1980,0.64,10,95,0.105263,medium
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87,0.103448,low
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95,0.126316,low
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91,0.153846,low
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90,0.244444,low


In [15]:
# Adding function for repeating kpm process as more data gets added
def kpm(df):
    """Add a ``kpm`` (kills per minute) column to ``df`` in place.

    The new column is the ``kc`` column divided element-wise by the
    ``length`` column. The frame passed in is modified directly and
    nothing is returned.
    """
    kill_counts = df['kc']
    run_lengths = df['length']
    df['kpm'] = kill_counts / run_lengths

In [16]:
# Adding function for repeating popularity category process as more data gets added
def pop_cat(df):
    """Add a categorical ``pop_cat`` popularity column to ``df`` in place.

    Scores in ``rt_score`` (0-1 fractions) are binned as:

    * ``'low'``    -- score < 0.30
    * ``'medium'`` -- 0.30 <= score < 0.70
    * ``'high'``   -- 0.70 <= score <= 1.0

    Scores outside the 0-1 range are left as NaN. The frame passed in is
    modified directly and nothing is returned.
    """
    df['pop_cat'] = pd.cut(
        df['rt_score'],
        # The top edge is the next float above 1.0: with left-closed bins a
        # plain 1.0 edge would exclude a perfect score from 'high'.
        bins=[0, 0.30, 0.70, np.nextafter(1.0, 2.0)],
        # Left-closed bins put 0.0 in 'low', 0.30 in 'medium' and 0.70 in
        # 'high', matching the documented thresholds.
        right=False,
        labels=['low', 'medium', 'high'],
    )

In [30]:
# Adding function for combining any future functions I may add during this project
def add_features(df):
    """Apply every feature-building step to ``df``.

    Runs ``kpm`` and then ``pop_cat`` on the same frame, in that order.
    New feature functions can be appended to the tuple below. Nothing is
    returned.
    """
    for add_column in (kpm, pop_cat):
        add_column(df)

In [18]:
# Re-read the movie data from disk so the new functions start from a fresh frame
mdata = pd.read_csv('kc-movie-data.csv')
mdata.head(5)

Unnamed: 0,title,date,rt_score,kc,length
0,Friday the 13th,05/09/1980,0.64,10,95
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90


In [26]:
# Try out kpm(): it should add the kpm column to mdata
kpm(mdata)
mdata.head(5)

Unnamed: 0,title,date,rt_score,kc,length,kpm
0,Friday the 13th,05/09/1980,0.64,10,95,0.105263
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87,0.103448
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95,0.126316
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91,0.153846
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90,0.244444


In [27]:
# Try out pop_cat(): it should add the pop_cat column to mdata
pop_cat(mdata)
mdata.head(5)

Unnamed: 0,title,date,rt_score,kc,length,kpm,pop_cat
0,Friday the 13th,05/09/1980,0.64,10,95,0.105263,medium
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87,0.103448,low
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95,0.126316,low
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91,0.153846,low
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90,0.244444,low


In [28]:
# Strip both derived columns again so the combined function can be tested
del mdata['pop_cat']
del mdata['kpm']

In [31]:
# Try out add_features(): both derived columns should be back on mdata
add_features(mdata)
mdata.head(5)

Unnamed: 0,title,date,rt_score,kc,length,kpm,pop_cat
0,Friday the 13th,05/09/1980,0.64,10,95,0.105263,medium
1,Friday the 13th Pt. 2,04/30/1981,0.27,9,87,0.103448,low
2,Friday the 13th Pt. 3,08/13/1982,0.11,12,95,0.126316,low
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,14,91,0.153846,low
4,Friday the 13th: A New Beginning,03/22/1985,0.18,22,90,0.244444,low


In [33]:
# Load the per-kill data set for its finishing touches
kcdata = pd.read_csv('kc-kill-data.csv')
kcdata.head(n=20)

Unnamed: 0,name,kill_order,mtype,mwep,gc,dm,title,gender
0,Barry,1,stab (gut),knife,No,No,Friday the 13th,0
1,Claudette,2,unknown,unknown,No,Yes,Friday the 13th,1
2,Annie Phillips,3,slash (throat),knife,No,No,Friday the 13th,1
3,Ned Rubinstein,4,slash (throat),unknown,No,No,Friday the 13th,0
4,Jack Burrell,5,stab (throat),arrow,Yes,No,Friday the 13th,0
5,Marcie Stanler,6,chop (head),axe,No,No,Friday the 13th,1
6,Steve Christy,7,stab (gut),unknown,No,No,Friday the 13th,0
7,Bill Brown,8,pincushion,arrow,No,No,Friday the 13th,0
8,Brenda Jones,9,unknown,unknown,No,No,Friday the 13th,1
9,Pamela Voorhees,10,decapitated,machete,No,No,Friday the 13th,1


In [37]:
# Reorder the columns so that title sits directly after name
kcdata = kcdata[[
    'name', 'title',
    'kill_order', 'mtype', 'mwep',
    'gc', 'dm', 'gender',
]]
kcdata.head(5)

Unnamed: 0,name,title,kill_order,mtype,mwep,gc,dm,gender
0,Barry,Friday the 13th,1,stab (gut),knife,No,No,0
1,Claudette,Friday the 13th,2,unknown,unknown,No,Yes,1
2,Annie Phillips,Friday the 13th,3,slash (throat),knife,No,No,1
3,Ned Rubinstein,Friday the 13th,4,slash (throat),unknown,No,No,0
4,Jack Burrell,Friday the 13th,5,stab (throat),arrow,Yes,No,0


In [38]:
# Replacing files in folder with updated files.
# index=False keeps the row index out of the CSVs. With the default
# (index=True) it is written as an unnamed first column, which read_csv then
# loads back as an extra 'Unnamed: 0' column the next time the notebook runs.
mdata.to_csv('kc-movie-data.csv', index=False)
kcdata.to_csv('kc-kill-data.csv', index=False)