In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the movie table from its CSV file and preview the first five rows
movie_data = pd.read_csv(filepath_or_buffer='kc-movie-data.csv')
movie_data.head(n=5)

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
0,Friday the 13th,05/09/1980,0.64,
1,Friday the 13th Pt. 2,04/30/1981,0.27,
2,Friday the 13th Pt. 3,08/13/1982,0.11,
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,
4,Friday the 13th: A New Beginning,03/22/1985,0.18,


In [6]:
# .info() prints, per column, the non-null count (to spot missing values)
# and the dtype (to check each column was read as the right type)
movie_data.info(verbose=None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   MName         49 non-null     object 
 1   Release_Date  49 non-null     object 
 2   RT_Score      49 non-null     float64
 3   Kill_Count    0 non-null      float64
dtypes: float64(2), object(2)
memory usage: 1.7+ KB


In [7]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
numeric_summary = movie_data.describe()
numeric_summary

Unnamed: 0,RT_Score,Kill_Count
count,49.0,0.0
mean,0.536327,
std,0.268985,
min,0.08,
25%,0.31,
50%,0.53,
75%,0.76,
max,0.98,


In [8]:
# Lowest RT score in the data: it is 8%, so no placeholder 0s are hiding in the column
movie_data['RT_Score'].min()

0.08

In [12]:
# Count missing RT scores; the column turns out to have none
movie_data['RT_Score'].isna().sum()

0

In [13]:
# Mean RT score across all movies: about 53%, a decent average
movie_data['RT_Score'].mean()

0.536326530612245

In [16]:
# Release_Date is still a plain object (string) column here; it should be a datetime
column_dtypes = movie_data.dtypes
column_dtypes

MName            object
Release_Date     object
RT_Score        float64
Kill_Count      float64
dtype: object

In [17]:
# Convert Release_Date from strings to datetimes.
# The raw values are month/day/year (e.g. '04/30/1981'), so the format is spelled out:
# this removes any month-first vs. day-first guessing and raises on a malformed value
# instead of silently mis-parsing it.
movie_data['Release_Date'] = pd.to_datetime(movie_data['Release_Date'], format='%m/%d/%Y')
movie_data.dtypes

MName                   object
Release_Date    datetime64[ns]
RT_Score               float64
Kill_Count             float64
dtype: object

In [18]:
# Preview the first five rows to confirm the dates now display as datetimes
movie_data.head(n=5)

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
0,Friday the 13th,1980-05-09,0.64,
1,Friday the 13th Pt. 2,1981-04-30,0.27,
2,Friday the 13th Pt. 3,1982-08-13,0.11,
3,Friday the 13th: The Final Chapter,1984-04-13,0.23,
4,Friday the 13th: A New Beginning,1985-03-22,0.18,


In [19]:
# Now that the dates are fixed, write a new csv shaped like the original but with the
# new datetime format. index=False keeps the default 0..N-1 row index out of the file;
# otherwise it is saved as an extra unnamed column that the original csv does not have.
movie_data.to_csv('kc-movie-data2.csv', index=False)

In [23]:
# Which movie has the earliest release date, and what is its RT score?
earliest_release = movie_data['Release_Date'].min()
movie_data.loc[movie_data['Release_Date'] == earliest_release]

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
18,Night of the Living Dead,1968-10-01,0.97,


In [24]:
# Which movie has the most recent release date?
latest_release = movie_data['Release_Date'].max()
movie_data.loc[movie_data['Release_Date'] == latest_release]

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
27,Cult of Chucky,2017-10-20,0.79,


In [28]:
# Normalise the column labels for convenience: trim surrounding whitespace and lowercase
movie_data.columns = [label.strip().lower() for label in movie_data.columns]

In [29]:
# Show the column labels after renaming
renamed_columns = movie_data.columns
renamed_columns

Index(['mname', 'release_date', 'rt_score', 'kill_count'], dtype='object')

In [30]:
# Re-save with the lowercased column names. index=False keeps the default 0..N-1 row
# index out of the file, so re-reading the csv does not produce a stray unnamed column.
movie_data.to_csv('kc-movie-data2.csv', index=False)

In [44]:
# Use release_date as the row index. From here on it is the index, not a regular column.
movie_data = movie_data.set_index(keys='release_date')
movie_data.head(n=5)

Unnamed: 0_level_0,mname,rt_score,kill_count
release_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1980-05-09,Friday the 13th,0.64,
1981-04-30,Friday the 13th Pt. 2,0.27,
1982-08-13,Friday the 13th Pt. 3,0.11,
1984-04-13,Friday the 13th: The Final Chapter,0.23,
1985-03-22,Friday the 13th: A New Beginning,0.18,


In [48]:
# Calendar year of every release, read off the datetime index
release_years = movie_data.index.year
release_years

Int64Index([1980, 1981, 1982, 1984, 1985, 1986, 1988, 1989, 1993, 2002, 2003,
            2009, 1996, 1997, 2000, 2011, 1985, 2011, 1968, 2017, 1988, 1990,
            1991, 1990, 1998, 2004, 2013, 2017, 2013, 2007, 2009, 1979, 1986,
            1992, 2008, 1997, 1974, 2006, 2015, 1980, 1988, 2017, 1984, 1985,
            1987, 1988, 1989, 1991, 1981],
           dtype='int64', name='release_date')

In [66]:
def year_to_decade(year):
    """Return the decade that contains ``year``, as a string (e.g. 1985 -> '1980').

    Parameters
    ----------
    year : int, str, or date-like
        A calendar year (``1985`` or ``'1985'``), or any object exposing a
        ``.year`` attribute such as ``datetime.date`` or ``pandas.Timestamp``.

    Returns
    -------
    str
        The first year of the decade, e.g. ``'1980'``.

    Raises
    ------
    ValueError, TypeError
        If ``year`` cannot be interpreted as an integer year.
    """
    # Date-like values carry the calendar year on `.year`; plain numbers and strings do not.
    year = getattr(year, 'year', year)
    # Floor to a multiple of ten arithmetically, so years that are not exactly
    # four digits long (e.g. 985) are handled correctly as well.
    decade = str(int(year) // 10 * 10)
    return decade

In [75]:
# An earlier cell moves release_date into the index, which makes a column lookup fail on a
# fresh top-to-bottom run. Put it back as a regular column first; the guard keeps this
# cell safe to re-run once the column is already restored.
if movie_data.index.name == 'release_date':
    movie_data = movie_data.reset_index()

# Label every movie with its release decade, derived from the calendar year of the
# release date rather than from the timestamp's string form.
movie_data['decade'] = movie_data['release_date'].dt.year.apply(year_to_decade)

In [78]:
# Spot-check ten random rows to confirm the decade labels look right.
# A fixed random_state makes the same rows appear on every run, so the displayed
# output is reproducible after a kernel restart.
movie_data.sample(10, random_state=42)

Unnamed: 0,release_date,mname,rt_score,kill_count,decade
1,1981-04-30,Friday the 13th Pt. 2,0.27,,1980
23,1990-11-18,Stephen King's It,0.67,,1990
19,2017-02-24,Get Out,0.98,,2010
31,1979-05-25,Alien,0.98,,1970
41,2017-09-05,IT (2017),0.86,,2010
12,1996-12-20,Scream,0.79,,1990
24,1998-10-16,Bride of Chucky,0.46,,1990
16,1985-06-25,The Thing,0.85,,1980
38,2015-12-04,Krampus,0.67,,2010
22,1991-08-30,Child's Play 3,0.29,,1990


In [86]:
# Best movie in each decade: find the row label of the highest rt_score per decade,
# then pull those rows from the frame
top_row_labels = movie_data.groupby('decade')['rt_score'].idxmax()
movie_data.loc[top_row_labels]

Unnamed: 0,release_date,mname,rt_score,kill_count,decade
18,1968-10-01,Night of the Living Dead,0.97,,1960
31,1979-05-25,Alien,0.98,,1970
32,1986-07-18,Aliens,0.97,,1980
13,1997-12-12,Scream 2,0.81,,1990
30,2009-10-04,Trick r' Treat,0.83,,2000
19,2017-02-24,Get Out,0.98,,2010


In [88]:
# Worst movie in each decade: find the row label of the lowest rt_score per decade,
# then pull those rows from the frame
bottom_row_labels = movie_data.groupby('decade')['rt_score'].idxmin()
movie_data.loc[bottom_row_labels]

Unnamed: 0,release_date,mname,rt_score,kill_count,decade
18,1968-10-01,Night of the Living Dead,0.97,,1960
36,1974-12-20,Black Christmas,0.71,,1970
7,1989-07-28,Friday the 13th: Jason Takes Manhatton,0.08,,1980
47,1991-09-13,Freddy's Dead: The Final Nightmare,0.19,,1990
37,2006-12-25,Black Christmas (2006 Remake),0.16,,2000
17,2011-10-10,The Thing (2011 Remake),0.34,,2010


In [87]:
# Average rt_score within each decade
scores_by_decade = movie_data.groupby('decade')['rt_score']
scores_by_decade.mean()

decade
1960    0.970000
1970    0.845000
1980    0.487895
1990    0.483000
2000    0.414444
2010    0.723750
Name: rt_score, dtype: float64

In [89]:
# Number of movies per decade in the current data set
# (count() reports the non-null entries of every column within each decade)
movies_by_decade = movie_data.groupby('decade')
movies_by_decade.count()

Unnamed: 0_level_0,release_date,mname,rt_score,kill_count
decade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1960,1,1,1,0
1970,2,2,2,0
1980,19,19,19,0
1990,10,10,10,0
2000,9,9,9,0
2010,8,8,8,0


In [90]:
# Final look at the first five rows before saving
movie_data.head(n=5)

Unnamed: 0,release_date,mname,rt_score,kill_count,decade
0,1980-05-09,Friday the 13th,0.64,,1980
1,1981-04-30,Friday the 13th Pt. 2,0.27,,1980
2,1982-08-13,Friday the 13th Pt. 3,0.11,,1980
3,1984-04-13,Friday the 13th: The Final Chapter,0.23,,1980
4,1985-03-22,Friday the 13th: A New Beginning,0.18,,1980


In [91]:
# Save the final frame, including the decade column.
# A plain 0..N-1 RangeIndex holds no information and would only add a stray unnamed
# column to the file, so it is skipped; any other index (e.g. dates) is data and is kept.
index_carries_data = not isinstance(movie_data.index, pd.RangeIndex)
movie_data.to_csv('kc-movie-data2.csv', index=index_carries_data)