In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the movie dataset into a DataFrame. The path is relative, so the CSV
# must be in the notebook's working directory.
movie_data = pd.read_csv('kc-movie-data.csv')
# Preview the first rows to sanity-check the columns and values.
movie_data.head()

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
0,Friday the 13th,05/09/1980,0.64,
1,Friday the 13th Pt. 2,04/30/1981,0.27,
2,Friday the 13th Pt. 3,08/13/1982,0.11,
3,Friday the 13th: The Final Chapter,04/13/1984,0.23,
4,Friday the 13th: A New Beginning,03/22/1985,0.18,


In [6]:
# Use .info() to check each column's non-null count and dtype, i.e. how many
# values are missing and whether the inferred types are appropriate.
# Note: Kill_Count reports 0 non-null values, so that column is entirely empty.
movie_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   MName         49 non-null     object 
 1   Release_Date  49 non-null     object 
 2   RT_Score      49 non-null     float64
 3   Kill_Count    0 non-null      float64
dtypes: float64(2), object(2)
memory usage: 1.7+ KB


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
movie_data.describe()

Unnamed: 0,RT_Score,Kill_Count
count,49.0,0.0
mean,0.536327,
std,0.268985,
min,0.08,
25%,0.31,
50%,0.53,
75%,0.76,
max,0.98,


In [8]:
# Check the minimum RT score. The lowest-rated movie sits at 0.08 (8%),
# so there are no zero placeholder values hiding in the column.
movie_data['RT_Score'].min()

0.08

In [12]:
# Count the missing values in RT_Score; zero means the column is complete.
movie_data['RT_Score'].isna().sum()

0

In [13]:
# Mean RT score across all movies: about 0.54, i.e. roughly 54%.
movie_data['RT_Score'].mean()

0.536326530612245

In [16]:
# Inspect the column dtypes. Release_Date is currently a generic object
# column, but it should be a datetime column.
movie_data.dtypes

MName            object
Release_Date     object
RT_Score        float64
Kill_Count      float64
dtype: object

In [17]:
# Convert Release_Date from text to datetime64 using pandas.
# The source file stores dates as MM/DD/YYYY (e.g. 04/30/1981), so the format
# is given explicitly instead of relying on inference: parsing is
# deterministic, and a value that does not match raises an error rather than
# being silently misread.
movie_data['Release_Date'] = pd.to_datetime(movie_data['Release_Date'], format='%m/%d/%Y')
# Confirm that Release_Date now reports a datetime64 dtype.
movie_data.dtypes

MName                   object
Release_Date    datetime64[ns]
RT_Score               float64
Kill_Count             float64
dtype: object

In [18]:
# Preview the frame again to see how Release_Date displays after the conversion.
movie_data.head()

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
0,Friday the 13th,1980-05-09,0.64,
1,Friday the 13th Pt. 2,1981-04-30,0.27,
2,Friday the 13th Pt. 3,1982-08-13,0.11,
3,Friday the 13th: The Final Chapter,1984-04-13,0.23,
4,Friday the 13th: A New Beginning,1985-03-22,0.18,


In [19]:
# With the dates fixed, write a new CSV that mirrors the original file but
# carries the converted release dates.
# index=False keeps the row index out of the file; otherwise it is written as
# an extra unnamed first column that the original CSV does not have and that
# reappears as "Unnamed: 0" when the file is read back.
movie_data.to_csv('kc-movie-data2.csv', index=False)

In [23]:
# Which movie has the earliest release date, and what RT score did it get?
# Select every row whose release date equals the column minimum.
movie_data.loc[movie_data['Release_Date'].eq(movie_data['Release_Date'].min())]

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
18,Night of the Living Dead,1968-10-01,0.97,


In [24]:
# Which movie was released most recently?
# Select every row whose release date equals the column maximum.
movie_data.loc[movie_data['Release_Date'].eq(movie_data['Release_Date'].max())]

Unnamed: 0,MName,Release_Date,RT_Score,Kill_Count
27,Cult of Chucky,2017-10-20,0.79,


In [28]:
# Normalise the column labels for convenience: trim surrounding whitespace
# and convert each label to lowercase.
movie_data.columns = movie_data.columns.map(lambda label: label.strip().lower())

In [29]:
# Verify the column labels after the rename.
movie_data.columns

Index(['mname', 'release_date', 'rt_score', 'kill_count'], dtype='object')

In [30]:
# Overwrite the exported CSV so it carries the lowercase column names.
# index=False keeps the row index out of the file; otherwise it is written as
# an extra unnamed first column that reappears as "Unnamed: 0" when the file
# is read back.
movie_data.to_csv('kc-movie-data2.csv', index=False)