## Import Libraries

In [1]:
import pandas as pd
import os

## Import the data

In [2]:
# Get current working directory
cwd = os.getcwd()
csv_file = os.path.join(cwd, 'original_disney_plus.csv')
print(csv_file)

# Read csv file
df = pd.read_csv(csv_file)

# Print first 5 rows
df.head(2)

c:\Users\mmoore\Desktop\Progression\Analysis\Excel\disney_plus\disney-plus-analytics\original_disney_plus.csv


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...


## Get Descriptive Info

In [3]:
df.shape

(1450, 12)

In [4]:
new_df = df

new_df.info

<bound method DataFrame.info of      show_id     type                                             title  \
0         s1    Movie  Duck the Halls: A Mickey Mouse Christmas Special   
1         s2    Movie                            Ernest Saves Christmas   
2         s3    Movie                      Ice Age: A Mammoth Christmas   
3         s4    Movie                        The Queen Family Singalong   
4         s5  TV Show                             The Beatles: Get Back   
...      ...      ...                                               ...   
1445   s1446    Movie                          X-Men Origins: Wolverine   
1446   s1447    Movie    Night at the Museum: Battle of the Smithsonian   
1447   s1448    Movie                                   Eddie the Eagle   
1448   s1449    Movie                              Bend It Like Beckham   
1449   s1450    Movie             Captain Sparky vs. The Flying Saucers   

                               director  \
0     Alonso Ramirez Ram

## Data Cleaning

In [5]:
# Country column is na then replace it with Country not specified

new_df['country'] = new_df['country'].fillna('Country Not Specified')
new_df['director'] = new_df['director'].fillna('Director Not Specified')
new_df['cast'] = new_df['cast'].fillna('Cast Not Specified')
new_df['rating'] = new_df['rating'].fillna('Rating Not Specified')

new_df.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",Country Not Specified,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",Country Not Specified,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",Country Not Specified,"November 26, 2021",2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,Director Not Specified,"John Lennon, Paul McCartney, George Harrison, ...",Country Not Specified,"November 25, 2021",2021,Rating Not Specified,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...


In [6]:
new_df.dtypes

show_id         object
type            object
title           object
director        object
cast            object
country         object
date_added      object
release_year     int64
rating          object
duration        object
listed_in       object
description     object
dtype: object

In [7]:
# drop na values in date_added column

new_df.dropna(subset=['date_added'], inplace=True)

In [8]:
new_df.isna().sum()

show_id         0
type            0
title           0
director        0
cast            0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
description     0
dtype: int64

In [9]:
# Create new columns from duration column

new_df[['Duration_Amount','Duration_Type']] = new_df['duration'].str.split(" ",expand = True)
new_df.head(2)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Duration_Amount,Duration_Type
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",Country Not Specified,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!,23,min
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",Country Not Specified,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...,91,min


In [11]:
# change datatype of duration_num column

new_df['Duration_Amount'] = new_df['Duration_Amount'].astype(int)
new_df.dtypes

show_id            object
type               object
title              object
director           object
cast               object
country            object
date_added         object
release_year        int64
rating             object
duration           object
listed_in          object
description        object
Duration_Amount     int32
Duration_Type      object
dtype: object

In [12]:
new_df.head(1)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Duration_Amount,Duration_Type
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",Country Not Specified,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!,23,min


In [13]:
# rename all new_df columns

new_df = new_df.rename(columns={'show_id':'Show_ID',
                                'type':'Type',
                                'title':'Title',
                                'director':'Director',
                                'cast':'Cast',
                                'country':'Country',
                                'date_added':'Date_Added',
                                'release_year':'Release_Year',
                                'rating':'Rating',
                                'duration':'Duration',
                                'listed_in':'Listed_In',
                                })
new_df.head(2)

Unnamed: 0,Show_ID,Type,Title,Director,Cast,Country,Date_Added,Release_Year,Rating,Duration,Listed_In,description,Duration_Amount,Duration_Type
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",Country Not Specified,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!,23,min
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",Country Not Specified,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...,91,min


In [14]:
new_df = new_df.rename(columns={'description':'Description'})
new_df.head(1)

Unnamed: 0,Show_ID,Type,Title,Director,Cast,Country,Date_Added,Release_Year,Rating,Duration,Listed_In,Description,Duration_Amount,Duration_Type
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",Country Not Specified,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!,23,min


## Data Export

In [15]:
new_df.to_csv('disney_plus.csv', index=False)