In [1]:
import pandas as pd
import numpy as np
import re

from datetime import date

In [2]:
#Load Disney Plus TV titles
plus_df = pd.read_csv('input/disney_plus_titles.csv')
plus_df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,"November 26, 2021",2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...


In [3]:
plus_df.count()

show_id         1450
type            1450
title           1450
director         977
cast            1260
country         1231
date_added      1447
release_year    1450
rating          1447
duration        1450
listed_in       1450
description     1450
dtype: int64

In [4]:
plus_df.dtypes

show_id         object
type            object
title           object
director        object
cast            object
country         object
date_added      object
release_year     int64
rating          object
duration        object
listed_in       object
description     object
dtype: object

## Save Directors to CSV -- Start

In [5]:
directors_df = plus_df[['title', 'director','release_year']].copy()
directors_df.head()

Unnamed: 0,title,director,release_year
0,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson",2016
1,Ernest Saves Christmas,John Cherry,1988
2,Ice Age: A Mammoth Christmas,Karen Disher,2011
3,The Queen Family Singalong,Hamish Hamilton,2021
4,The Beatles: Get Back,,2021


In [6]:
# Export director list
directors_df.to_csv (r'output/disney_director.csv', index = None, header=True) 

## Save Directors to CSV -- End

## Save Casts to CSV -- Start

In [7]:
casts_df = plus_df[['title', 'cast','release_year']].copy()
casts_df.head()

Unnamed: 0,title,cast,release_year
0,Duck the Halls: A Mickey Mouse Christmas Special,"Chris Diamantopoulos, Tony Anselmo, Tress MacN...",2016
1,Ernest Saves Christmas,"Jim Varney, Noelle Parker, Douglas Seale",1988
2,Ice Age: A Mammoth Christmas,"Raymond Albert Romano, John Leguizamo, Denis L...",2011
3,The Queen Family Singalong,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",2021
4,The Beatles: Get Back,"John Lennon, Paul McCartney, George Harrison, ...",2021


In [8]:
# Export casts list
casts_df.to_csv (r'output/disney_casts.csv', index = None, header=True) 

## Save Casts to CSV -- End

In [9]:
#Load the 2021 Disney movie list to compare for duplicates

disney_df = pd.read_csv('output/FINAL_2021_disney_movies_total_gross.csv')
disney_df.head()

Unnamed: 0,movie_title,year,release_date,total_gross,inflation_adjusted_gross
0,101 Dalmatians,1961,"Jan 25, 1961",153000000,1386568896
1,101 Dalmatians,1996,"Nov 27, 1996",136189294,235202122
2,102 Dalmatians,2000,"Nov 22, 2000",66941559,105337713
3,1492: Conquest of Paradise,1992,"Oct 9, 1992",7099531,13711759
4,"20,000 Leagues Under the Sea",1954,"Dec 23, 1954",28200000,284065204


In [10]:
plus_df.rename(columns={'title': 'movie_title', 'release_year':'year'}, inplace=True)

In [11]:
#merge the 2 columns on these columns in order to find the differences
plus_show_df = plus_df.merge(disney_df.drop_duplicates(), on=['movie_title','year'], how='left', indicator=True)
plus_show_df.head()

Unnamed: 0,show_id,type,movie_title,director,cast,country,date_added,year,rating,duration,listed_in,description,release_date,total_gross,inflation_adjusted_gross,_merge
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!,,,,left_only
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...,"Nov 11, 1988",28202109.0,64597848.0,both
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.,,,,left_only
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,"November 26, 2021",2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!",,,,left_only
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...,,,,left_only


In [12]:
plus_show_only_df = plus_show_df[plus_show_df['_merge'] == 'left_only']
plus_show_only_df.drop(['_merge','release_date','total_gross','inflation_adjusted_gross'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [13]:
plus_show_only_df.count()

show_id        1226
type           1226
movie_title    1226
director        753
cast           1037
country        1008
date_added     1223
year           1226
rating         1223
duration       1226
listed_in      1226
description    1226
dtype: int64

In [14]:
plus_show_only_df.head()

Unnamed: 0,show_id,type,movie_title,director,cast,country,date_added,year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,"November 26, 2021",2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...
5,s6,Movie,Becoming Cousteau,Liz Garbus,"Jacques Yves Cousteau, Vincent Cassel",United States,"November 24, 2021",2021,PG-13,94 min,"Biographical, Documentary",An inside look at the legendary life of advent...


In [15]:
plus_show_only_df[plus_show_only_df.movie_title.str.startswith('The Beatle')]

Unnamed: 0,show_id,type,movie_title,director,cast,country,date_added,year,rating,duration,listed_in,description
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...


In [16]:
# Export Disney Plus Shows
plus_show_only_df.to_csv (r'output/2021_disney_plus_shows.csv', index = None, header=True) 