## TMDB Movie Data Anlysis using Pandas and APIs

In [82]:
import requests
import pandas as pd
import numpy as np
import matplotlib as plt
import os
import sys
sys.setrecursionlimit(10000) 

In [83]:
BASE_URL = "https://api.themoviedb.org/3/movie/"
API_ACCESS_TOKEN = os.environ.get('API_ACCESS_TOKEN')
HEADERS = {
    "accept": "application/json",
    "Authorization": f"Bearer {API_ACCESS_TOKEN}"
}
movie_ids = [0, 299534, 19995, 140607, 299536, 597, 135397, 420818, 24428, 168259, 99861, 284054, 12445,181808, 330457, 351286, 109445, 321612, 260513]

In [84]:
internet_connection = False
movies_df =pd.DataFrame()
credits_df = pd.DataFrame()

### Fetching movie data from TMDB Movie Databse API

In [85]:
if (internet_connection != False):
    movies = []
    credits =[]
    for id in movie_ids:
        movie_detail_url = f"{BASE_URL}{id}"
        credits_url = f"{movie_detail_url}/credits"
        movie_response = requests.get(movie_detail_url, headers=HEADERS)
        credit_reponse = requests.get(credits_url, headers=HEADERS)
        if movie_response.status_code == 200 and credit_reponse.status_code == 200:
            movie_data = movie_response.json()
            credits_data = credit_reponse.json()
            movies.append(movie_data)
            credits.append(credits_data)
            print(f"{movie_detail_url} and {credits_url}: Success")
        else:
            print(f"Error occured trying to request movie and credits with id :{id}\nError Status Code-Movie:{movie_response.status_code}\nError Status Code-Credits:{credit_reponse.status_code}")

    #storing the data ass a Pandas DataFrame
    movies_df=pd.DataFrame(movies)
    credits_df=pd.DataFrame(credits)
    movies_df.to_json('../datasets/raw_data.json')
    credits_df.to_json('../datasets/movie_credits.json')
else:
    movies_df = pd.read_json('../datasets/raw_data.json')  
    credits_df = pd.read_json('../datasets/movie_credits.json')

## Data Cleaning and Preprocessing

In [86]:
#Dropping irrelevant columns
movies_df.drop(['adult', 'original_title', 'imdb_id', 'video', 'homepage'], axis=1, inplace=True)
movies_df

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",299534,[US],en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2019-04-24,2799439100,181,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Avenge the fallen.,Avengers: Endgame,8.238,26207
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",19995,[US],en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-15,2923706026,162,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Enter the world of Pandora.,Avatar,7.588,32112
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",140607,[US],en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,7.262,19665
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",299536,[US],en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2018-04-25,2052415039,149,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Destiny arrives all the same.,Avengers: Infinity War,8.236,30386
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",597,[US],en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,2264162353,194,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Nothing on Earth could come between them.,Titanic,7.905,25867
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,"{'id': 328, 'name': 'Jurassic Park Collection'...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",135397,[US],en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2015-06-06,1671537444,124,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The park is open.,Jurassic World,6.7,20623
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,"{'id': 762512, 'name': 'The Lion King (Reboot)...",260000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 18, '...",420818,[US],en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2019-07-12,1662020819,118,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The king has returned.,The Lion King,7.109,10301
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",24428,[US],en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2012-04-25,1518815515,143,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Some assembly required.,The Avengers,7.734,31505
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,"{'id': 9485, 'name': 'The Fast and the Furious...",190000000,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",168259,[US],en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2015-04-01,1515400000,137,"[{'english_name': 'Arabic', 'iso_639_1': 'ar',...",Released,Vengeance hits home.,Furious 7,7.225,10761
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",365000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",99861,[US],en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2015-04-22,1405403694,141,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,A new age has come.,Avengers: Age of Ultron,7.271,23345


### Evalutaing json-like columns and extracting and cleaning key data points

In [87]:
#evaluating json-like columns in movie_df
movies_df['genres'] = movies_df['genres'].apply(lambda genre_list: "|".join([genre['name'] for genre in genre_list]))
movies_df['belongs_to_collection'] = movies_df['belongs_to_collection'].apply(lambda belongs_to_collection: belongs_to_collection['name'] if isinstance(belongs_to_collection, dict) else None )
movies_df['production_companies'] = movies_df['production_companies'].apply(lambda company_list: "|".join([company['name'] for company in company_list]))
movies_df['production_countries'] = movies_df['production_countries'].apply(lambda country_list: "|".join([country['name'] for country in country_list]))
movies_df['spoken_languages'] = movies_df['spoken_languages'].apply(lambda lang_list: "|".join([language['english_name'] for language in lang_list]))
movies_df['origin_country'] = movies_df['origin_country'].apply(lambda country: country[0] if isinstance(country, list) else None)
movies_df

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356000000,Adventure|Science Fiction|Action,299534,US,en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,United States of America,2019-04-24,2799439100,181,English|Japanese|Xhosa,Released,Avenge the fallen.,Avengers: Endgame,8.238,26207
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,Avatar Collection,237000000,Action|Adventure|Fantasy|Science Fiction,19995,US,en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,...,United States of America|United Kingdom,2009-12-15,2923706026,162,English|Spanish,Released,Enter the world of Pandora.,Avatar,7.588,32112
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245000000,Adventure|Action|Science Fiction,140607,US,en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,United States of America,2015-12-15,2068223624,136,English,Released,Every generation has a story.,Star Wars: The Force Awakens,7.262,19665
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300000000,Adventure|Action|Science Fiction,299536,US,en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,United States of America,2018-04-25,2052415039,149,English|Xhosa,Released,Destiny arrives all the same.,Avengers: Infinity War,8.236,30386
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,Drama|Romance,597,US,en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,...,United States of America,1997-11-18,2264162353,194,English|French|German|Swedish|Italian|Russian,Released,Nothing on Earth could come between them.,Titanic,7.905,25867
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,Jurassic Park Collection,150000000,Action|Adventure|Science Fiction|Thriller,135397,US,en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,...,United States of America,2015-06-06,1671537444,124,English,Released,The park is open.,Jurassic World,6.7,20623
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260000000,Adventure|Drama|Family|Animation,420818,US,en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,United States of America,2019-07-12,1662020819,118,English,Released,The king has returned.,The Lion King,7.109,10301
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,The Avengers Collection,220000000,Science Fiction|Action|Adventure,24428,US,en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,...,United States of America,2012-04-25,1518815515,143,English|Hindi|Russian,Released,Some assembly required.,The Avengers,7.734,31505
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,The Fast and the Furious Collection,190000000,Action|Thriller|Crime,168259,US,en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,...,United States of America,2015-04-01,1515400000,137,Arabic|English|Spanish|Thai,Released,Vengeance hits home.,Furious 7,7.225,10761
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365000000,Action|Adventure|Science Fiction,99861,US,en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,United States of America,2015-04-22,1405403694,141,English,Released,A new age has come.,Avengers: Age of Ultron,7.271,23345


In [88]:
# #evaluating json-like columns in credits_df
credits_df['cast'] = credits_df['cast'].apply(lambda cast_list: '|'.join([cast['name'] for cast in cast_list]))
credits_df['cast_size'] = credits_df['cast'].apply(lambda cast_list: len(cast_list))
credits_df['Director'] = credits_df['crew'].apply(lambda crew_list:  next((crew['name'] for crew in crew_list if crew['job'] =='Director'), None))
credits_df['crew_size'] = credits_df['crew'].apply(lambda crew_list: len(crew_list))
credits_df.drop(columns='crew', inplace=True)
credits_df

Unnamed: 0,id,cast,cast_size,Director,crew_size
0,299534,Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,1496,Anthony Russo,593
1,19995,Sam Worthington|Zoe Saldaña|Sigourney Weaver|S...,943,James Cameron,986
2,140607,Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...,2585,J.J. Abrams,257
3,299536,Robert Downey Jr.|Chris Evans|Chris Hemsworth|...,1010,Joe Russo,724
4,597,Leonardo DiCaprio|Kate Winslet|Billy Zane|Kath...,1621,James Cameron,258
5,135397,Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi...,734,Colin Trevorrow,423
6,420818,Chiwetel Ejiofor|John Oliver|Donald Glover|Jam...,279,Jon Favreau,44
7,24428,Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,1658,Joss Whedon,631
8,168259,Vin Diesel|Paul Walker|Jason Statham|Michelle ...,648,James Wan,222
9,99861,Robert Downey Jr.|Chris Hemsworth|Mark Ruffalo...,1044,Joss Whedon,636


In [90]:
movies_df[['genres', 'belongs_to_collection', 'production_companies', 'production_countries', 'spoken_languages','origin_country']].value_counts(dropna=False)

credits_df[['cast', 'crew_size', "Director", 'cast_size']].value_counts(dropna=False)

cast                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

In [92]:
movies_df.merge(credits_df, on='id')

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,spoken_languages,status,tagline,title,vote_average,vote_count,cast,cast_size,Director,crew_size
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356000000,Adventure|Science Fiction|Action,299534,US,en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,English|Japanese|Xhosa,Released,Avenge the fallen.,Avengers: Endgame,8.238,26207,Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,1496,Anthony Russo,593
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,Avatar Collection,237000000,Action|Adventure|Fantasy|Science Fiction,19995,US,en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,...,English|Spanish,Released,Enter the world of Pandora.,Avatar,7.588,32112,Sam Worthington|Zoe Saldaña|Sigourney Weaver|S...,943,James Cameron,986
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245000000,Adventure|Action|Science Fiction,140607,US,en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,English,Released,Every generation has a story.,Star Wars: The Force Awakens,7.262,19665,Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...,2585,J.J. Abrams,257
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300000000,Adventure|Action|Science Fiction,299536,US,en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,English|Xhosa,Released,Destiny arrives all the same.,Avengers: Infinity War,8.236,30386,Robert Downey Jr.|Chris Evans|Chris Hemsworth|...,1010,Joe Russo,724
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,Drama|Romance,597,US,en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,...,English|French|German|Swedish|Italian|Russian,Released,Nothing on Earth could come between them.,Titanic,7.905,25867,Leonardo DiCaprio|Kate Winslet|Billy Zane|Kath...,1621,James Cameron,258
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,Jurassic Park Collection,150000000,Action|Adventure|Science Fiction|Thriller,135397,US,en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,...,English,Released,The park is open.,Jurassic World,6.7,20623,Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi...,734,Colin Trevorrow,423
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260000000,Adventure|Drama|Family|Animation,420818,US,en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,English,Released,The king has returned.,The Lion King,7.109,10301,Chiwetel Ejiofor|John Oliver|Donald Glover|Jam...,279,Jon Favreau,44
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,The Avengers Collection,220000000,Science Fiction|Action|Adventure,24428,US,en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,...,English|Hindi|Russian,Released,Some assembly required.,The Avengers,7.734,31505,Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,1658,Joss Whedon,631
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,The Fast and the Furious Collection,190000000,Action|Thriller|Crime,168259,US,en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,...,Arabic|English|Spanish|Thai,Released,Vengeance hits home.,Furious 7,7.225,10761,Vin Diesel|Paul Walker|Jason Statham|Michelle ...,648,James Wan,222
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365000000,Action|Adventure|Science Fiction,99861,US,en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,English,Released,A new age has come.,Avengers: Age of Ultron,7.271,23345,Robert Downey Jr.|Chris Hemsworth|Mark Ruffalo...,1044,Joss Whedon,636


### Getting Unique Column Values

In [None]:


unique_genres = set()
movies_df['genres'].dropna().apply(lambda genre_list: unique_genres.update(genre_list.split('|')))
print(unique_genres)

unique_production_companies = set()
movies_df['production_companies'].dropna().apply(lambda production_companies: unique_production_companies.update(production_companies.split('|')))
print(unique_production_companies)

unique_production_countries = set()
movies_df['production_countries'].dropna().apply(lambda production_countries: unique_production_countries.update(production_countries.split('|')))

unique_spoken_languages = set()
movies_df['spoken_languages'].dropna().apply(lambda spoken_languages: unique_spoken_languages.update(spoken_languages.split('|')))
print(unique_spoken_languages)
unique_spoken_languages


{'Thriller', 'Romance', 'Crime', 'Fantasy', 'Science Fiction', 'Drama', 'Comedy', 'Animation', 'Adventure', 'Family', 'Action'}
{'Paramount Pictures', '20th Century Fox', 'Universal Pictures', 'Warner Bros. Pictures', 'Walt Disney Pictures', 'Amblin Entertainment', 'Mandeville Films', 'Ingenious Media', 'One Race', 'Lightstorm Entertainment', 'Walt Disney Animation Studios', 'Dune Entertainment', 'Bad Robot', 'Original Film', 'Pixar', 'Lucasfilm Ltd.', 'Heyday Films', 'Marvel Studios', 'Fairview Entertainment'}
{'English', 'Spanish', 'French', 'German', 'Russian', 'Thai', 'Korean', 'Swahili', 'Italian', 'Japanese', 'Swedish', 'Xhosa', 'Hindi', 'Arabic'}


{'Arabic',
 'English',
 'French',
 'German',
 'Hindi',
 'Italian',
 'Japanese',
 'Korean',
 'Russian',
 'Spanish',
 'Swahili',
 'Swedish',
 'Thai',
 'Xhosa'}

In [None]:
movies_df

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356000000,Adventure|Science Fiction|Action,299534,US,en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,United States of America,2019-04-24,2799439100,181,English|Japanese|Xhosa,Released,Avenge the fallen.,Avengers: Endgame,8.238,26207
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,Avatar Collection,237000000,Action|Adventure|Fantasy|Science Fiction,19995,US,en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,...,United States of America|United Kingdom,2009-12-15,2923706026,162,English|Spanish,Released,Enter the world of Pandora.,Avatar,7.588,32112
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245000000,Adventure|Action|Science Fiction,140607,US,en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,United States of America,2015-12-15,2068223624,136,English,Released,Every generation has a story.,Star Wars: The Force Awakens,7.262,19665
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300000000,Adventure|Action|Science Fiction,299536,US,en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,United States of America,2018-04-25,2052415039,149,English|Xhosa,Released,Destiny arrives all the same.,Avengers: Infinity War,8.236,30386
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,Drama|Romance,597,US,en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,...,United States of America,1997-11-18,2264162353,194,English|French|German|Swedish|Italian|Russian,Released,Nothing on Earth could come between them.,Titanic,7.905,25867
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,Jurassic Park Collection,150000000,Action|Adventure|Science Fiction|Thriller,135397,US,en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,...,United States of America,2015-06-06,1671537444,124,English,Released,The park is open.,Jurassic World,6.7,20623
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260000000,Adventure|Drama|Family|Animation,420818,US,en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,United States of America,2019-07-12,1662020819,118,English,Released,The king has returned.,The Lion King,7.109,10301
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,The Avengers Collection,220000000,Science Fiction|Action|Adventure,24428,US,en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,...,United States of America,2012-04-25,1518815515,143,English|Hindi|Russian,Released,Some assembly required.,The Avengers,7.734,31505
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,The Fast and the Furious Collection,190000000,Action|Thriller|Crime,168259,US,en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,...,United States of America,2015-04-01,1515400000,137,Arabic|English|Spanish|Thai,Released,Vengeance hits home.,Furious 7,7.225,10761
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365000000,Action|Adventure|Science Fiction,99861,US,en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,United States of America,2015-04-22,1405403694,141,English,Released,A new age has come.,Avengers: Age of Ultron,7.271,23345


In [None]:
# Coverting columns to specific datatypes
movies_df['budget'] = pd.to_numeric(movies_df['budget'], errors='coerce')
movies_df['id'] = pd.to_numeric(movies_df['id'], errors='coerce')
movies_df['popularity'] = pd.to_numeric(movies_df['popularity'], errors='coerce')

movies_df['release_date'] = pd.to_datetime(movies_df['release_date'], errors='coerce')
movies_df

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356000000,Adventure|Science Fiction|Action,299534,US,en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,United States of America,2019-04-24,2799439100,181,English|Japanese|Xhosa,Released,Avenge the fallen.,Avengers: Endgame,8.238,26207
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,Avatar Collection,237000000,Action|Adventure|Fantasy|Science Fiction,19995,US,en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,...,United States of America|United Kingdom,2009-12-15,2923706026,162,English|Spanish,Released,Enter the world of Pandora.,Avatar,7.588,32112
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245000000,Adventure|Action|Science Fiction,140607,US,en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,United States of America,2015-12-15,2068223624,136,English,Released,Every generation has a story.,Star Wars: The Force Awakens,7.262,19665
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300000000,Adventure|Action|Science Fiction,299536,US,en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,United States of America,2018-04-25,2052415039,149,English|Xhosa,Released,Destiny arrives all the same.,Avengers: Infinity War,8.236,30386
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,Drama|Romance,597,US,en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,...,United States of America,1997-11-18,2264162353,194,English|French|German|Swedish|Italian|Russian,Released,Nothing on Earth could come between them.,Titanic,7.905,25867
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,Jurassic Park Collection,150000000,Action|Adventure|Science Fiction|Thriller,135397,US,en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,...,United States of America,2015-06-06,1671537444,124,English,Released,The park is open.,Jurassic World,6.7,20623
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260000000,Adventure|Drama|Family|Animation,420818,US,en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,United States of America,2019-07-12,1662020819,118,English,Released,The king has returned.,The Lion King,7.109,10301
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,The Avengers Collection,220000000,Science Fiction|Action|Adventure,24428,US,en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,...,United States of America,2012-04-25,1518815515,143,English|Hindi|Russian,Released,Some assembly required.,The Avengers,7.734,31505
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,The Fast and the Furious Collection,190000000,Action|Thriller|Crime,168259,US,en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,...,United States of America,2015-04-01,1515400000,137,Arabic|English|Spanish|Thai,Released,Vengeance hits home.,Furious 7,7.225,10761
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365000000,Action|Adventure|Science Fiction,99861,US,en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,United States of America,2015-04-22,1405403694,141,English,Released,A new age has come.,Avengers: Age of Ultron,7.271,23345


## Handling Missing and Incorrect Data

In [None]:
## Replacing invalid data with Nan
columns = ['budget', 'revenue', 'runtime']

movies_df[columns] = movies_df[columns].replace(0, pd.NA)
movies_df[['overview', 'tagline']] = movies_df[['overview', 'tagline']].replace('No Data', pd.NA)

#Convert column value to million USD
movies_df['budget'] = movies_df['budget']/1000000 
movies_df.rename({'budget':'budget_musd'}, axis=1, inplace=True)

movies_df['revenue'] = movies_df['revenue']/1000000 
movies_df.rename({'revenue':'revenue_musd'}, axis=1, inplace=True)



#Drop Duplicate
movies_df.drop_duplicates(inplace=True, ignore_index=True)

#drop unknown id and title
movies_df.dropna(how='any', subset=['id', 'title'], ignore_index=True, inplace=True)

#kekep only roow where at least 1- columns have non_Non values
movies_df.dropna(thresh=10, ignore_index=True, inplace=True)

# filter to include only released movies
movies_df = movies_df[movies_df['status']=='Released']
#drop status column
movies_df = movies_df.drop(columns='status')
movies_df

Unnamed: 0,backdrop_path,belongs_to_collection,budget_musd,genres,id,origin_country,original_language,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue_musd,runtime,spoken_languages,tagline,title,vote_average,vote_count
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356.0,Adventure|Science Fiction|Action,299534,US,en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,Marvel Studios,United States of America,2019-04-24,2799.4391,181,English|Japanese|Xhosa,Avenge the fallen.,Avengers: Endgame,8.238,26207
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,Avatar Collection,237.0,Action|Adventure|Fantasy|Science Fiction,19995,US,en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,Dune Entertainment|Lightstorm Entertainment|20...,United States of America|United Kingdom,2009-12-15,2923.706026,162,English|Spanish,Enter the world of Pandora.,Avatar,7.588,32112
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245.0,Adventure|Action|Science Fiction,140607,US,en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,Lucasfilm Ltd.|Bad Robot,United States of America,2015-12-15,2068.223624,136,English,Every generation has a story.,Star Wars: The Force Awakens,7.262,19665
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300.0,Adventure|Action|Science Fiction,299536,US,en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,Marvel Studios,United States of America,2018-04-25,2052.415039,149,English|Xhosa,Destiny arrives all the same.,Avengers: Infinity War,8.236,30386
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200.0,Drama|Romance,597,US,en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,Paramount Pictures|20th Century Fox|Lightstorm...,United States of America,1997-11-18,2264.162353,194,English|French|German|Swedish|Italian|Russian,Nothing on Earth could come between them.,Titanic,7.905,25867
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,Jurassic Park Collection,150.0,Action|Adventure|Science Fiction|Thriller,135397,US,en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,Amblin Entertainment|Universal Pictures,United States of America,2015-06-06,1671.537444,124,English,The park is open.,Jurassic World,6.7,20623
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260.0,Adventure|Drama|Family|Animation,420818,US,en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,Walt Disney Pictures|Fairview Entertainment,United States of America,2019-07-12,1662.020819,118,English,The king has returned.,The Lion King,7.109,10301
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,The Avengers Collection,220.0,Science Fiction|Action|Adventure,24428,US,en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,Marvel Studios,United States of America,2012-04-25,1518.815515,143,English|Hindi|Russian,Some assembly required.,The Avengers,7.734,31505
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,The Fast and the Furious Collection,190.0,Action|Thriller|Crime,168259,US,en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,Original Film|One Race|Universal Pictures,United States of America,2015-04-01,1515.4,137,Arabic|English|Spanish|Thai,Vengeance hits home.,Furious 7,7.225,10761
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365.0,Action|Adventure|Science Fiction,99861,US,en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,Marvel Studios,United States of America,2015-04-22,1405.403694,141,English,A new age has come.,Avengers: Age of Ultron,7.271,23345


In [None]:
# Replacing Movies with vote_count = 0 with avearge count per genre
movies_df['genres_list'] = movies_df['genres'].str.split('|')
movies_df_exploded = movies_df.explode('genres_list')
movies_df_exploded_valid_votes = movies_df_exploded.dropna(subset=['vote_count', 'vote_average'])
genres_stats = movies_df_exploded_valid_votes.groupby('genres_list').agg({
    'vote_count': 'mean',
    'vote_average': 'mean'
}).rename(columns={'vote_count': "genre_vote_count", "vote_average": "genre_vote_average"}).reset_index().sort_values('genre_vote_average', ascending=False)
movies_df_exploded = movies_df_exploded.merge(genres_stats, on='genres_list', how='left')




filled_votes = movies_df_exploded.groupby('id').agg({
    "genre_vote_count": "mean",
    "genre_vote_average": "mean"
}).rename(columns={"genre_vote_count": "filled_vote_count", "genre_vote_average":"filled_vote_average"})
# print(filled_votes)
movies_df = movies_df.merge(filled_votes, on='id', how='left')
movies_df['vote_average'] = movies_df['vote_average'].fillna(movies_df['filled_vote_average']).astype(float)
movies_df['vote_count'] = movies_df['vote_count'].fillna(movies_df['filled_vote_count']).astype(int)
movies_df['vote_count'] = np.where(movies_df['vote_count']==0, movies_df['filled_vote_count'], movies_df['vote_count'])
movies_df.drop(columns=["filled_vote_count","genres_list", "filled_vote_average"], inplace=True)


In [None]:
reordered_movies_df = movies_df.reindex(['id', 'title', 'tagline', 'release_date', 'genres', 'belongs_to_collection', 'original_language',
                                'budget_musd', 'revenue_musd', 'production_companies','production_countries', 'vote_count', 'vote_average', 'popularity', 'runtime',
'overview', 'spoken_languages', 'poster_path',], axis=1)
reordered_movies_df.reset_index(drop=True, inplace=True)
reordered_movies_df

Unnamed: 0,id,title,tagline,release_date,genres,belongs_to_collection,original_language,budget_musd,revenue_musd,production_companies,production_countries,vote_count,vote_average,popularity,runtime,overview,spoken_languages,poster_path
0,299534,Avengers: Endgame,Avenge the fallen.,2019-04-24,Adventure|Science Fiction|Action,The Avengers Collection,en,356.0,2799.4391,Marvel Studios,United States of America,26207.0,8.238,22.2597,181,After the devastating events of Avengers: Infi...,English|Japanese|Xhosa,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg
1,19995,Avatar,Enter the world of Pandora.,2009-12-15,Action|Adventure|Fantasy|Science Fiction,Avatar Collection,en,237.0,2923.706026,Dune Entertainment|Lightstorm Entertainment|20...,United States of America|United Kingdom,32112.0,7.588,31.3525,162,"In the 22nd century, a paraplegic Marine is di...",English|Spanish,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg
2,140607,Star Wars: The Force Awakens,Every generation has a story.,2015-12-15,Adventure|Action|Science Fiction,Star Wars Collection,en,245.0,2068.223624,Lucasfilm Ltd.|Bad Robot,United States of America,19665.0,7.262,12.7841,136,Thirty years after defeating the Galactic Empi...,English,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg
3,299536,Avengers: Infinity War,Destiny arrives all the same.,2018-04-25,Adventure|Action|Science Fiction,The Avengers Collection,en,300.0,2052.415039,Marvel Studios,United States of America,30386.0,8.236,32.6907,149,As the Avengers and their allies have continue...,English|Xhosa,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg
4,597,Titanic,Nothing on Earth could come between them.,1997-11-18,Drama|Romance,,en,200.0,2264.162353,Paramount Pictures|20th Century Fox|Lightstorm...,United States of America,25867.0,7.905,31.6333,194,101-year-old Rose DeWitt Bukater tells the sto...,English|French|German|Swedish|Italian|Russian,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg
5,135397,Jurassic World,The park is open.,2015-06-06,Action|Adventure|Science Fiction|Thriller,Jurassic Park Collection,en,150.0,1671.537444,Amblin Entertainment|Universal Pictures,United States of America,20623.0,6.7,19.5099,124,Twenty-two years after the events of Jurassic ...,English,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg
6,420818,The Lion King,The king has returned.,2019-07-12,Adventure|Drama|Family|Animation,The Lion King (Reboot) Collection,en,260.0,1662.020819,Walt Disney Pictures|Fairview Entertainment,United States of America,10301.0,7.109,22.2842,118,"Simba idolizes his father, King Mufasa, and ta...",English,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg
7,24428,The Avengers,Some assembly required.,2012-04-25,Science Fiction|Action|Adventure,The Avengers Collection,en,220.0,1518.815515,Marvel Studios,United States of America,31505.0,7.734,33.8215,143,When an unexpected enemy emerges and threatens...,English|Hindi|Russian,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg
8,168259,Furious 7,Vengeance hits home.,2015-04-01,Action|Thriller|Crime,The Fast and the Furious Collection,en,190.0,1515.4,Original Film|One Race|Universal Pictures,United States of America,10761.0,7.225,14.2561,137,Deckard Shaw seeks revenge against Dominic Tor...,Arabic|English|Spanish|Thai,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg
9,99861,Avengers: Age of Ultron,A new age has come.,2015-04-22,Action|Adventure|Science Fiction,The Avengers Collection,en,365.0,1405.403694,Marvel Studios,United States of America,23345.0,7.271,17.2051,141,When Tony Stark tries to jumpstart a dormant p...,English,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg


## KPI Implementation & Analysis

### Identifying the Best/Wok Performing Movies

In [None]:
# Finding the highest Rank
def highest_revenue(df):
    return df.sort_values('revenue_musd', ascending=False, ignore_index=True).iloc[0]

def highest_budget(df):
    return df.sort_values('budget_musd', ascending=False, ignore_index=True).iloc[0]


reordered_movies_df['profit_musd'] = reordered_movies_df['revenue_musd'] - reordered_movies_df['budget_musd']
def highest_profit(df):
    return df.sort_values('profit_musd', ascending=False, ignore_index=True).iloc[0]

def lowest_profit(df):
    return df.sort_values('profit_musd', ascending=True, ignore_index=True).iloc[0]

def most_voted(df):
    return df.sort_values('vote_count', ascending=False, ignore_index=True).iloc[0]

def most_pupular(df):
    return df.sort_values('popularity', ascending=False, ignore_index=True).iloc[0]

def highest_roi(df):
    filtered_df = df[df['budget_musd']>=10].copy()
    filtered_df['roi_musd'] = filtered_df['revenue_musd']/filtered_df['budget_musd']
    return filtered_df.sort_values('roi_musd', ascending=False, ignore_index=True).iloc[0]

def lowest_roi(df):
    filtered_df = df[df['budget_musd']>=10].copy()
    filtered_df['roi_musd'] = filtered_df['revenue_musd']/filtered_df['budget_musd']
    return filtered_df.sort_values('roi_musd', ascending=True, ignore_index=True).iloc[0]

def highest_rated(df):
    filtered_df = df[df['vote_count']>=10].copy()
    return filtered_df.sort_values('vote_average', ascending=False, ignore_index=True).iloc[0]

def lowest_rated(df):
    filtered_df = df[df['vote_count']>=10].copy()
    return filtered_df.sort_values('vote_average', ascending=True, ignore_index=True).iloc[0]

lowest_rated(reordered_movies_df)



id                                                                  351286
title                                       Jurassic World: Fallen Kingdom
tagline                                                  The park is gone.
release_date                                           2018-06-06 00:00:00
genres                           Action|Adventure|Science Fiction|Thriller
belongs_to_collection                             Jurassic Park Collection
original_language                                                       en
budget_musd                                                          170.0
revenue_musd                                                   1310.466296
production_companies               Amblin Entertainment|Universal Pictures
production_countries                              United States of America
vote_count                                                         12004.0
vote_average                                                         6.538
popularity               

## Franchise vs. Standalone Movie Performance

In [None]:
##Comparing interms of mean revenue
def frachise_vrs_standalone(df):
    df_copy = df.copy()
    df_copy['collection_type'] = df_copy['belongs_to_collection'].apply(
        lambda x: 'Standalone' if pd.isna(x) else 'Franchise'
    )
    df_copy['roi_musd'] = df_copy['revenue_musd']/df_copy['budget_musd']
    return df_copy.groupby('collection_type', dropna=False).agg({
        'revenue_musd': 'mean',
        'id': 'count',
        'budget_musd': 'mean',
        'popularity': 'mean', 
        'vote_average': 'mean',
        'roi_musd': 'median'
    }).rename(columns={'revenue_musd': 'mean_revenue_musd', 'id':'movie_count', 
                       'roi_musd': 'median_roi_musd',
                       'budget_musd':'mean_budget_musd', 'popularity': 'mean_popularity', 
                       'vote_average':'mean_rating'})

frachise_vrs_standalone(reordered_movies_df)

Unnamed: 0_level_0,mean_revenue_musd,movie_count,mean_budget_musd,mean_popularity,mean_rating,median_roi_musd
collection_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Franchise,1682.641971,16,219.875,19.527856,7.379437,7.786109
Standalone,1765.139159,2,180.0,24.37095,7.438,9.617018


### Most Succesful Franchises

In [None]:
## Based on total number of movies in franchise
most_successfull_in_franchise_by = reordered_movies_df.groupby('belongs_to_collection')['id'].count().sort_values(ascending=False).head(1)

## Based on total number of movies and mean budget in franchise
reordered_movies_df.groupby('belongs_to_collection')[['id', 'budget_musd']].agg({
    'id': 'count',
    'budget_musd': 'mean'
}).sort_values(['id', 'budget_musd'], ascending=False)\
       .rename(columns={'id':'total_movies','budget_musd': 'mean_budget_musd'}).head(1)

## Based on total number of movies and mean revenue in franchise
reordered_movies_df.groupby('belongs_to_collection')[['id', 'revenue_musd']].agg({
    'id': 'count',
    'revenue_musd': 'mean'
}).sort_values(['id', 'revenue_musd'], ascending=False)\
    .rename(columns={'id':'total_movies','revenue_musd': 'mean_revenue_musd'}).head(1)

## Based on total number of movies and mean revenue in franchise
reordered_movies_df.groupby('belongs_to_collection')[['id', 'vote_average']].agg({
    'vote_average': 'mean'
}).sort_values([ 'vote_average'], ascending=False)\
    .rename(columns={'vote_average':'mean_avearge'}).head(1)


Unnamed: 0_level_0,mean_avearge
belongs_to_collection,Unnamed: 1_level_1
Harry Potter Collection,8.087


In [None]:
reordered_movies_df

Unnamed: 0,id,title,tagline,release_date,genres,belongs_to_collection,original_language,budget_musd,revenue_musd,production_companies,production_countries,vote_count,vote_average,popularity,runtime,overview,spoken_languages,poster_path,profit_musd
0,299534,Avengers: Endgame,Avenge the fallen.,2019-04-24,Adventure|Science Fiction|Action,The Avengers Collection,en,356.0,2799.4391,Marvel Studios,United States of America,26207.0,8.238,22.2597,181,After the devastating events of Avengers: Infi...,English|Japanese|Xhosa,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,2443.4391
1,19995,Avatar,Enter the world of Pandora.,2009-12-15,Action|Adventure|Fantasy|Science Fiction,Avatar Collection,en,237.0,2923.706026,Dune Entertainment|Lightstorm Entertainment|20...,United States of America|United Kingdom,32112.0,7.588,31.3525,162,"In the 22nd century, a paraplegic Marine is di...",English|Spanish,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,2686.706026
2,140607,Star Wars: The Force Awakens,Every generation has a story.,2015-12-15,Adventure|Action|Science Fiction,Star Wars Collection,en,245.0,2068.223624,Lucasfilm Ltd.|Bad Robot,United States of America,19665.0,7.262,12.7841,136,Thirty years after defeating the Galactic Empi...,English,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,1823.223624
3,299536,Avengers: Infinity War,Destiny arrives all the same.,2018-04-25,Adventure|Action|Science Fiction,The Avengers Collection,en,300.0,2052.415039,Marvel Studios,United States of America,30386.0,8.236,32.6907,149,As the Avengers and their allies have continue...,English|Xhosa,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,1752.415039
4,597,Titanic,Nothing on Earth could come between them.,1997-11-18,Drama|Romance,,en,200.0,2264.162353,Paramount Pictures|20th Century Fox|Lightstorm...,United States of America,25867.0,7.905,31.6333,194,101-year-old Rose DeWitt Bukater tells the sto...,English|French|German|Swedish|Italian|Russian,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,2064.162353
5,135397,Jurassic World,The park is open.,2015-06-06,Action|Adventure|Science Fiction|Thriller,Jurassic Park Collection,en,150.0,1671.537444,Amblin Entertainment|Universal Pictures,United States of America,20623.0,6.7,19.5099,124,Twenty-two years after the events of Jurassic ...,English,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,1521.537444
6,420818,The Lion King,The king has returned.,2019-07-12,Adventure|Drama|Family|Animation,The Lion King (Reboot) Collection,en,260.0,1662.020819,Walt Disney Pictures|Fairview Entertainment,United States of America,10301.0,7.109,22.2842,118,"Simba idolizes his father, King Mufasa, and ta...",English,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,1402.020819
7,24428,The Avengers,Some assembly required.,2012-04-25,Science Fiction|Action|Adventure,The Avengers Collection,en,220.0,1518.815515,Marvel Studios,United States of America,31505.0,7.734,33.8215,143,When an unexpected enemy emerges and threatens...,English|Hindi|Russian,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,1298.815515
8,168259,Furious 7,Vengeance hits home.,2015-04-01,Action|Thriller|Crime,The Fast and the Furious Collection,en,190.0,1515.4,Original Film|One Race|Universal Pictures,United States of America,10761.0,7.225,14.2561,137,Deckard Shaw seeks revenge against Dominic Tor...,Arabic|English|Spanish|Thai,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,1325.4
9,99861,Avengers: Age of Ultron,A new age has come.,2015-04-22,Action|Adventure|Science Fiction,The Avengers Collection,en,365.0,1405.403694,Marvel Studios,United States of America,23345.0,7.271,17.2051,141,When Tony Stark tries to jumpstart a dormant p...,English,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,1040.403694
