# 🎬 TMDB Movie Analysis Project

This notebook performs a structured analysis on movie data fetched from The Movie Database (TMDB) API. It covers:
- Data extraction
- Cleaning and transformation
- KPI analysis
- Visualizations


In [40]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime


## Function Group 1: Data Extraction
Fetch movie data from the TMDB API.

In [41]:
movie_ids = [0, 299534, 19995, 140607, 299536, 597, 135397,
420818, 24428, 168259, 99861, 284054, 12445,
181808, 330457, 351286, 109445, 321612, 260513]

In [42]:
def fetch_movie_data(movie_ids, api_key):
    """Fetch movie details and credits from TMDB API for the given list of movie IDs."""
    url = "https://api.themoviedb.org/3/movie/{}?api_key={}&append_to_response=credits"
    movie_data = []

    for movie_id in movie_ids:
        response = requests.get(url.format(movie_id, api_key))
        if response.status_code == 200:
            movie_data.append(response.json())
        else:
            print(f"Failed for ID {movie_id}: {response.status_code}")
    return pd.DataFrame(movie_data)

In [43]:
data = fetch_movie_data(movie_ids, api_key='fc67bc86d72331280543a2761748f0e6')

Failed for ID 0: 404


## 🧩 Function Group 2: Data Cleaning & Preprocessing
Cleans the fetched movie DataFrame, restructures columns, and handles missing values.


### Extract Credits: Cast, Crew, Director

In [44]:
def extract_credits_info(df):
    """
    Extracts cast, crew, and director info from 'credits' column (nested JSON).
    Adds: cast, cast_size, crew_size, director columns.
    """
    def get_director(crew_list):
        if isinstance(crew_list, list):
            for member in crew_list:
                if member.get('job') == 'Director':
                    return member.get('name')
        return np.nan

    def get_cast_names(cast_list):
        if isinstance(cast_list, list):
            return '|'.join([member.get('name') for member in cast_list[:5]])
        return np.nan

    df['cast'] = df['credits'].apply(lambda x: get_cast_names(x['cast']) if isinstance(x, dict) else np.nan)
    df['cast_size'] = df['credits'].apply(lambda x: len(x['cast']) if isinstance(x, dict) and 'cast' in x else 0)
    df['crew_size'] = df['credits'].apply(lambda x: len(x['crew']) if isinstance(x, dict) and 'crew' in x else 0)
    df['director'] = df['credits'].apply(lambda x: get_director(x['crew']) if isinstance(x, dict) else np.nan)
    return df


In [45]:
extract_credits_info(data)

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,origin_country,original_language,...,tagline,title,video,vote_average,vote_count,credits,cast,cast_size,crew_size,director
0,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",https://www.marvel.com/movies/avengers-endgame,299534,tt4154796,[US],en,...,Avenge the fallen.,Avengers: Endgame,False,8.238,26209,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,105,593,Anthony Russo
1,False,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.avatar.com/movies/avatar,19995,tt0499549,[US],en,...,Enter the world of Pandora.,Avatar,False,7.588,32115,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Sam Worthington|Zoe Saldaña|Sigourney Weaver|S...,65,986,James Cameron
2,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,[US],en,...,Every generation has a story.,Star Wars: The Force Awakens,False,7.262,19666,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...,182,257,J.J. Abrams
3,False,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",https://www.marvel.com/movies/avengers-infinit...,299536,tt4154756,[US],en,...,Destiny arrives all the same.,Avengers: Infinity War,False,8.236,30386,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Evans|Chris Hemsworth|...,69,724,Joe Russo
4,False,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",https://www.paramountmovies.com/movies/titanic,597,tt0120338,[US],en,...,Nothing on Earth could come between them.,Titanic,False,7.9,25867,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Leonardo DiCaprio|Kate Winslet|Billy Zane|Kath...,116,258,James Cameron
5,False,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,"{'id': 328, 'name': 'Jurassic Park Collection'...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.jurassicworld.com/,135397,tt0369610,[US],en,...,The park is open.,Jurassic World,False,6.7,20623,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi...,52,423,Colin Trevorrow
6,False,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,"{'id': 762512, 'name': 'The Lion King (Reboot)...",260000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 18, '...",https://movies.disney.com/the-lion-king-2019,420818,tt6105098,[US],en,...,The king has returned.,The Lion King,False,7.109,10302,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Chiwetel Ejiofor|John Oliver|Donald Glover|Jam...,20,44,Jon Favreau
7,False,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.marvel.com/movies/the-avengers,24428,tt0848228,[US],en,...,Some assembly required.,The Avengers,False,7.734,31507,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,112,631,Joss Whedon
8,False,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,"{'id': 9485, 'name': 'The Fast and the Furious...",190000000,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",https://www.uphe.com/movies/furious-7,168259,tt2820852,[US],en,...,Vengeance hits home.,Furious 7,False,7.225,10762,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Vin Diesel|Paul Walker|Jason Statham|Michelle ...,48,222,James Wan
9,False,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",365000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.marvel.com/movies/avengers-age-of-...,99861,tt2395427,[US],en,...,A new age has come.,Avengers: Age of Ultron,False,7.271,23347,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Hemsworth|Mark Ruffalo...,72,636,Joss Whedon


In [46]:
def extract_names(obj):
    if isinstance(obj, list):
        return '|'.join(d['name'] for d in obj)
    return None

def clean_movie_data(df):
    """
    Cleans and prepares movie data for analysis.
    Drops unused columns, flattens JSON structures, converts types.
    """
    df.drop(columns=['adult', 'imdb_id', 'original_title', 'video', 'homepage'], inplace=True)

    df['genres'] = df['genres'].apply(extract_names)
    df['production_countries'] = df['production_countries'].apply(extract_names)
    df['production_companies'] = df['production_companies'].apply(extract_names)
    df['spoken_languages'] = df['spoken_languages'].apply(extract_names)
    df['belongs_to_collection'] = df['belongs_to_collection'].apply(
        lambda x: x['name'] if pd.notna(x) and isinstance(x, dict) else None
    )

    df['budget'] = pd.to_numeric(df['budget']).replace(0, np.nan)
    df['revenue'] = pd.to_numeric(df['revenue']).replace(0, np.nan)
    df['popularity'] = pd.to_numeric(df['popularity'])
    df['release_date'] = pd.to_datetime(df['release_date'])
    df['runtime'] = pd.to_numeric(df['runtime']).replace(0, np.nan)

    df['budget_musd'] = df['budget'] / 1e6
    df['revenue_musd'] = df['revenue'] / 1e6

    df.drop_duplicates(subset=['id', 'title'], inplace=True)
    df = df[df['id'].notna() & df['title'].notna()]
    df = df[df.notna().sum(axis=1) >= 10]
    df = df[df['status'] == 'Released']
    df.drop(columns=['status'], inplace=True)

    final_cols = ['id', 'title', 'tagline', 'release_date', 'genres', 'belongs_to_collection',
              'original_language', 'budget_musd', 'revenue_musd', 'production_companies',
              'production_countries', 'vote_count', 'vote_average', 'popularity', 'runtime',
              'overview', 'spoken_languages', 'poster_path', 'cast', 'cast_size', 'director', 'crew_size']

    df = df[[col for col in final_cols if col in df.columns]]

    return df.reset_index(drop=True)


In [47]:
clean_movie_data(data).head()

Unnamed: 0,id,title,tagline,release_date,genres,belongs_to_collection,original_language,budget_musd,revenue_musd,production_companies,...,vote_average,popularity,runtime,overview,spoken_languages,poster_path,cast,cast_size,director,crew_size
0,299534,Avengers: Endgame,Avenge the fallen.,2019-04-24,Adventure|Science Fiction|Action,The Avengers Collection,en,356.0,2799.4391,Marvel Studios,...,8.238,22.2597,181,After the devastating events of Avengers: Infi...,English|日本語|,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,105,Anthony Russo,593
1,19995,Avatar,Enter the world of Pandora.,2009-12-15,Action|Adventure|Fantasy|Science Fiction,Avatar Collection,en,237.0,2923.706026,Dune Entertainment|Lightstorm Entertainment|20...,...,7.588,31.3525,162,"In the 22nd century, a paraplegic Marine is di...",English|Español,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,Sam Worthington|Zoe Saldaña|Sigourney Weaver|S...,65,James Cameron,986
2,140607,Star Wars: The Force Awakens,Every generation has a story.,2015-12-15,Adventure|Action|Science Fiction,Star Wars Collection,en,245.0,2068.223624,Lucasfilm Ltd.|Bad Robot,...,7.262,12.7841,136,Thirty years after defeating the Galactic Empi...,English,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...,182,J.J. Abrams,257
3,299536,Avengers: Infinity War,Destiny arrives all the same.,2018-04-25,Adventure|Action|Science Fiction,The Avengers Collection,en,300.0,2052.415039,Marvel Studios,...,8.236,32.6907,149,As the Avengers and their allies have continue...,English|,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,Robert Downey Jr.|Chris Evans|Chris Hemsworth|...,69,Joe Russo,724
4,597,Titanic,Nothing on Earth could come between them.,1997-11-18,Drama|Romance,,en,200.0,2264.162353,Paramount Pictures|20th Century Fox|Lightstorm...,...,7.9,31.6333,194,101-year-old Rose DeWitt Bukater tells the sto...,English|Français|Deutsch|svenska|Italiano|Pусский,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,Leonardo DiCaprio|Kate Winslet|Billy Zane|Kath...,116,James Cameron,258


In [51]:
def detect_anomalies(data):
    extracted_columns = ['belongs_to_collection', 'genres', 'production_countries',
                         'production_companies', 'spoken_languages']

    for col in extracted_columns:
        print('***********************************************************************')
        print(data[col].value_counts())

        # Noticed that the genres and production countries Columns had some values 
        # that can be one but due to positioning its seen a different value

    data['genres'] = data['genres'].replace('Adventure|Science Fiction|Action', 
                                            'Action|Adventure|Science Fiction')
    data['genres'] = data['genres'].replace('Adventure|Action|Science Fiction', 
                                            'Action|Adventure|Science Fiction')

    data['production_countries'] = data['production_countries'].replace(
        'United Kingdom|United States of America', 'United States of America|United Kingdom')   
    return data

In [52]:
detect_anomalies(data)

***********************************************************************
belongs_to_collection
The Avengers Collection                4
Star Wars Collection                   2
Jurassic Park Collection               2
Frozen Collection                      2
Avatar Collection                      1
The Lion King (Reboot) Collection      1
The Fast and the Furious Collection    1
Black Panther Collection               1
Harry Potter Collection                1
The Incredibles Collection             1
Name: count, dtype: int64
***********************************************************************
genres
Adventure|Action|Science Fiction             3
Action|Adventure|Science Fiction|Thriller    2
Action|Adventure|Science Fiction             2
Adventure|Science Fiction|Action             1
Action|Adventure|Fantasy|Science Fiction     1
Drama|Romance                                1
Adventure|Drama|Family|Animation             1
Science Fiction|Action|Adventure             1
Action|Thriller

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,title,vote_average,vote_count,credits,cast,cast_size,crew_size,director,budget_musd,revenue_musd
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356000000,Action|Adventure|Science Fiction,299534,[US],en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,Avengers: Endgame,8.238,26209,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,105,593,Anthony Russo,356.0,2799.4391
1,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,Avatar Collection,237000000,Action|Adventure|Fantasy|Science Fiction,19995,[US],en,"In the 22nd century, a paraplegic Marine is di...",31.3525,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,...,Avatar,7.588,32115,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Sam Worthington|Zoe Saldaña|Sigourney Weaver|S...,65,986,James Cameron,237.0,2923.706026
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245000000,Action|Adventure|Science Fiction,140607,[US],en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,Star Wars: The Force Awakens,7.262,19666,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...,182,257,J.J. Abrams,245.0,2068.223624
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300000000,Action|Adventure|Science Fiction,299536,[US],en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,Avengers: Infinity War,8.236,30386,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Evans|Chris Hemsworth|...,69,724,Joe Russo,300.0,2052.415039
4,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,Drama|Romance,597,[US],en,101-year-old Rose DeWitt Bukater tells the sto...,31.6333,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,...,Titanic,7.9,25867,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Leonardo DiCaprio|Kate Winslet|Billy Zane|Kath...,116,258,James Cameron,200.0,2264.162353
5,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,Jurassic Park Collection,150000000,Action|Adventure|Science Fiction|Thriller,135397,[US],en,Twenty-two years after the events of Jurassic ...,19.5099,/rhr4y79GpxQF9IsfJItRXVaoGs4.jpg,...,Jurassic World,6.7,20623,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi...,52,423,Colin Trevorrow,150.0,1671.537444
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260000000,Adventure|Drama|Family|Animation,420818,[US],en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,The Lion King,7.109,10302,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Chiwetel Ejiofor|John Oliver|Donald Glover|Jam...,20,44,Jon Favreau,260.0,1662.020819
7,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,The Avengers Collection,220000000,Science Fiction|Action|Adventure,24428,[US],en,When an unexpected enemy emerges and threatens...,33.8215,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,...,The Avengers,7.734,31507,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,112,631,Joss Whedon,220.0,1518.815515
8,/cHkhb5A4gQRK6zs6Pv7zorHs8Nk.jpg,The Fast and the Furious Collection,190000000,Action|Thriller|Crime,168259,[US],en,Deckard Shaw seeks revenge against Dominic Tor...,14.2561,/wurKlC3VKUgcfsn0K51MJYEleS2.jpg,...,Furious 7,7.225,10762,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Vin Diesel|Paul Walker|Jason Statham|Michelle ...,48,222,James Wan,190.0,1515.4
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365000000,Action|Adventure|Science Fiction,99861,[US],en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,Avengers: Age of Ultron,7.271,23347,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",Robert Downey Jr.|Chris Hemsworth|Mark Ruffalo...,72,636,Joss Whedon,365.0,1405.403694


In [53]:
data['genres'].value_counts()

genres
Action|Adventure|Science Fiction             6
Action|Adventure|Science Fiction|Thriller    2
Action|Adventure|Fantasy|Science Fiction     1
Drama|Romance                                1
Adventure|Drama|Family|Animation             1
Science Fiction|Action|Adventure             1
Action|Thriller|Crime                        1
Fantasy|Adventure                            1
Family|Animation|Adventure|Comedy|Fantasy    1
Animation|Family|Adventure|Fantasy           1
Family|Fantasy|Romance                       1
Action|Adventure|Animation|Family            1
Name: count, dtype: int64

# 📊 Step 3 – KPI and Movie Ranking


In [41]:
def add_kpi_columns(df):
    df['profit_musd'] = df['revenue_musd'] - df['budget_musd']
    df['roi'] = df['revenue_musd'] / df['budget_musd']
    return df

def rank_movies(df, metric, top_n=10, ascending=False, filters=None):
    if filters:
        df = df.query(filters)
    return df.sort_values(by=metric, ascending=ascending).head(top_n)


In [None]:
rank_movies(data, 'budget_musd')

Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,...,vote_count,credits,budget_musd,revenue_musd,cast,cast_size,crew_size,director,profit_musd,roi
9,/6YwkGolwdOMNpbTOmLjoehlVWs5.jpg,The Avengers Collection,365000000,Action|Adventure|Science Fiction,99861,[US],en,When Tony Stark tries to jumpstart a dormant p...,17.2051,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,...,23347,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",365.0,1405.403694,Robert Downey Jr.|Chris Hemsworth|Mark Ruffalo...,72,636,Joss Whedon,1040.403694,3.850421
0,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,The Avengers Collection,356000000,Adventure|Science Fiction|Action,299534,[US],en,After the devastating events of Avengers: Infi...,22.2597,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,...,26209,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",356.0,2799.4391,Robert Downey Jr.|Chris Evans|Mark Ruffalo|Chr...,105,593,Anthony Russo,2443.4391,7.863593
3,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,The Avengers Collection,300000000,Adventure|Action|Science Fiction,299536,[US],en,As the Avengers and their allies have continue...,32.6907,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,...,30386,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",300.0,2052.415039,Robert Downey Jr.|Chris Evans|Chris Hemsworth|...,69,724,Joe Russo,1752.415039,6.841383
6,/1TUg5pO1VZ4B0Q1amk3OlXvlpXV.jpg,The Lion King (Reboot) Collection,260000000,Adventure|Drama|Family|Animation,420818,[US],en,"Simba idolizes his father, King Mufasa, and ta...",22.2842,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,...,10302,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",260.0,1662.020819,Chiwetel Ejiofor|John Oliver|Donald Glover|Jam...,20,44,Jon Favreau,1402.020819,6.392388
2,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,Star Wars Collection,245000000,Adventure|Action|Science Fiction,140607,[US],en,Thirty years after defeating the Galactic Empi...,12.7841,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,...,19665,"{'cast': [{'adult': False, 'gender': 2, 'id': ...",245.0,2068.223624,Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...,182,257,J.J. Abrams,1823.223624,8.441729


# 🔍 Step 4 – Custom Movie Filters


In [45]:
def filter_movies_by_actor_genre(df, actor_name, genre_filters):
    return df[
        df['genres'].str.contains(genre_filters[0], na=False) &
        df['genres'].str.contains(genre_filters[1], na=False) &
        df['cast'].str.contains(actor_name, na=False)
    ].sort_values(by='vote_average', ascending=False)

def filter_by_actor_director(df, actor, director):
    return df[
        df['cast'].str.contains(actor, na=False) &
        df['director'].str.contains(director, na=False)
    ].sort_values(by='runtime')

In [46]:
def franchise_vs_standalone():
    franchise = data[data['belongs_to_collection'].notna()]
    standalone = data[data['belongs_to_collection'].isna()]

    summary_stats = {
        'Group': ['Franchise', 'Standalone'],

        'Mean Revenue': [
            franchise['revenue_millions'].mean(),
            standalone['revenue_millions'].mean()
        ],

        'Mean ROI': [
            franchise['roi'].mean(),
            standalone['roi'].mean()
        ],

        'Mean Budget Raised': [
            franchise['budget_millions'].mean(),
            standalone['budget_millions'].mean()
        ],

        'Mean Popularity': [
            franchise['popularity'].mean(),
            standalone['popularity'].mean()
        ],

        'Mean Rating': [
            franchise['vote_average'].mean(),
            standalone['vote_average'].mean()
        ]
    }

    comparison_data = pd.DataFrame(summary_stats)

    return comparison_data

In [None]:
def analyze_franchise(data, sort_by = None, ascending = False):

    franchise = data[data['belongs_to_collection'].notna()]

    franchise_stat = franchise.groupby('belongs_to_collection').agg({
        'id': 'count',
        'budget_millions': ['sum', 'mean'],
        'revenue_millions': ['sum', 'mean'],
        'vote_average': 'mean'
    })

    franchise_stat.columns = ['total_movies', 'total_budget_millions', 'budget_mean', 'total_revenue_millions',
                              'revenue_mean', 'mean_rating']
    
    if sort_by:
        franchise_stat = franchise_stat.sort_values(by = sort_by, ascending = ascending)
    return franchise_stat


In [None]:
def analyze_directors(data, sort_by = None, ascending = False):

    franchise = data[data['belongs_to_collection'].notna()]

    franchise_stat = franchise.groupby('director').agg({
        'id': 'count',
        'revenue_millions': 'sum',
        'vote_average': 'mean'
    })

    franchise_stat.columns = ['total_movies_directed', 'total_revenue_millions',
                               'mean_rating']
    
    if sort_by:
        franchise_stat = franchise_stat.sort_values(by = sort_by, ascending = ascending)
    return franchise_stat


Failed to fetch data for ID: 0: 404, {'success': False, 'status_code': 6, 'status_message': 'Invalid id: The pre-requisite id is invalid or not found.'}
