In [None]:
# -----------------------------
# STEP 1: Fetch Movie Data from TMDB API
# -----------------------------

import os
import requests
import pandas as pd

# Base URL for TMDB movie API
base_url = "https://api.themoviedb.org/3/movie/"

# Seconds to wait on a single TMDB request. Without a timeout, requests can
# block the notebook indefinitely when the server stops responding.
REQUEST_TIMEOUT = 10

# Read API key from environment variable
# Make sure you set it using:
#   os.environ["TMDB_API_KEY"] = "your_key_here"
API_KEY = os.getenv("TMDB_API_KEY")

# An unset *or empty* key can never authenticate, so reject both up front
if not API_KEY:
    raise ValueError("API key not found! Please set TMDB_API_KEY as an environment variable.")

# Create the header needed for authentication
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json;charset=utf-8"
}

# Given list of movie IDs
# NOTE(review): ID 0 presumably exists to exercise the "not found" branch - confirm.
movie_ids = [0, 299534, 19995, 140607, 299536, 597, 135397, 420818, 24428, 
             168259, 99861, 284054, 12445, 181808, 330457, 351286, 109445, 
             321612, 260513]


def _credit_names(people):
    """Join the 'name' of every dict in ``people`` with '|'.

    Returns None when no usable name is found, so the value reads as missing.
    """
    names = [p["name"] for p in people if isinstance(p, dict) and p.get("name")]
    return "|".join(names) or None


def _credit_list(credits, key):
    """Return ``credits[key]`` as a list, or [] when credits is missing or malformed."""
    if isinstance(credits, dict):
        return credits.get(key) or []
    return []


movies_data = []  # This will hold the movie JSON data

# One Session reuses the HTTP connection across all requests
with requests.Session() as session:
    session.headers.update(headers)

    for m_id in movie_ids:
        # Build the full request URL
        url = base_url + str(m_id)

        try:
            # append_to_response=credits returns cast/crew in the same call;
            # later cells rely on cast / director / cast_size / crew_size.
            response = session.get(
                url,
                params={"append_to_response": "credits"},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # A network failure for one ID must not abort the whole batch
            print(f"Movie ID {m_id} could not be fetched ({exc})")
            continue

        # Check if the movie ID is valid
        if response.status_code == 200:
            movies_data.append(response.json())
            print(f"Fetched movie ID {m_id}")
        else:
            print(f"Movie ID {m_id} not found or could not be fetched (Status: {response.status_code})")

# Convert list of JSON objects to a DataFrame
movie_df = pd.DataFrame(movies_data)

# Flatten the nested credits payload into the columns the analysis uses.
# Skipped when no movie was fetched (the frame then has no 'credits' column).
if "credits" in movie_df.columns:
    cast_lists = movie_df["credits"].apply(lambda c: _credit_list(c, "cast"))
    crew_lists = movie_df["credits"].apply(lambda c: _credit_list(c, "crew"))

    movie_df["cast"] = cast_lists.apply(_credit_names)
    movie_df["cast_size"] = cast_lists.apply(len)
    movie_df["director"] = crew_lists.apply(
        lambda crew: _credit_names(
            [m for m in crew if isinstance(m, dict) and m.get("job") == "Director"]
        )
    )
    movie_df["crew_size"] = crew_lists.apply(len)

# Display the first rows
movie_df.head()


NameError: name 'pd' is not defined

In [None]:
# -------------------------------------------
# STEP 2: Data Cleaning & Preparation
# -------------------------------------------

# Fields that add nothing to the analysis
columns_to_drop = ['adult', 'imdb_id', 'original_title', 'video', 'homepage']

# Copy first so movie_df keeps the raw API payload, then drop in one chain.
# errors='ignore' tolerates columns that are absent from the frame.
clean_df = movie_df.copy().drop(columns=columns_to_drop, errors='ignore')

clean_df.head()


### STEP 2 (continued): Flatten Nested JSON Columns

In [None]:
# Columns that hold nested JSON: lists of dictionaries, except
# 'belongs_to_collection', which is a single dictionary (or missing).
json_columns = [
    'belongs_to_collection', 
    'genres', 
    'production_countries', 
    'production_companies', 
    'spoken_languages'
]

# Helper function: extract "name" fields
def extract_names(value):
    """
    Flatten nested JSON into a single '|'-separated string of names.

    Accepts either a list of dictionaries or one dictionary (treated as a
    one-element list). Entries that are not dictionaries, or that have no
    non-empty 'name', are skipped so the result never contains empty
    segments such as "A||B".

    Example: [{'name':'Action'}, {'name':'Drama'}] → "Action|Drama"
    Example: {'id': 1, 'name': 'Avatar Collection'} → "Avatar Collection"

    Returns None when the value is neither a list nor a dict, or when no
    name could be extracted, so "nothing here" is always a missing value.
    """
    if isinstance(value, dict):
        # A lone dict is handled like a single-item list instead of being dropped
        value = [value]
    if isinstance(value, list):
        names = [item['name'] for item in value
                 if isinstance(item, dict) and item.get('name')]
        # An empty join would give "", which is neither missing nor a real name
        return "|".join(names) or None
    return None

# Replace each nested-JSON column with its flattened, '|'-joined names
for nested_col in json_columns:
    flattened = clean_df[nested_col].map(extract_names)
    clean_df[nested_col] = flattened

clean_df.head()


In [None]:
# Pull the collection name out of a single collection dictionary
def extract_collection(value):
    """
    Return the 'name' entry of a collection dictionary.

    Example: {'id': 123, 'name': 'Avengers Collection'} → 'Avengers Collection'

    Anything that is not a dictionary yields None, as does a dictionary
    without a 'name' key.
    """
    return value.get('name') if isinstance(value, dict) else None

# Read from the raw frame (movie_df), which still holds the original
# dictionaries; the result is aligned to clean_df by index.
collection_names = movie_df['belongs_to_collection'].map(extract_collection)
clean_df['belongs_to_collection'] = collection_names

clean_df.head()


In [None]:
# Columns that must be numeric for the KPI calculations
numeric_cols = ['budget', 'revenue', 'popularity', 'vote_count', 'vote_average', 'runtime']

# Coerce types in one pass: unparseable numbers become NaN and unparseable
# dates become NaT instead of raising.
clean_df = clean_df.assign(
    **{name: pd.to_numeric(clean_df[name], errors='coerce') for name in numeric_cols},
    release_date=pd.to_datetime(clean_df['release_date'], errors='coerce'),
)


Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,origin_country,original_language,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/9wXPKruA6bWYk2co5ix6fH59Qr8.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",https://www.marvel.com/movies/avengers-endgame,299534,tt4154796,[US],en,...,2019-04-24,2799439100,181,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Avenge the fallen.,Avengers: Endgame,False,8.238,26962
1,False,/7JNzw1tSZZEgsBw6lu0VfO2X2Ef.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.avatar.com/movies/avatar,19995,tt0499549,[US],en,...,2009-12-15,2923706026,162,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Enter the world of Pandora.,Avatar,False,7.594,32856
2,False,/8BTsTfln4jlQrLXUBquXJ0ASQy9.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,[US],en,...,2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.3,20103
3,False,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",https://www.marvel.com/movies/avengers-infinit...,299536,tt4154756,[US],en,...,2018-04-25,2052415039,149,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Destiny arrives all the same.,Avengers: Infinity War,False,8.236,31176
4,False,/xnHVX37XZEp33hhCbYlQFq7ux1J.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",https://www.paramountmovies.com/movies/titanic,597,tt0120338,[US],en,...,1997-11-18,2264162353,194,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Nothing on earth could come between them.,Titanic,False,7.903,26511


### STEP 2: Data Cleaning and Preprocessing

In [None]:
# Replace 0 with NaN (zero budget/revenue/runtime is unrealistic).
#
# The result is assigned back to the column rather than using
# `clean_df[col].replace(..., inplace=True)`: that chained in-place call
# acts on a temporary Series and leaves clean_df unchanged under pandas
# Copy-on-Write. `mask` fills with NaN, which keeps the columns numeric;
# pd.NA would turn them into object dtype and break later arithmetic,
# sorting and plotting.
for zero_col in ['budget', 'revenue', 'runtime']:
    clean_df[zero_col] = clean_df[zero_col].mask(clean_df[zero_col] == 0)

clean_df.head()


Index(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'genres',
       'homepage', 'id', 'imdb_id', 'origin_country', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'video', 'vote_average', 'vote_count'],
      dtype='object')
26


In [None]:
# Express the monetary columns in millions of USD
clean_df = clean_df.assign(
    budget_musd=clean_df['budget'].div(1_000_000),
    revenue_musd=clean_df['revenue'].div(1_000_000),
)


21


In [None]:
# Keep only rows that have at least 10 non-missing values
non_null_per_row = clean_df.notna().sum(axis=1)
clean_df = clean_df[non_null_per_row >= 10]

Unnamed: 0,belongs_to_collection,genres,production_countries,production_companies,spoken_languages
0,"{'id': 86311, 'name': 'The Avengers Collection...","[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'english_name': 'English', 'iso_639_1': 'en'..."
1,"{'id': 87096, 'name': 'Avatar Collection', 'po...","[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 444, 'logo_path': None, 'name': 'Dune ...","[{'english_name': 'English', 'iso_639_1': 'en'..."
2,"{'id': 10, 'name': 'Star Wars Collection', 'po...","[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 1, 'logo_path': '/tlVSws0RvvtPBwViUyOF...","[{'english_name': 'English', 'iso_639_1': 'en'..."
3,"{'id': 86311, 'name': 'The Avengers Collection...","[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'english_name': 'English', 'iso_639_1': 'en'..."
4,,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 4, 'logo_path': '/jay6WcMgagAklUt7i9Eu...","[{'english_name': 'English', 'iso_639_1': 'en'..."
5,"{'id': 328, 'name': 'Jurassic Park Collection'...","[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 56, 'logo_path': '/cEaxANEisCqeEoRvODv...","[{'english_name': 'English', 'iso_639_1': 'en'..."
6,"{'id': 762512, 'name': 'The Lion King (Reboot)...","[{'id': 12, 'name': 'Adventure'}, {'id': 18, '...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 2, 'logo_path': '/wdrCwmRnLFJhEoH8GSfy...","[{'english_name': 'English', 'iso_639_1': 'en'..."
7,"{'id': 86311, 'name': 'The Avengers Collection...","[{'id': 878, 'name': 'Science Fiction'}, {'id'...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'english_name': 'English', 'iso_639_1': 'en'..."
8,"{'id': 9485, 'name': 'The Fast and the Furious...","[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 333, 'logo_path': '/5xUJfzPZ8jWJUDzYtI...","[{'english_name': 'Arabic', 'iso_639_1': 'ar',..."
9,"{'id': 86311, 'name': 'The Avengers Collection...","[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","[{'iso_3166_1': 'US', 'name': 'United States o...","[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'english_name': 'English', 'iso_639_1': 'en'..."


In [None]:
# Desired column order for the analysis-ready frame
final_columns = [
    'id', 'title', 'tagline', 'release_date',
    'genres', 'belongs_to_collection', 'original_language',
    'budget_musd', 'revenue_musd',
    'production_companies', 'production_countries',
    'vote_count', 'vote_average', 'popularity', 'runtime',
    'overview', 'spoken_languages', 'poster_path',
    'cast', 'cast_size', 'director', 'crew_size'
]

# Columns from the list that are not in the frame are silently skipped;
# the index is renumbered from 0 after the selection.
present_columns = [name for name in final_columns if name in clean_df.columns]
clean_df = clean_df[present_columns].reset_index(drop=True)

clean_df.head()


In [None]:
# ---------------------------------------
# STEP 3: KPI CALCULATIONS
# ---------------------------------------

# Profit = Revenue - Budget (both in million USD)
clean_df['profit'] = clean_df['revenue_musd'] - clean_df['budget_musd']

# ROI = Revenue / Budget
roi = clean_df['revenue_musd'] / clean_df['budget_musd']

# Dividing by a zero budget yields +/-inf; treat those as missing.
# The cleaned Series is assigned back rather than calling
# `clean_df['roi'].replace(..., inplace=True)`, which acts on a temporary
# under pandas Copy-on-Write and would leave the infinities in place.
# NaN (not pd.NA) keeps the column a plain float dtype.
clean_df['roi'] = roi.replace([float('inf'), -float('inf')], float('nan'))

clean_df[['title', 'budget_musd', 'revenue_musd', 'profit', 'roi']].head()


In [None]:
# Generic ranking helper shared by all the "top N" tables
def rank_movies(df, column, ascending=False, top_n=10):
    """
    Return the first `top_n` movies of `df` ordered by `column`.

    The default ascending=False puts the highest values first; pass
    ascending=True for the lowest. The result holds only the 'title'
    column and the ranking column.
    """
    shown_columns = ['title', column]
    return (
        df.sort_values(by=column, ascending=ascending)
          .loc[:, shown_columns]
          .head(top_n)
    )


In [None]:
# Build the four financial rankings first...
highest_revenue = rank_movies(clean_df, 'revenue_musd')
highest_budget = rank_movies(clean_df, 'budget_musd')
highest_profit = rank_movies(clean_df, 'profit')
lowest_profit = rank_movies(clean_df, 'profit', ascending=True)

# ...then print them in order, each under its own heading
for heading, table in (
    ("Highest Revenue Movies:", highest_revenue),
    ("\nHighest Budget Movies:", highest_budget),
    ("\nHighest Profit Movies:", highest_profit),
    ("\nLowest Profit Movies:", lowest_profit),
):
    print(heading)
    print(table)


In [None]:
# ROI is only compared among movies with a budget of at least 10 (million USD)
has_meaningful_budget = clean_df['budget_musd'] >= 10
filtered_df = clean_df[has_meaningful_budget]

# Rank both ends of the ROI scale on the filtered frame
highest_roi = rank_movies(filtered_df, 'roi')
lowest_roi = rank_movies(filtered_df, 'roi', ascending=True)

for heading, table in (
    ("\nHighest ROI Movies (Budget ≥ 10M):", highest_roi),
    ("\nLowest ROI Movies (Budget ≥ 10M):", lowest_roi),
):
    print(heading)
    print(table)


Unnamed: 0,collection_name,genre_names,production_countries_clean,production_companies_clean,spoken_languages_clean
0,The Avengers Collection,Adventure|Science Fiction|Action,United States of America,Marvel Studios,English|Japanese|Xhosa
1,Avatar Collection,Action|Adventure|Fantasy|Science Fiction,United States of America|United Kingdom,Dune Entertainment|Lightstorm Entertainment|20...,English|Spanish
2,Star Wars Collection,Adventure|Action|Science Fiction,United States of America,Lucasfilm Ltd.|Bad Robot,English
3,The Avengers Collection,Adventure|Action|Science Fiction,United States of America,Marvel Studios,English|Xhosa
4,,Drama|Romance,United States of America,Paramount Pictures|20th Century Fox|Lightstorm...,English|French|German|Swedish|Italian|Russian
5,Jurassic Park Collection,Action|Adventure|Science Fiction|Thriller,United States of America,Amblin Entertainment|Universal Pictures|Legend...,English
6,The Lion King (Reboot) Collection,Adventure|Drama|Family|Animation,United States of America,Walt Disney Pictures|Fairview Entertainment,English
7,The Avengers Collection,Science Fiction|Action|Adventure,United States of America,Marvel Studios,English|Hindi|Russian
8,The Fast and the Furious Collection,Action|Crime|Thriller,United States of America,Original Film|One Race|Universal Pictures,Arabic|English|Spanish|Thai
9,The Avengers Collection,Action|Adventure|Science Fiction,United States of America,Marvel Studios,English


In [None]:
# Ranking by raw number of votes uses every movie
most_voted = rank_movies(clean_df, 'vote_count')

# Rating rankings only consider movies with at least 10 votes
has_enough_votes = clean_df['vote_count'] >= 10
rated_df = clean_df[has_enough_votes]

highest_rated = rank_movies(rated_df, 'vote_average')
lowest_rated = rank_movies(rated_df, 'vote_average', ascending=True)

for heading, table in (
    ("Most Voted Movies:", most_voted),
    ("\nHighest Rated Movies (≥10 votes):", highest_rated),
    ("\nLowest Rated Movies (≥10 votes):", lowest_rated),
):
    print(heading)
    print(table)


KeyError: "['belongs_to_collection', 'genres', 'production_countries', 'production_companies', 'spoken_languages'] not found in axis"

In [None]:
# Search 1: Science Fiction + Action movies featuring Bruce Willis, best rated first.
# Requires clean_df to carry a 'cast' column; a KeyError here means that
# column was never built upstream.
genre_text = clean_df['genres']
is_scifi = genre_text.str.contains("Science Fiction", na=False)
is_action = genre_text.str.contains("Action", na=False)
has_willis = clean_df['cast'].str.contains("Bruce Willis", na=False)

# na=False above makes rows with missing genres/cast count as non-matches
search1 = clean_df[is_scifi & is_action & has_willis].sort_values(
    by='vote_average', ascending=False
)

search1[['title', 'genres', 'vote_average']]


collection_name
The Avengers Collection                4
Star Wars Collection                   2
None                                   2
Jurassic Park Collection               2
Frozen Collection                      2
Avatar Collection                      1
The Lion King (Reboot) Collection      1
The Fast and the Furious Collection    1
Black Panther Collection               1
Harry Potter Collection                1
The Incredibles Collection             1
Name: count, dtype: int64

In [None]:
# Search 2: movies with Uma Thurman directed by Quentin Tarantino, shortest first.
# Requires 'cast' and 'director' columns in clean_df.
has_thurman = clean_df['cast'].str.contains("Uma Thurman", na=False)
by_tarantino = clean_df['director'] == "Quentin Tarantino"

search2 = clean_df[has_thurman & by_tarantino].sort_values(by='runtime')

search2[['title', 'runtime', 'director']]


genre_names
Adventure|Action|Science Fiction             3
Action|Adventure|Science Fiction|Thriller    2
Action|Adventure|Science Fiction             2
None                                         1
Adventure|Science Fiction|Action             1
Action|Adventure|Fantasy|Science Fiction     1
Drama|Romance                                1
Adventure|Drama|Family|Animation             1
Science Fiction|Action|Adventure             1
Action|Crime|Thriller                        1
Adventure|Fantasy                            1
Family|Animation|Adventure|Comedy|Fantasy    1
Animation|Family|Adventure|Fantasy           1
Family|Fantasy|Romance                       1
Action|Adventure|Animation|Family            1
Name: count, dtype: int64

In [None]:
# A movie is a franchise entry when it has a collection name, otherwise standalone
in_collection = clean_df['belongs_to_collection'].notna()
franchise_df = clean_df[in_collection]
standalone_df = clean_df[~in_collection]

# Mean of the same metrics for both groups
comparison_metrics = ['revenue_musd', 'roi', 'budget_musd', 'popularity', 'vote_average']
franchise_stats = franchise_df[comparison_metrics].mean()
standalone_stats = standalone_df[comparison_metrics].mean()

print("Franchise Stats:\n", franchise_stats)
print("\nStandalone Stats:\n", standalone_stats)


production_countries_clean
United States of America                   16
United States of America|United Kingdom     1
United Kingdom|United States of America     1
Name: count, dtype: int64

In [None]:
# Per-collection summary: movie count, total/mean budget and revenue, mean rating.
# The list-valued entries give the result two-level column labels.
franchise_aggregations = {
    'id': 'count',
    'budget_musd': ['sum', 'mean'],
    'revenue_musd': ['sum', 'mean'],
    'vote_average': 'mean',
}

franchise_group = (
    franchise_df
    .groupby('belongs_to_collection')
    .agg(franchise_aggregations)
    # highest total revenue first
    .sort_values(('revenue_musd', 'sum'), ascending=False)
)

franchise_group.head()


production_companies_clean
Marvel Studios                                                                          5
Walt Disney Animation Studios                                                           2
Dune Entertainment|Lightstorm Entertainment|20th Century Fox|Ingenious Film Partners    1
Lucasfilm Ltd.|Bad Robot                                                                1
Paramount Pictures|20th Century Fox|Lightstorm Entertainment                            1
Amblin Entertainment|Universal Pictures|Legendary Pictures                              1
Walt Disney Pictures|Fairview Entertainment                                             1
Original Film|One Race|Universal Pictures                                               1
Warner Bros. Pictures|Heyday Films                                                      1
Lucasfilm Ltd.                                                                          1
Amblin Entertainment|Universal Pictures                                  

In [None]:
# Per-director summary: movie count, total revenue, mean rating.
# Requires a 'director' column in clean_df.
director_aggregations = {
    'id': 'count',
    'revenue_musd': 'sum',
    'vote_average': 'mean',
}

director_group = clean_df.groupby('director').agg(director_aggregations)
# highest total revenue first
director_group = director_group.sort_values('revenue_musd', ascending=False)

director_group.head()


spoken_languages_clean
English                                          9
English|Japanese|Xhosa                           1
English|Spanish                                  1
English|Xhosa                                    1
English|French|German|Swedish|Italian|Russian    1
English|Hindi|Russian                            1
Arabic|English|Spanish|Thai                      1
English|Korean|Swahili|Xhosa                     1
English|Russian                                  1
English|French                                   1
Name: count, dtype: int64

In [None]:
import matplotlib.pyplot as plt

# Scatter of revenue against budget, one point per movie
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(clean_df['budget_musd'], clean_df['revenue_musd'])
ax.set_xlabel("Budget (Million USD)")
ax.set_ylabel("Revenue (Million USD)")
ax.set_title("Revenue vs Budget")
plt.show()

backdrop_path                 0
belongs_to_collection         2
budget                        0
genres                        0
id                            0
origin_country                0
original_language             0
overview                      0
popularity                    0
poster_path                   0
production_companies          0
production_countries          0
release_date                  0
revenue                       0
runtime                       0
spoken_languages              0
status                        0
tagline                       0
title                         0
vote_average                  0
vote_count                    0
collection_name               2
genre_names                   0
production_countries_clean    0
production_companies_clean    0
spoken_languages_clean        0
dtype: int64


In [None]:
# Expand genres into separate rows: one (genre, roi) pair per genre of each movie
genre_df = clean_df[['genres', 'roi']].dropna()
genre_df = genre_df.assign(
    genres=genre_df['genres'].str.split('|'),
    # force a float dtype so the box statistics never see object values
    roi=pd.to_numeric(genre_df['roi'], errors='coerce'),
).explode('genres')

# Drop blank genre labels (from empty genre strings) and non-numeric ROI
genre_df = genre_df[genre_df['genres'].ne('')].dropna(subset=['roi'])

# Group once instead of re-filtering the frame per genre; sort=False keeps
# genres in order of first appearance.
roi_by_genre = {
    genre: group['roi'].to_numpy()
    for genre, group in genre_df.groupby('genres', sort=False)
}

if roi_by_genre:
    genre_labels = list(roi_by_genre)

    plt.figure(figsize=(8,5))
    # Boxes are vertical by default and sit at x = 1..N. Tick labels are set
    # through xticks instead of boxplot's `labels=` kwarg, which is
    # deprecated in recent Matplotlib releases.
    plt.boxplot(list(roi_by_genre.values()))
    plt.xticks(range(1, len(genre_labels) + 1), genre_labels, rotation=90)
    plt.title("ROI Distribution by Genre")
    plt.ylabel("ROI")
    plt.show()
else:
    print("No ROI data available to plot by genre.")


Unnamed: 0,backdrop_path,belongs_to_collection,budget,genres,id,origin_country,original_language,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,collection_name,genre_names,production_countries_clean,production_companies_clean,spoken_languages_clean
4,/xnHVX37XZEp33hhCbYlQFq7ux1J.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",597,[US],en,101-year-old Rose DeWitt Bukater tells the sto...,28.5469,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,"[{'id': 4, 'logo_path': '/jay6WcMgagAklUt7i9Eu...","[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,2264162353,194,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Nothing on earth could come between them.,Titanic,7.903,26511,,Drama|Romance,United States of America,Paramount Pictures|20th Century Fox|Lightstorm...,English|French|German|Swedish|Italian|Russian
16,/uU1Mt4JWhDvl4vKb3AfxNsorkoM.jpg,,160000000,"[{'id': 10751, 'name': 'Family'}, {'id': 14, '...",321612,[US],en,A live-action adaptation of Disney's version o...,11.6822,/hKegSKIDep2ewJWPUQD7u0KqFIp.jpg,"[{'id': 2, 'logo_path': '/wdrCwmRnLFJhEoH8GSfy...","[{'iso_3166_1': 'US', 'name': 'United States o...",2017-03-16,1266115964,129,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Be our guest.,Beauty and the Beast,6.969,15832,,Family|Fantasy|Romance,United States of America,Walt Disney Pictures|Mandeville Films,English|French


In [None]:
# Scatter of average rating against popularity, one point per movie
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(clean_df['popularity'], clean_df['vote_average'])
ax.set_xlabel("Popularity")
ax.set_ylabel("Rating (vote_average)")
ax.set_title("Popularity vs Rating")
plt.show()


        id  popularity     budget release_date
0   299534     13.9608  356000000   2019-04-24
1    19995     26.7105  237000000   2009-12-15
2   140607      8.2672  245000000   2015-12-15
3   299536     22.0512  300000000   2018-04-25
4      597     28.5469  200000000   1997-11-18
5   135397      9.3334  150000000   2015-06-06
6   420818      8.7748  260000000   2019-07-12
7    24428     45.9289  220000000   2012-04-25
8   168259      8.1661  190000000   2015-04-01
9    99861     10.9130  235000000   2015-04-22
10  284054      7.4587  200000000   2018-02-13
11   12445     16.7884  125000000   2011-07-12
12  181808      6.9173  300000000   2017-12-13
13  330457      8.9592  150000000   2019-11-20
14  351286      8.9474  170000000   2018-06-06
15  109445     19.5948  150000000   2013-11-20
16  321612     11.6822  160000000   2017-03-16
17  260513      9.8497  200000000   2018-06-14


In [None]:
# Derive the release year, then average revenue per year
clean_df['release_year'] = clean_df['release_date'].dt.year
yearly = clean_df.groupby('release_year')['revenue_musd'].mean()

# Line chart of mean revenue by release year
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(yearly.index, yearly.values)
ax.set_xlabel("Year")
ax.set_ylabel("Average Revenue (M USD)")
ax.set_title("Yearly Box Office Performance")
ax.grid(True)
plt.show()


In [None]:
# Mean revenue of each group, in the same order as the bar labels
labels = ["Franchise", "Standalone"]
avg_revenue = [group['revenue_musd'].mean() for group in (franchise_df, standalone_df)]

# Two-bar comparison of average revenue
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(labels, avg_revenue)
ax.set_title("Franchise vs Standalone: Average Revenue")
ax.set_ylabel("Average Revenue (M USD)")
plt.show()
