In [1]:
#*******************************************************************************
# Dependencies and Setup
#*******************************************************************************
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#*******************************************************************************
# Input files: relative paths to the ratings CSV and the movie metadata CSV
#*******************************************************************************
ratings_to_load = "Movie_Datasets/ratings.csv"
movies_metadata_to_load = "Movie_Datasets/movies_metadata.csv"

In [None]:
#*******************************************************************************
# Limit the dataframe to the columns needed for the analysis.
# .copy() makes the result an independent frame, so the column that a later
# cell adds to it does not trigger pandas' SettingWithCopyWarning.
#*******************************************************************************
financial_df = financial_df[["rating", "budget", "revenue", "Profit"]].copy()

In [None]:
#*******************************************************************************
# Determine the minimum and maximum budget values to help choose the bin edges
#*******************************************************************************
min_budget = financial_df["budget"].min()
max_budget = financial_df["budget"].max()
print(f"min budget is {min_budget}")
print(f"max budget is {max_budget}")

# Legacy names (they hold budget values, not profit), kept so that any other
# cell still referring to them keeps working.
minProfit, maxProfit = min_budget, max_budget

In [None]:
#*******************************************************************************
# Bin edges for the budget ranges. pd.cut (used in the next cell) builds
# right-inclusive intervals by default, so these edges give (-100, 0],
# (0, 350], (350, 700] and (700, 1646]; values outside -100..1646 get no
# range (NaN).
#*******************************************************************************
fin_bins = [-100, 0, 350, 700, 1646]

#*******************************************************************************
# Labels for the four budget ranges, in the same order as the bins.
# Raw strings keep the backslash in "\$" (it stops matplotlib from reading a
# pair of dollar signs as math text) without an invalid-escape warning.
#*******************************************************************************
fin_group_names = ["< $0", r"\$0-\$350", r"\$350-\$700", "> $700"]

In [None]:
#*******************************************************************************
# Create a new column to store the budget range for each movie.
# The source column is "budget" (lower case), matching the column names kept
# when the dataframe was limited to the analysis columns.
#*******************************************************************************
financial_df["Budget Range (millions)"] = pd.cut(
    financial_df["budget"], bins=fin_bins, labels=fin_group_names
)

In [None]:
#*******************************************************************************
# Take the columns needed for this graph, group the rows by budget range
# ("Budget Range (millions)") and show the number of movies in each range.
# observed=False keeps ranges that contain no movies (count 0), so the result
# has one entry per range label.
# NOTE: financial_count_df ends up as a GroupBy object, not a dataframe.
#*******************************************************************************
financial_count_df = financial_df[["rating", "budget", "Budget Range (millions)"]]
financial_count_df = financial_count_df.groupby("Budget Range (millions)",
                                                observed=False)
financial_count_df.count()["budget"]

In [None]:
#*******************************************************************************
# Create the pie chart showing the breakdown of movies by budget range.
#*******************************************************************************

#*******************************************************************************
# Define appearance for the pie chart (one colour / offset per wedge)
#*******************************************************************************
colors = ["lightgreen", "lightblue", "gold", "greenyellow"]
explode = (0.15, 0.15, 0.15, 0.15)

#*******************************************************************************
# Number of movies in each budget range. size() counts the rows in each group,
# so it does not depend on the name of any particular data column.
#*******************************************************************************
movies_per_range = financial_count_df.size()

#*******************************************************************************
# Create the figure and set the title (raised so it clears the enlarged pie)
#*******************************************************************************
pie_fig, pie_ax = plt.subplots()
pie_ax.set_title("% Movies by Budget Range (in millions)", y=1.25, fontsize=15)

#*******************************************************************************
# Draw the pie chart with a percentage printed on each wedge
#*******************************************************************************
pie_ax.pie(movies_per_range, explode=explode, labels=fin_group_names,
           colors=colors, autopct="%1.1f%%", shadow=True, startangle=10,
           radius=1.6)

#*******************************************************************************
# Show Figures
#*******************************************************************************
plt.show()

In [None]:
#*******************************************************************************
# Take the columns needed for this graph, group the rows by budget range
# ("Budget Range (millions)") and compute the mean of the remaining columns
# for each range. observed=False keeps ranges that contain no movies.
#*******************************************************************************
financial_ave_df = financial_df[["rating", "budget", "Budget Range (millions)"]]
financial_ave_df = financial_ave_df.groupby("Budget Range (millions)",
                                            observed=False).mean()

In [None]:
#*******************************************************************************
# Keep just the "rating" column for the chart; the budget range remains the
# index of the frame.
#*******************************************************************************
financial_ave_df = financial_ave_df.loc[:, ["rating"]]

In [None]:
#*******************************************************************************
# Create a bar chart of the data. legend=False leaves the legend off, since
# the chart shows a single series.
#*******************************************************************************
bar_ax = financial_ave_df.plot(kind="bar", figsize=(8, 4), legend=False)

#*******************************************************************************
# Set a title and axis labels
#*******************************************************************************
bar_ax.set_title("Average Rating by Budget", y=1.05, fontsize=25)
bar_ax.set_xlabel("Budget (millions)", fontsize=15)
bar_ax.set_ylabel("Average Rating", fontsize=15)

#*******************************************************************************
# Narrow the y-axis to 3-3.3 so the differences between the bars are easier
# to see. The bars do not start at zero, and any average outside this window
# is clipped.
#*******************************************************************************
bar_ax.set_ylim(3, 3.3)

#*******************************************************************************
# Add the grid to the chart and display the chart
#*******************************************************************************
bar_ax.grid()
plt.show()