In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go

In [2]:
combined = Path("Resources/combined_cleaned_data.csv")

combined_df =pd.read_csv(combined)

In [3]:
combined_df.head()

Unnamed: 0,show_id,type,title,country,date_added,release_year,rating,duration,listed_in,streaming_service
0,s1,Movie,Dick Johnson Is Dead,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,Netflix
1,s2,TV Show,Blood & Water,South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries",Netflix
2,s3,TV Show,Ganglands,,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",Netflix
3,s4,TV Show,Jailbirds New Orleans,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV",Netflix
4,s5,TV Show,Kota Factory,India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",Netflix


In [4]:
Netflix_all_df = combined_df[(combined_df["streaming_service"] == "Netflix")]
Netflix_all_rating = Netflix_all_df["rating"].value_counts()
Netflix_all_rating

rating
TV-MA       3207
TV-14       2160
TV-PG        863
R            799
PG-13        490
TV-Y7        334
TV-Y         307
PG           287
TV-G         220
NR            80
G             41
TV-Y7-FV       6
NC-17          3
UR             3
74 min         1
84 min         1
66 min         1
Name: count, dtype: int64

In [5]:
Hulu_all_df = combined_df[(combined_df["streaming_service"] == "Hulu")]
Hulu_all_rating = Hulu_all_df["rating"].value_counts()
Hulu_all_rating

rating
TV-14      691
TV-MA      391
R          345
TV-PG      321
PG-13      163
          ... 
157 min      1
28 min       1
64 min       1
5 min        1
6 min        1
Name: count, Length: 88, dtype: int64

In [6]:
Disney_all_df = combined_df[(combined_df["streaming_service"] == "Disney")]
Disney_all_rating = Disney_all_df["rating"].value_counts()
Disney_all_rating

rating
TV-G        318
TV-PG       301
G           253
PG          236
TV-Y7       131
TV-14        79
PG-13        66
TV-Y         50
TV-Y7-FV     13
Name: count, dtype: int64

In [7]:
Amazon_all_df = combined_df[(combined_df["streaming_service"] == "Amazon")]
Amazon_all_rating = Amazon_all_df["rating"].value_counts()
Amazon_all_rating

rating
13+         2117
16+         1547
ALL         1268
18+         1243
R           1010
PG-13        393
7+           385
PG           253
NR           223
TV-14        208
TV-PG        169
TV-NR        105
G             93
TV-G          81
TV-MA         77
TV-Y          74
TV-Y7         39
UNRATED       33
NC-17          3
AGES_18_       3
NOT_RATE       3
AGES_16_       2
16             1
ALL_AGES       1
Name: count, dtype: int64

In [8]:
All_rating_df = pd.DataFrame({
    "Netflix Ratings": Netflix_all_rating,
    "Hulu Ratings": Hulu_all_rating,
    "Amazon Ratings": Amazon_all_rating,
    "Disney Ratings": Disney_all_rating
})

All_rating_df

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1 Season,,42.0,,
100 min,,3.0,,
101 min,,7.0,,
102 min,,3.0,,
103 min,,3.0,,
...,...,...,...,...
TV-Y,307.0,31.0,74.0,50.0
TV-Y7,334.0,46.0,39.0,131.0
TV-Y7-FV,6.0,,,13.0
UNRATED,,,33.0,


In [9]:
All_rating_df['Rating Category'] = All_rating_df.index.to_series().apply( lambda x: 'Teens' if x in ['13+', 'PG-13', 'TV-14', '16+'] 
                                                                       else ('Kids' if x in ['PG', 'TV-PG', '7+', 'TV-Y7', 'TV-Y7-FV'] 
                                                                             else ('Adults' if x in ['R', 'NC-17', '18+', 'TV-MA', 'UR', 'UNRATED']
                                                                                    else ('All Ages' if x in ['G', 'TV-G', 'TV-Y', 'ALL', 'ALL AGES']
                                                                             else 'X') )))

substring = 'X'
filter = All_rating_df['Rating Category'].str.contains(substring)
All_rating_filtered_df = All_rating_df[~filter]

All_rating_filtered_df

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13+,,,2117.0,,Teens
16+,,,1547.0,,Teens
18+,,,1243.0,,Adults
7+,,,385.0,,Kids
ALL,,,1268.0,,All Ages
G,41.0,18.0,93.0,253.0,All Ages
NC-17,3.0,,3.0,,Adults
PG,287.0,105.0,253.0,236.0,Kids
PG-13,490.0,163.0,393.0,66.0,Teens
R,799.0,345.0,1010.0,,Adults


In [10]:
Adults_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "Adults")]
Adults_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
18+,,,1243.0,,Adults
NC-17,3.0,,3.0,,Adults
R,799.0,345.0,1010.0,,Adults
TV-MA,3207.0,391.0,77.0,,Adults
UNRATED,,,33.0,,Adults
UR,3.0,,,,Adults


In [11]:
Adults_Rating_Netflix = Adults_Rating_All["Netflix Ratings"].sum()
Adults_Rating_Netflix

4012.0

In [12]:
Adults_Rating_Hulu = Adults_Rating_All["Hulu Ratings"].sum()
Adults_Rating_Hulu

736.0

In [13]:
Adults_Rating_Amazon = Adults_Rating_All["Amazon Ratings"].sum()
Adults_Rating_Amazon

2366.0

In [14]:
Adults_Rating_Disney = Adults_Rating_All["Disney Ratings"].sum()
Adults_Rating_Disney

0.0

In [15]:
Teens_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "Teens")]
Teens_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13+,,,2117.0,,Teens
16+,,,1547.0,,Teens
PG-13,490.0,163.0,393.0,66.0,Teens
TV-14,2160.0,691.0,208.0,79.0,Teens


In [16]:
Teens_Rating_Netflix = Teens_Rating_All["Netflix Ratings"].sum()
Teens_Rating_Netflix

2650.0

In [17]:
Teens_Rating_Hulu = Teens_Rating_All["Hulu Ratings"].sum()
Teens_Rating_Hulu

854.0

In [18]:
Teens_Rating_Amazon = Teens_Rating_All["Amazon Ratings"].sum()
Teens_Rating_Amazon

4265.0

In [19]:
Teens_Rating_Disney = Teens_Rating_All["Disney Ratings"].sum()
Teens_Rating_Disney

145.0

In [20]:
Kids_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "Kids")]
Kids_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7+,,,385.0,,Kids
PG,287.0,105.0,253.0,236.0,Kids
TV-PG,863.0,321.0,169.0,301.0,Kids
TV-Y7,334.0,46.0,39.0,131.0,Kids
TV-Y7-FV,6.0,,,13.0,Kids


In [21]:
Kids_Rating_Netflix = Kids_Rating_All["Netflix Ratings"].sum()
Kids_Rating_Netflix

1490.0

In [22]:
Kids_Rating_Hulu = Kids_Rating_All["Hulu Ratings"].sum()
Kids_Rating_Hulu

472.0

In [23]:
Kids_Rating_Amazon = Kids_Rating_All["Amazon Ratings"].sum()
Kids_Rating_Amazon

846.0

In [24]:
Kids_Rating_Disney = Kids_Rating_All["Disney Ratings"].sum()
Kids_Rating_Disney

681.0

In [25]:
AllAges_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "All Ages")]
AllAges_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALL,,,1268.0,,All Ages
G,41.0,18.0,93.0,253.0,All Ages
TV-G,220.0,148.0,81.0,318.0,All Ages
TV-Y,307.0,31.0,74.0,50.0,All Ages


In [26]:
AllAges_Rating_Netflix = AllAges_Rating_All["Netflix Ratings"].sum()
AllAges_Rating_Netflix

568.0

In [27]:
AllAges_Rating_Hulu = AllAges_Rating_All["Hulu Ratings"].sum()
AllAges_Rating_Hulu

197.0

In [28]:
AllAges_Rating_Amazon = AllAges_Rating_All["Amazon Ratings"].sum()
AllAges_Rating_Amazon

1516.0

In [29]:
AllAges_Rating_Disney = AllAges_Rating_All["Disney Ratings"].sum()
AllAges_Rating_Disney

621.0

In [30]:
AllAges_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [AllAges_Rating_Netflix],
     "Hulu Ratings": [AllAges_Rating_Hulu],
     "Amazon Ratings": [AllAges_Rating_Amazon],
     "Disney Ratings": [AllAges_Rating_Disney]},
index=["All Ages"])

AllAges_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
All Ages,568.0,197.0,1516.0,621.0


In [31]:
Kids_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [Kids_Rating_Netflix],
     "Hulu Ratings": [Kids_Rating_Hulu],
     "Amazon Ratings": [Kids_Rating_Amazon],
     "Disney Ratings": [Kids_Rating_Disney]},
index=["Kids"])

Kids_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
Kids,1490.0,472.0,846.0,681.0


In [32]:
Teens_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [Teens_Rating_Netflix],
     "Hulu Ratings": [Teens_Rating_Hulu],
     "Amazon Ratings": [Teens_Rating_Amazon],
     "Disney Ratings": [Teens_Rating_Disney]},
index=["Teens"])

Teens_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
Teens,2650.0,854.0,4265.0,145.0


In [33]:
Adults_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [Adults_Rating_Netflix],
     "Hulu Ratings": [Adults_Rating_Hulu],
     "Amazon Ratings": [Adults_Rating_Amazon],
     "Disney Ratings": [Adults_Rating_Disney]},
index=["Adults"])

Adults_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
Adults,4012.0,736.0,2366.0,0.0


In [34]:
All_Ratings_Complete_Df = pd.DataFrame (
    {"Netflix": [AllAges_Rating_Netflix, Kids_Rating_Netflix, Teens_Rating_Netflix, Adults_Rating_Netflix],
     "Hulu": [AllAges_Rating_Hulu, Kids_Rating_Hulu, Teens_Rating_Hulu, Adults_Rating_Hulu],
     "Amazon": [AllAges_Rating_Amazon, Kids_Rating_Amazon, Teens_Rating_Amazon, Adults_Rating_Amazon],
     "Disney": [AllAges_Rating_Disney, Kids_Rating_Disney, Teens_Rating_Disney, Adults_Rating_Disney]},
index=["All Ages", "Kids", "Teens", "Adults"])

All_Ratings_Complete_Df

Unnamed: 0,Netflix,Hulu,Amazon,Disney
All Ages,568.0,197.0,1516.0,621.0
Kids,1490.0,472.0,846.0,681.0
Teens,2650.0,854.0,4265.0,145.0
Adults,4012.0,736.0,2366.0,0.0


In [70]:
All_Ratings_Complete_Df.to_csv('Ratings_Summary.csv', index=True)

In [62]:
Netflix_All_Ratings_Complete_Df = All_Ratings_Complete_Df["Netflix"]
Netflix_All_Ratings_Complete_Df

All Ages     568.0
Kids        1490.0
Teens       2650.0
Adults      4012.0
Name: Netflix, dtype: float64

In [64]:
Hulu_All_Ratings_Complete_Df = All_Ratings_Complete_Df["Hulu"]
Hulu_All_Ratings_Complete_Df

All Ages    197.0
Kids        472.0
Teens       854.0
Adults      736.0
Name: Hulu, dtype: float64

In [65]:
Amazon_All_Ratings_Complete_Df = All_Ratings_Complete_Df["Amazon"]
Amazon_All_Ratings_Complete_Df

All Ages    1516.0
Kids         846.0
Teens       4265.0
Adults      2366.0
Name: Amazon, dtype: float64

In [66]:
Disney_All_Ratings_Complete_Df = All_Ratings_Complete_Df["Disney"]
Disney_All_Ratings_Complete_Df

All Ages    621.0
Kids        681.0
Teens       145.0
Adults        0.0
Name: Disney, dtype: float64

In [69]:
import plotly.graph_objects as go


#  top 10 disney
fig = go.Figure()

fig.add_trace(go.Bar(
    x=Netflix_All_Ratings_Complete_Df.index,
    y=Netflix_All_Ratings_Complete_Df.values,
    name='Netflix'
))

# top 10 hulu
fig.add_trace(go.Bar(
    x=Hulu_All_Ratings_Complete_Df.index,
    y=Hulu_All_Ratings_Complete_Df.values,
    name='Hulu',
    visible=False  
))
# top 10 amazon
fig.add_trace(go.Bar(
    x=Amazon_All_Ratings_Complete_Df.index,
    y=Amazon_All_Ratings_Complete_Df.values,
    name='Amazon',
    visible=False  
))

#top 10 netflix 
fig.add_trace(go.Bar(
    x=Disney_All_Ratings_Complete_Df.index,
    y=Disney_All_Ratings_Complete_Df.values,
    name='Disney',
    visible=False 
))

# dropdown menu
fig.update_layout(
    updatemenus=[
        {
            "buttons": [
                {
                    "label": "Netflix",
                    "method": "update",
                    "args": [{"visible": [True, False, False, False, False]}, {"title": "Netflix Content Ratings"}],
                },
                {
                    "label": "Hulu",
                    "method": "update",
                    "args": [{"visible": [False, True, False, False, False]}, {"title": "Hulu Content Ratings"}],
                },
                {
                    "label": "Amazon",
                    "method": "update",
                    "args": [{"visible": [False, False, True, False, False]}, {"title": "Amazon Content Ratings"}],
                },
                {
                    "label": "Disney",
                    "method": "update",
                    "args": [{"visible": [False, False, False, True, False]}, {"title": "Disney Content Ratings"}],
                }
            ],
            "direction": "down",
            "showactive": True,
        }
    ]
)




# Set titles
fig.update_layout(
    title="Content Ratings",
    xaxis_title="Rating Category",
    yaxis_title="Number of Titles",
)

# Show the plot
fig.show()