In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go

# Creating the combined data file
---

In [2]:
netflix = Path("Resources/netflix_data.csv")
hulu = Path("Resources/hulu_data.csv")
amazon = Path("Resources/amazon_data.csv")
disney=Path("Resources/disney_data.csv")

netflix_df =pd.read_csv(netflix)
hulu_df =pd.read_csv(hulu)
amazon_df =pd.read_csv(amazon)
disney_df =pd.read_csv(disney)

In [3]:
netflix_df['streaming_service'] = 'Netflix'
hulu_df['streaming_service'] = 'Hulu'
amazon_df['streaming_service'] = 'Amazon'
disney_df['streaming_service'] = 'Disney'

In [4]:
combined_df = pd.concat([netflix_df, hulu_df, amazon_df, disney_df], axis=0)

In [5]:
new_combined =combined_df.drop(['director','cast','description'], axis=1)

In [6]:
new_combined.to_csv('resources/combined_cleaned_data.csv', index=False)

In [7]:
combined_df.to_csv('resources/combined_streaming_data.csv', index=False)

In [8]:
Netflix_all_df = new_combined[(new_combined["streaming_service"] == "Netflix")]
Netflix_all_rating = Netflix_all_df["rating"].value_counts()
Netflix_all_rating

rating
TV-MA       3207
TV-14       2160
TV-PG        863
R            799
PG-13        490
TV-Y7        334
TV-Y         307
PG           287
TV-G         220
NR            80
G             41
TV-Y7-FV       6
NC-17          3
UR             3
74 min         1
84 min         1
66 min         1
Name: count, dtype: int64

In [9]:
Hulu_all_df = new_combined[(new_combined["streaming_service"] == "Hulu")]
Hulu_all_rating = Hulu_all_df["rating"].value_counts()
Hulu_all_rating

rating
TV-14     691
TV-MA     391
R         345
TV-PG     321
PG-13     163
         ... 
34 min      1
47 min      1
65 min      1
37 min      1
71 min      1
Name: count, Length: 88, dtype: int64

In [10]:
Disney_all_df = new_combined[(new_combined["streaming_service"] == "Disney")]
Disney_all_rating = Disney_all_df["rating"].value_counts()
Disney_all_rating

rating
TV-G        318
TV-PG       301
G           253
PG          236
TV-Y7       131
TV-14        79
PG-13        66
TV-Y         50
TV-Y7-FV     13
Name: count, dtype: int64

In [11]:
Amazon_all_df = new_combined[(new_combined["streaming_service"] == "Amazon")]
Amazon_all_rating = Amazon_all_df["rating"].value_counts()
Amazon_all_rating

rating
13+         2117
16+         1547
ALL         1268
18+         1243
R           1010
PG-13        393
7+           385
PG           253
NR           223
TV-14        208
TV-PG        169
TV-NR        105
G             93
TV-G          81
TV-MA         77
TV-Y          74
TV-Y7         39
UNRATED       33
NC-17          3
AGES_18_       3
NOT_RATE       3
AGES_16_       2
16             1
ALL_AGES       1
Name: count, dtype: int64

In [12]:
All_rating_df = pd.DataFrame({
    "Netflix Ratings": Netflix_all_rating,
    "Hulu Ratings": Hulu_all_rating,
    "Amazon Ratings": Amazon_all_rating,
    "Disney Ratings": Disney_all_rating
})

All_rating_df

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1 Season,,42.0,,
100 min,,3.0,,
101 min,,7.0,,
102 min,,3.0,,
103 min,,3.0,,
...,...,...,...,...
TV-Y,307.0,31.0,74.0,50.0
TV-Y7,334.0,46.0,39.0,131.0
TV-Y7-FV,6.0,,,13.0
UNRATED,,,33.0,


In [13]:
All_rating_df['Rating Category'] = All_rating_df.index.to_series().apply( lambda x: 'Teens' if x in ['13+', 'PG-13', 'TV-14', '16+'] 
                                                                       else ('Kids' if x in ['PG', 'TV-PG', '7+', 'TV-Y7', 'TV-Y7-FV'] 
                                                                             else ('Adults' if x in ['R', 'NC-17', '18+', 'TV-MA', 'UR', 'UNRATED']
                                                                                    else ('All Ages' if x in ['G', 'TV-G', 'TV-Y', 'ALL', 'ALL AGES']
                                                                             else 'X') )))

substring = 'X'
filter = All_rating_df['Rating Category'].str.contains(substring)
All_rating_filtered_df = All_rating_df[~filter]

All_rating_filtered_df

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13+,,,2117.0,,Teens
16+,,,1547.0,,Teens
18+,,,1243.0,,Adults
7+,,,385.0,,Kids
ALL,,,1268.0,,All Ages
G,41.0,18.0,93.0,253.0,All Ages
NC-17,3.0,,3.0,,Adults
PG,287.0,105.0,253.0,236.0,Kids
PG-13,490.0,163.0,393.0,66.0,Teens
R,799.0,345.0,1010.0,,Adults


In [14]:
Adults_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "Adults")]
Adults_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
18+,,,1243.0,,Adults
NC-17,3.0,,3.0,,Adults
R,799.0,345.0,1010.0,,Adults
TV-MA,3207.0,391.0,77.0,,Adults
UNRATED,,,33.0,,Adults
UR,3.0,,,,Adults


In [15]:
Adults_Rating_Netflix = Adults_Rating_All["Netflix Ratings"].sum()
Adults_Rating_Netflix

4012.0

In [16]:
Adults_Rating_Hulu = Adults_Rating_All["Hulu Ratings"].sum()
Adults_Rating_Hulu

736.0

In [17]:
Adults_Rating_Amazon = Adults_Rating_All["Amazon Ratings"].sum()
Adults_Rating_Amazon

2366.0

In [18]:
Adults_Rating_Disney = Adults_Rating_All["Disney Ratings"].sum()
Adults_Rating_Disney

0.0

In [19]:
Teens_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "Teens")]
Teens_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13+,,,2117.0,,Teens
16+,,,1547.0,,Teens
PG-13,490.0,163.0,393.0,66.0,Teens
TV-14,2160.0,691.0,208.0,79.0,Teens


In [20]:
Teens_Rating_Netflix = Teens_Rating_All["Netflix Ratings"].sum()
Teens_Rating_Netflix

2650.0

In [21]:
Teens_Rating_Hulu = Teens_Rating_All["Hulu Ratings"].sum()
Teens_Rating_Hulu

854.0

In [22]:
Teens_Rating_Amazon = Teens_Rating_All["Amazon Ratings"].sum()
Teens_Rating_Amazon

4265.0

In [23]:
Teens_Rating_Disney = Teens_Rating_All["Disney Ratings"].sum()
Teens_Rating_Disney

145.0

In [24]:
Kids_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "Kids")]
Kids_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7+,,,385.0,,Kids
PG,287.0,105.0,253.0,236.0,Kids
TV-PG,863.0,321.0,169.0,301.0,Kids
TV-Y7,334.0,46.0,39.0,131.0,Kids
TV-Y7-FV,6.0,,,13.0,Kids


In [25]:
Kids_Rating_Netflix = Kids_Rating_All["Netflix Ratings"].sum()
Kids_Rating_Netflix

1490.0

In [26]:
Kids_Rating_Hulu = Kids_Rating_All["Hulu Ratings"].sum()
Kids_Rating_Hulu

472.0

In [27]:
Kids_Rating_Amazon = Kids_Rating_All["Amazon Ratings"].sum()
Kids_Rating_Amazon

846.0

In [28]:
Kids_Rating_Disney = Kids_Rating_All["Disney Ratings"].sum()
Kids_Rating_Disney

681.0

In [29]:
AllAges_Rating_All = All_rating_filtered_df[(All_rating_filtered_df["Rating Category"] == "All Ages")]
AllAges_Rating_All

Unnamed: 0_level_0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings,Rating Category
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALL,,,1268.0,,All Ages
G,41.0,18.0,93.0,253.0,All Ages
TV-G,220.0,148.0,81.0,318.0,All Ages
TV-Y,307.0,31.0,74.0,50.0,All Ages


In [30]:
AllAges_Rating_Netflix = AllAges_Rating_All["Netflix Ratings"].sum()
AllAges_Rating_Netflix

568.0

In [31]:
AllAges_Rating_Hulu = AllAges_Rating_All["Hulu Ratings"].sum()
AllAges_Rating_Hulu

197.0

In [32]:
AllAges_Rating_Amazon = AllAges_Rating_All["Amazon Ratings"].sum()
AllAges_Rating_Amazon

1516.0

In [33]:
AllAges_Rating_Disney = AllAges_Rating_All["Disney Ratings"].sum()
AllAges_Rating_Disney

621.0

In [34]:
AllAges_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [AllAges_Rating_Netflix],
     "Hulu Ratings": [AllAges_Rating_Hulu],
     "Amazon Ratings": [AllAges_Rating_Amazon],
     "Disney Ratings": [AllAges_Rating_Disney]},
index=["All Ages"])

AllAges_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
All Ages,568.0,197.0,1516.0,621.0


In [35]:
Kids_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [Kids_Rating_Netflix],
     "Hulu Ratings": [Kids_Rating_Hulu],
     "Amazon Ratings": [Kids_Rating_Amazon],
     "Disney Ratings": [Kids_Rating_Disney]},
index=["Kids"])

Kids_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
Kids,1490.0,472.0,846.0,681.0


In [36]:
Teens_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [Teens_Rating_Netflix],
     "Hulu Ratings": [Teens_Rating_Hulu],
     "Amazon Ratings": [Teens_Rating_Amazon],
     "Disney Ratings": [Teens_Rating_Disney]},
index=["Teens"])

Teens_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
Teens,2650.0,854.0,4265.0,145.0


In [37]:
Adults_Rating_All_Df = pd.DataFrame (
    {"Netflix Ratings": [Adults_Rating_Netflix],
     "Hulu Ratings": [Adults_Rating_Hulu],
     "Amazon Ratings": [Adults_Rating_Amazon],
     "Disney Ratings": [Adults_Rating_Disney]},
index=["Adults"])

Adults_Rating_All_Df

Unnamed: 0,Netflix Ratings,Hulu Ratings,Amazon Ratings,Disney Ratings
Adults,4012.0,736.0,2366.0,0.0


In [38]:
All_Ratings_Complete_Df = pd.DataFrame (
    {"Netflix": [AllAges_Rating_Netflix, Kids_Rating_Netflix, Teens_Rating_Netflix, Adults_Rating_Netflix],
     "Hulu": [AllAges_Rating_Hulu, Kids_Rating_Hulu, Teens_Rating_Hulu, Adults_Rating_Hulu],
     "Amazon": [AllAges_Rating_Amazon, Kids_Rating_Amazon, Teens_Rating_Amazon, Adults_Rating_Amazon],
     "Disney": [AllAges_Rating_Disney, Kids_Rating_Disney, Teens_Rating_Disney, Adults_Rating_Disney]},
index=["All Ages", "Kids", "Teens", "Adults"])

All_Ratings_Complete_Df

Unnamed: 0,Netflix,Hulu,Amazon,Disney
All Ages,568.0,197.0,1516.0,621.0
Kids,1490.0,472.0,846.0,681.0
Teens,2650.0,854.0,4265.0,145.0
Adults,4012.0,736.0,2366.0,0.0


In [40]:
All_Ratings_Complete_Df.to_csv('resources/Ratings_Summary.csv', index=True)