In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Locations of the four per-service catalogue files.
netflix, hulu, amazon, disney = (
    Path("Resources/netflix_data.csv"),
    Path("Resources/hulu_data.csv"),
    Path("Resources/amazon_data.csv"),
    Path("Resources/disney_data.csv"),
)

# Read each file into its own DataFrame, in the same order as the paths above.
netflix_df, hulu_df, amazon_df, disney_df = map(
    pd.read_csv, (netflix, hulu, amazon, disney)
)

In [3]:
# Label every row of each frame with the service it was loaded for, so the
# source stays identifiable after the frames are stacked into one.
for frame, label in (
    (netflix_df, 'Netflix'),
    (hulu_df, 'Hulu'),
    (amazon_df, 'Amazon'),
    (disney_df, 'Disney'),
):
    frame['streaming_service'] = label


In [4]:
# Stack the four per-service frames row-wise. `ignore_index=True` gives the
# result a fresh 0..n-1 index; without it each source frame's own index is
# kept, so the same label would appear once per service and label-based
# lookups on the combined frame would be ambiguous.
combined_df = pd.concat(
    [netflix_df, hulu_df, amazon_df, disney_df],
    axis=0,
    ignore_index=True,
)

In [6]:
# Cleaned view of the combined data: same rows, minus the three free-text
# columns. `drop` returns a new frame, so `combined_df` is left untouched.
new_combined = combined_df.drop(columns=['director', 'cast', 'description'])

In [None]:

# Write the cleaned frame next to the input files. The directory name is
# spelled "Resources" (capital R) to match the paths the inputs are read from;
# a lower-case spelling only resolves on case-insensitive filesystems.
new_combined.to_csv('Resources/combined_cleaned_data.csv', index=False)


In [8]:
# Write the full combined frame next to the input files. The directory name is
# spelled "Resources" (capital R) to match the paths the inputs are read from;
# a lower-case spelling only resolves on case-insensitive filesystems.
combined_df.to_csv('Resources/combined_streaming_data.csv', index=False)

In [None]:
# Create a table of title counts per release decade for each streaming service

In [10]:
def get_decade_counts(df: pd.DataFrame) -> pd.DataFrame:
    """Count rows per streaming service and release decade.

    The decade is ``release_year`` floored to a multiple of ten
    (e.g. 1995 -> 1990).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``streaming_service`` and ``release_year`` columns.
        The frame is NOT modified; the decade is computed on a copy.

    Returns
    -------
    pd.DataFrame
        One row per (``streaming_service``, ``decade``) pair present in
        ``df``, with columns ``streaming_service``, ``decade`` and ``count``.
    """
    # `assign` returns a new frame, so the caller's DataFrame does not
    # silently gain a `decade` column as a side effect of charting.
    with_decade = df.assign(decade=(df['release_year'] // 10) * 10)
    decade_counts = (
        with_decade
        .groupby(['streaming_service', 'decade'])
        .size()
        .reset_index(name='count')
    )
    return decade_counts

# Count rows per (streaming service, decade) from the cleaned frame.
decade_counts = get_decade_counts(new_combined)

# Bar colour for each streaming service. Services missing from this mapping
# fall back to 'blue' below; Amazon is listed explicitly with that same colour.
colors = {
    'Netflix': 'red',
    'Disney': 'purple',
    'Hulu': 'green',
    'Amazon': 'blue',
}

# Services in the order their traces are added. The dropdown's visibility
# masks are positional, so traces and buttons must share this exact order.
decade_services = list(decade_counts['streaming_service'].unique())

# One bar trace per streaming service.
fig = go.Figure()
for service in decade_services:
    service_data = decade_counts[decade_counts['streaming_service'] == service]
    fig.add_trace(go.Bar(
        x=service_data['decade'].astype(str),
        y=service_data['count'],
        name=service,
        marker=dict(color=colors.get(service, 'blue')),
    ))

# Dropdown buttons. The first entry, "All", reproduces the initial state of
# the figure (every trace visible, generic title) so that view can be restored
# after a single service has been selected.
decade_buttons = [
    {
        "label": "All",
        "method": "update",
        "args": [{"visible": [True] * len(decade_services)},
                 {"title": "Movies by Decade"}],
    }
]
decade_buttons += [
    {
        "label": service,
        "method": "update",
        "args": [{"visible": [s == service for s in decade_services]},
                 {"title": f"Movies by Decade ({service})"}],
    }
    for service in decade_services
]

# Attach the dropdown and set the titles.
fig.update_layout(
    updatemenus=[
        {
            "buttons": decade_buttons,
            "direction": "down",
            "showactive": True,
        }
    ],
    title="Movies by Decade",
    xaxis_title="Decade",
    yaxis_title="Count",
)

# Show the plot
fig.show()

In [11]:
def _genre_counts_for(frame, service):
    """Return ``(rows, counts)`` for one streaming service.

    ``rows`` is the subset of ``frame`` whose ``streaming_service`` equals
    ``service``; ``counts`` is ``rows['listed_in'].value_counts()``.
    """
    rows = frame[frame['streaming_service'] == service]
    return rows, rows['listed_in'].value_counts()


# For each streaming service: its rows from the cleaned frame, the frequency
# of every `listed_in` value, and the ten most frequent of those values.
# NOTE(review): each distinct `listed_in` string is counted as-is; if one row
# can name several genres in a single string, these are counts of genre
# combinations rather than of individual genres - confirm against the data.
netflix_data, netflix_genre = _genre_counts_for(new_combined, 'Netflix')
netflix_10 = netflix_genre.head(10)

hulu_data, hulu_genre = _genre_counts_for(new_combined, 'Hulu')
hulu_10 = hulu_genre.head(10)

amazon_data, amazon_genre = _genre_counts_for(new_combined, 'Amazon')
amazon_10 = amazon_genre.head(10)

disney_data, disney_genre = _genre_counts_for(new_combined, 'Disney')
disney_10 = disney_genre.head(10)

# Ten most frequent `listed_in` values across all streaming services. Read
# from the cleaned frame like the per-service counts above; `listed_in` is not
# one of the dropped columns, so the counts equal those of the full frame.
genre_all = new_combined['listed_in'].value_counts()
top10_genre = genre_all.head(10)




In [12]:
# Bar chart of the top-10 `listed_in` counts, with a dropdown that switches
# between the four services and the overall top 10.
#
# Each spec is (trace name, counts Series, extra Bar keyword arguments).
# The order here fixes the trace order that the dropdown masks rely on.
# NOTE(review): the four per-service traces all start visible (only the
# overall trace is hidden) and no dropdown entry corresponds to that state -
# confirm this starting view is intended.
trace_specs = [
    ('Disney', disney_10, {'marker': dict(color='purple')}),
    ('Hulu', hulu_10, {'marker': dict(color='green')}),
    ('Amazon', amazon_10, {'marker': dict(color='blue')}),
    ('Netflix', netflix_10, {'marker': dict(color='red')}),
    ('Genres', top10_genre, {'visible': False}),
]

fig = go.Figure()
for trace_name, counts, extra in trace_specs:
    fig.add_trace(go.Bar(
        x=counts.index,
        y=counts.values,
        name=trace_name,
        **extra
    ))

# (button label, figure title) pairs, in the same order as the traces above.
menu_entries = [
    ("Disney", "Top 10 Genres in Disney"),
    ("Hulu", "Top 10 Genres in Hulu"),
    ("Amazon", "Top 10 Genres in Amazon"),
    ("Netflix", "Top 10 Genres in Netflix"),
    ("All", "Top 10 Genres"),
]

# Each button shows exactly one trace: the one at its own position.
genre_buttons = [
    {
        "label": label,
        "method": "update",
        "args": [
            {"visible": [pos == idx for pos in range(len(menu_entries))]},
            {"title": title},
        ],
    }
    for idx, (label, title) in enumerate(menu_entries)
]

# Dropdown menu plus figure and axis titles.
fig.update_layout(
    updatemenus=[
        {
            "buttons": genre_buttons,
            "direction": "down",
            "showactive": True,
        }
    ],
    title="Top 10 Genres",
    xaxis_title="Genre",
    yaxis_title="Count",
)

# Render the figure
fig.show()