<a href="https://colab.research.google.com/github/Samridhi-Aggarwaal/Data-Analysis-of-Netflix-Content/blob/main/Netflix_Dashboard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

🔹 Step 1: Install Required Libraries

In [1]:
!pip install plotly ipywidgets --quiet

🔹 Step 2: Import Libraries & Load Dataset

In [2]:
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
import ipywidgets as widgets
from google.colab import output
import plotly.graph_objects as go
from IPython.display import display, HTML
from pandas._config.config import options

pio.renderers.default = "colab"
output.enable_custom_widget_manager()

In [3]:
df = pd.read_csv("/netflix.csv")

🔹 Step 3: Basic Data Cleaning

In [4]:
df.drop_duplicates(inplace=True)

df['age_certification'] = df['age_certification'].fillna('Unknown')
df['imdb_score'] = df['imdb_score'].fillna(df['imdb_score'].median())
df['imdb_votes'] = df['imdb_votes'].fillna(0)
df['seasons'] = df['seasons'].fillna(0)
df['genres'] = df['genres'].fillna('').apply(lambda x: x.split(','))
df['production_countries'] = df['production_countries'].fillna('').apply(lambda x: x.split(','))

🎨 Netflix Executive Styling

In [10]:
# Netflix Theme Colors
NETFLIX_RED = "#E50914"
NETFLIX_BLACK = "#141414"
NETFLIX_DARK = "#181818"

#Executive Header
display(HTML(f"""
<div style="background-color:{NETFLIX_BLACK}; padding:25px; border-radius:12px; margin-bottom:20px">
  <h1 style="color:{NETFLIX_RED}; text-align: center; font-size:38px;">
    🎬 NETFLIX EXECUTIVE ANALYTICS update_dashboard
  </h1>
  <h3 style="color:white; text-align: center;">
    Data-Driven Stratergic Insights for Decision-Making
  </h3>
</div>
"""))

🎛️ INTERACTIVE EXECUTIVE FILTERS (Dashboard Controls)

In [11]:
year_slider = widgets.IntRangeSlider(
      value=[df.release_year.min(), df.release_year.max()],
      min = df.release_year.min(),
      max = df.release_year.max(),
      step = 1,
      description='Year Range:',
      style={'description_width': 'initial'},
      layout=widgets.Layout(width='70%')
)

type_dropdown = widgets.SelectMultiple(
    options=df['type'].unique(),
    value=list(df['type'].unique()),
    description='Content Type:',
    layout=widgets.Layout(width='70%')
)

display(year_slider, type_dropdown)

IntRangeSlider(value=(1945, 2022), description='Year Range:', layout=Layout(width='70%'), max=2022, min=1945, …

SelectMultiple(description='Content Type:', index=(0, 1), layout=Layout(width='70%'), options=('SHOW', 'MOVIE'…

📊 INTERACTIVE EXECUTIVE DASHBOARD FUNCTION

In [12]:
def update_dashboard(year_range, selected_types):
  filtered_df = df[
      (df.release_year >= year_range[0]) &
      (df.release_year <= year_range[1]) &
      (df.type.isin(selected_types))
  ]

  # ================= KPI CARDS =================
  total_titles = len(filtered_df)
  movies = len(filtered_df[filtered_df.type == "Movie"])
  shows = len(filtered_df[filtered_df.type == "Show"])
  avg_score = round(filtered_df.imdb_score.mean(),2)
  total_votes = filtered_df.imdb_votes.sum()

  display(HTML(f"""
  <div style="display: flex;justify-content:space-between; margin-bottom:30px">
    <div style="background:{NETFLIX_DARK}; padding: 20px; border-radius: 10px; width:18%">
      <h2 style="color:{NETFLIX_RED}; text-align: center;">{total_titles}</h2>
      <p style="color:white; text-align: center;">Total Titles</p>
    </div>
    <div style="background:{NETFLIX_DARK}; padding: 20px; border-radius: 10px; width:18%">
      <h2 style="color:{NETFLIX_RED}; text-align: center;">{movies}</h2>
      <p style="color:white; text-align: center;">Movies</p>
    </div>
    <div style="background:{NETFLIX_DARK}; padding: 20px; border-radius: 10px; width:18%">
      <h2 style="color:{NETFLIX_RED}; text-align: center;">{shows}</h2>
      <p style="color:white; text-align: center;">Shows</p>
    </div>
    <div style="background:{NETFLIX_DARK}; padding: 20px; border-radius: 10px; width:18%">
      <h2 style="color:{NETFLIX_RED}; text-align: center;">{avg_score}</h2>
      <p style="color:white; text-align: center;">Average Score</p>
    </div>
    <div style="background:{NETFLIX_DARK}; padding: 20px; border-radius: 10px; width:18%">
      <h2 style="color:{NETFLIX_RED}; text-align: center;">{total_votes}</h2>
      <p style="color:white; text-align: center;">Total Votes</p>
    </div>
  </div>
  """))

  # ================= CHARTS =================
  # Trend Analysis
  trend = filtered_df.release_year.value_counts().sort_index().reset_index()
  trend.columns = ['Release Year', 'Count']
  fig1 = px.line(trend, x='Release Year', y='Count', template='plotly_dark', title='Content Growth Trend')
  fig1.update_traces(line_color=NETFLIX_RED)
  fig1.show()

  # Movies vs Shows
  type_count = filtered_df.type.value_counts().reset_index()
  type_count.columns = ['Type', 'Count']
  fig2 = px.pie(type_count, values='Count', names='Type', template= 'plotly_dark', title='Movies vs Shows')
  fig2.update_traces(textposition='inside', textinfo='percent+label', marker=dict(colors=[NETFLIX_RED, NETFLIX_DARK]))
  fig2.show()

  # Genre Comparison
  genre_df = filtered_df.explode('genres')
  top_genres = genre_df['genres'].value_counts().head(10).reset_index()
  top_genres.columns = ['Genre', 'Count']
  fig3 = px.bar(top_genres, x='Genre', y='Count', template='plotly_dark', title='Top 10 Genres')
  fig3.update_traces(marker_color=NETFLIX_RED)
  fig3.show()

  # Audience Targetting
  age_df = filtered_df.age_certification.value_counts().reset_index()
  age_df.columns = ['Age Certification', 'Count']
  fig4 = px.bar(age_df, x='Age Certification', y='Count', template='plotly_dark', title='Content by Age Certification')
  fig4.update_traces(marker_color=NETFLIX_RED)
  fig4.show()

  # Viewer Engagement
  fig5 = px.scatter(filtered_df, x='imdb_score', y='imdb_votes', template='plotly_dark', title='IMDb Votes vs IMDb Score', hover_data=['title'])
  fig5.update_traces(marker=dict(color=NETFLIX_RED))
  fig5.show()

# **Predictive Modeling**

In [9]:
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

genre_encoded = MultiLabelBinarizer()
genre_matrix = genre_encoded.fit_transform(df['genres'])

genre_df = pd.DataFrame(genre_matrix, columns=genre_encoded.classes_)
model_df = pd.concat([df[['release_year','runtime','imdb_votes']], genre_df], axis=1)

X = model_df
y = df['imdb_votes']

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

model = RandomForestRegressor()
model.fit(X_train, y_train)

pred = model.predict(X_test)

print("Model R^2 Score: ", r2_score(y_test, pred))

Model R^2 Score:  0.9936052643321892


🔹 Step 4: Activate Dashboard

In [13]:
widgets.interactive(
    update_dashboard,
    year_range=year_slider,
    selected_types=type_dropdown
)

interactive(children=(IntRangeSlider(value=(1945, 2022), description='Year Range:', layout=Layout(width='70%')…