In [1]:
import networkx as nx
import streamlit as st
import matplotlib as plt
import seaborn as sns

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px

In [3]:
# Load the catalogue and prepare it in a single pipeline:
#   1. read the CSV,
#   2. drop exact duplicate rows,
#   3. drop rows missing any of release_year / type / listed_in,
#   4. parse `date_added` and derive the year and month it was added.
netflix = (
    pd.read_csv('netflix_titles_2021.csv')
    .drop_duplicates()
    .dropna(subset=['release_year', 'type', 'listed_in'])
    .assign(
        # format='mixed' infers the format of each value individually;
        # errors='coerce' turns unparseable values into NaT instead of raising.
        date_added=lambda frame: pd.to_datetime(
            frame['date_added'], format='mixed', errors='coerce'
        ),
        # Later keyword arguments see the columns created by earlier ones, so
        # these read the already-parsed `date_added`. Rows whose date is NaT
        # get NaN for both derived columns.
        year_added=lambda frame: frame['date_added'].dt.year,
        month_added=lambda frame: frame['date_added'].dt.month,
    )
)
# Bucket running times into length categories.
# pd.cut uses right-closed intervals by default, so the buckets are
# (0, 60] -> 'Short', (60, 120] -> 'Medium', (120, inf) -> 'Long'.
labels = ['Short', 'Medium', 'Long']
bins = [0, 60, 120, np.inf]

# Strip the ' min' suffix and convert to a number in one pass. Removing the
# suffix is a no-op on values that do not contain it, and any value that is
# still not a plain number afterwards becomes NaN through errors='coerce'.
netflix['duration'] = pd.to_numeric(
    netflix['duration'].str.replace(' min', ''),
    errors='coerce',
)

# Rows whose duration is NaN receive a NaN category.
netflix['duration_category'] = pd.cut(
    netflix['duration'],
    bins=bins,
    labels=labels,
)
# Ten most frequent values of the `director` column.
# value_counts() skips missing directors and sorts by descending count; each
# distinct string in the column is counted exactly as stored.
top_directors = netflix['director'].value_counts().head(10)

# Global seaborn/matplotlib styling for every static figure created afterwards.
# `sns.set` is only a legacy alias of `sns.set_theme` (documented as possibly
# being removed), so call the preferred entry point; the arguments are unchanged.
sns.set_theme(style='darkgrid', palette='muted', rc={'figure.figsize': (10, 6)})

In [4]:
# Interactive time series: number of titles added to the catalogue per year.
# groupby() drops rows whose `year_added` is missing, so titles without a
# parseable `date_added` are not part of the trend.
# The count column gets an explicit name instead of the default integer
# column 0, and `labels` supplies readable text for both the axis titles and
# the hover tooltips (which would otherwise show "0" as the variable name).
fig_time_series = px.line(
    netflix.groupby('year_added').size().reset_index(name='title_count'),
    x='year_added',
    y='title_count',
    title='Trend of Netflix Content Over Time',
    labels={'year_added': 'Year Added', 'title_count': 'Number of Titles'},
)
# Geo map of how many titles are associated with each country.
# A `country` value may list several comma-separated countries (NOTE(review):
# confirm against the CSV). Counting the raw string would treat such a list as
# one location name that the 'country names' lookup cannot match, so those
# titles would vanish from the map. Split and explode the field so that a
# title counts once for every country it lists.
country_counts = (
    netflix['country']
    .dropna()
    .str.split(',')
    .explode()
    .str.strip()                  # remove the space that follows each comma
    .value_counts()
    .drop('', errors='ignore')    # discard empty pieces from stray commas
    .rename_axis('Country')
    .reset_index(name='Count')
)
fig_geo = px.choropleth(
    country_counts,
    locations='Country',
    locationmode='country names',
    color='Count',
    title='Content Distribution Across Countries',
)