In [25]:
pip install pandas numpy scikit-learn matplotlib seaborn plotly ipywidgets

Note: you may need to restart the kernel to use updated packages.


In [26]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [27]:
# Load netflix_data.csv (resolved relative to the working directory) into
# the notebook-wide DataFrame `df` that the cells below read from.
df = pd.read_csv(filepath_or_buffer="netflix_data.csv")

In [33]:
# Drop rows with missing values.
# Rebinding `df` (rather than inplace=True) keeps this cell safe to re-run and
# avoids chained-assignment warnings on the column updates below.
df = df.dropna()

# Convert 'date_added' to datetime format.
# Values are cast to text and stripped first: stray leading/trailing
# whitespace in a date string can make pandas' single-format inference reject
# otherwise valid rows. The cast also keeps the cell idempotent, because an
# already-parsed datetime column round-trips through its ISO text form.
date_added = pd.to_datetime(df['date_added'].astype(str).str.strip())

# Extract year and month from 'date_added'.
# `assign` returns a new frame, so no earlier view of `df` is mutated.
df = df.assign(
    date_added=date_added,
    year_added=date_added.dt.year,
    month_added=date_added.dt.month,
)

In [None]:
# Descriptive Method: Data Exploration
@interact
def explore_data(column=df.columns):
    """Show a bar chart of how often each distinct value of ``column`` occurs.

    ``interact`` turns the ``df.columns`` default into a selection widget and
    calls this function again whenever a different column is chosen.
    """
    value_frequencies = df[column].value_counts()

    # Explicit figure/axes pair instead of the implicit pyplot state machine.
    _, ax = plt.subplots(figsize=(10, 6))
    value_frequencies.plot.bar(ax=ax)
    ax.set_title(f'Distribution of {column}')
    ax.set_xlabel(column)
    ax.set_ylabel('Count')
    plt.show()


In [None]:
# Nondescriptive Method: Predictive Analysis - Viewer Age Prediction
# Split the data into TV Shows and Movies
tv_shows = df[df['type'] == 'TV Show']
movies = df[df['type'] == 'Movie']

# LinearRegression can only be fitted on numeric features and a numeric
# target. Fitting directly on text-valued 'duration' / 'rating' columns raises
# a conversion error, so both are encoded numerically first.

# NOTE(review): nominal minimum viewer age per rating label. These numbers are
# an assumed encoding (PG / TV-PG in particular have no official age) --
# confirm or tune them for the analysis at hand. Labels that are not listed
# here (e.g. unrated titles) are excluded from the model.
RATING_MIN_AGE = {
    'TV-Y': 0, 'TV-G': 0, 'G': 0,
    'TV-Y7': 7, 'TV-Y7-FV': 7,
    'TV-PG': 10, 'PG': 10,
    'PG-13': 13,
    'TV-14': 14,
    'R': 17, 'TV-MA': 17,
    'NC-17': 18,
}

# Numeric features: the leading integer of 'duration' (presumably a season
# count for TV shows -- verify against the data) and the year the title was
# added. The column names match the original feature selection.
tv_features = pd.DataFrame({
    'duration': pd.to_numeric(
        tv_shows['duration'].astype(str).str.extract(r'(\d+)', expand=False),
        errors='coerce',
    ),
    'year_added': tv_shows['year_added'],
})

# Numeric target: use 'rating' as-is when it is already numeric, otherwise map
# the rating labels to ages.
if pd.api.types.is_numeric_dtype(tv_shows['rating']):
    tv_target = tv_shows['rating']
else:
    tv_target = tv_shows['rating'].astype(str).str.strip().map(RATING_MIN_AGE)

# Keep only rows where every feature and the target could be encoded.
usable_rows = tv_features.notna().all(axis=1) & tv_target.notna()
tv_features = tv_features[usable_rows]
tv_target = tv_target[usable_rows]

# Split the data into training and testing sets for TV Shows
X_train_tv, X_test_tv, y_train_tv, y_test_tv = train_test_split(
    tv_features, tv_target, test_size=0.2, random_state=42
)

# Train the model for TV Shows
model_tv = LinearRegression()
model_tv.fit(X_train_tv, y_train_tv)

# Predict for TV Shows
y_pred_tv = model_tv.predict(X_test_tv)

# Evaluate the model for TV Shows
mse_tv = mean_squared_error(y_test_tv, y_pred_tv)
print(f'Mean Squared Error (TV Shows): {mse_tv}')


In [38]:
# Data Visualization Functionality
@interact
def pie_chart(column=df.columns):
    """Render a donut chart of the share of each distinct value in ``column``.

    ``interact`` builds a dropdown from ``df.columns`` and re-invokes this
    function with the selected column name.
    """
    chart_title = f'Distribution of {column}'
    # hole=0.5 cuts out the centre, turning the pie into a donut.
    donut = px.pie(data_frame=df, names=column, title=chart_title, hole=0.5)
    donut.show()

interactive(children=(Dropdown(description='column', options=('show_id', 'type', 'title', 'director', 'cast', …

In [39]:
# Data Visualization Functionality
@interact
def scatter_plot(x=df.columns, y=df.columns):
    """Render an interactive scatter plot of column ``y`` against column ``x``.

    ``interact`` builds one dropdown per parameter from ``df.columns`` and
    re-invokes this function whenever either selection changes.
    """
    chart_title = f'{y} vs. {x}'
    scatter = px.scatter(data_frame=df, x=x, y=y, title=chart_title)
    scatter.show()

interactive(children=(Dropdown(description='x', options=('show_id', 'type', 'title', 'director', 'cast', 'coun…

In [None]:
# Data Visualization Functionality
def correlation_heatmap():
    """Plot an annotated heatmap of pairwise correlations between numeric columns.

    Only the numeric columns of the notebook-wide ``df`` are used. Calling
    ``DataFrame.corr()`` on a frame that still contains text or datetime
    columns raises in recent pandas versions, because non-numeric columns are
    no longer dropped silently.
    """
    numeric_columns = df.select_dtypes(include='number')

    plt.figure(figsize=(10, 6))
    sns.heatmap(numeric_columns.corr(), annot=True, cmap='coolwarm', linewidths=.5)
    plt.title('Correlation Heatmap')
    plt.show()
correlation_heatmap()