In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from wordcloud import WordCloud
from sklearn.cluster import KMeans
import numpy as np

# Read the CSV (path is relative to the working directory) into the DataFrame the cells below plot from
content_df = pd.read_csv(filepath_or_buffer='Netflix-Data-Analysis.csv')


## 1. Data Overview & Missing Values

In [None]:
# Tally null cells per column (isna is the alias of isnull) and draw one bar per column.
missing_values = content_df.isna().sum()
ax = sns.barplot(x=missing_values.index, y=missing_values.values)
# Turn the x tick labels (the column names) vertical.
plt.xticks(rotation=90)
ax.set_title('Missing Values in Each Column')
plt.show()

## 2. Content Distribution

In [None]:
# One bar per distinct value of the `type` column, bar height = number of rows.
ax = sns.countplot(data=content_df, x='type')
ax.set_title('Distribution of Content Types')
plt.show()

## 3. Top Directors

In [None]:
# Count titles per individual director.  A cell may list several co-directors
# in one comma-separated string (assumed to follow the same format the cast
# column is split on in the cast-analysis cell -- confirm against the data).
# Counting the raw strings would tally each distinct team as its own
# "director", so split into one row per name before counting.
director_names = (
    content_df['director']
    .dropna()          # rows with no director recorded carry no name to count
    .astype(str)
    .str.split(',')
    .explode()
    .str.strip()       # tolerate irregular spacing around the commas
)
# A leading/trailing comma would leave an empty token; it is not a name.
director_names = director_names[director_names != '']
top_directors = director_names.value_counts().head(10)

sns.barplot(y=top_directors.index, x=top_directors.values)
plt.title('Top 10 Directors by Number of Titles')
plt.xlabel('Number of titles')
plt.ylabel('Director')
plt.show()

## 4. Cast Analysis

In [None]:
# Split every non-null cast cell on ', ' and flatten to one row per name,
# then keep the ten most frequent names.
cast_lists = content_df['cast'].dropna().str.split(', ')
cast_members = cast_lists.explode()
top_cast = cast_members.value_counts().head(10)

# Horizontal bars: names on the y axis, occurrence counts on the x axis.
ax = sns.barplot(x=top_cast.values, y=top_cast.index)
ax.set_title('Top 10 Actors/Actresses')
plt.show()

## 5. Content by Country

In [None]:
# Count titles per individual country.  A cell may list several countries in
# one comma-separated string (assumed to follow the same format the cast
# column is split on in the cast-analysis cell -- confirm against the data).
# Counting the raw strings would treat every distinct combination as its own
# category and under-count each country, so split into one row per country.
country_names = (
    content_df['country']
    .dropna()          # rows with no country recorded carry nothing to count
    .astype(str)
    .str.split(',')
    .explode()
    .str.strip()       # tolerate irregular spacing around the commas
)
# A leading/trailing comma would leave an empty token; it is not a country.
country_names = country_names[country_names != '']
top_countries = country_names.value_counts().head(10)

sns.barplot(y=top_countries.index, x=top_countries.values)
plt.title('Top 10 Countries by Number of Titles')
plt.xlabel('Number of titles')
plt.ylabel('Country')
plt.show()

## 6. Release Year Distribution

In [None]:
# 20-bin histogram of the release_year column with a KDE curve overlaid.
release_years = content_df['release_year']
ax = sns.histplot(data=release_years, bins=20, kde=True)
ax.set_title('Release Year Distribution')
plt.show()

## 7. Word Cloud of Titles

In [None]:
# Concatenate all titles into one space-separated text for the word cloud.
# str.join raises TypeError on any non-string item, and a missing title is a
# float NaN, so drop nulls and coerce the remaining values to str first.
title_text = ' '.join(content_df['title'].dropna().astype(str))

wordcloud = WordCloud(width=800, height=400, background_color='white').generate(title_text)

# Figure is 10x5 inches to match the cloud's 2:1 (800x400) aspect ratio.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # pixel axes carry no meaning for a word cloud image
plt.title('Word Cloud of Titles')
plt.show()