In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px


In [None]:
# Remote location of netflix_titles.csv (raw GitHub content).
url = (
    "https://raw.githubusercontent.com/"
    "sagnik1511/netflix-data-analysis/main/netflix_titles.csv"
)

# Read the CSV into the DataFrame used by the cells below, then preview
# its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer=url)
df.head(5)


In [None]:
# Inspect the schema and the per-column missing-value counts before cleaning.
# A notebook cell only echoes its final expression, so the null counts are
# printed explicitly; as a bare mid-cell expression they were never shown.
df.info()
print(df.isnull().sum())

# Impute missing values. Each result is assigned back to its column instead of
# calling fillna(..., inplace=True) on the df[...] selection: that chained
# in-place form is deprecated and leaves df unchanged under pandas
# Copy-on-Write.
# NOTE(review): filling 'country' with the mode assigns every title with an
# unknown country to the most frequent value, which inflates that value in any
# later per-country count -- confirm this is intended.
df['country'] = df['country'].fillna(df['country'].mode()[0])
df['rating'] = df['rating'].fillna(df['rating'].mode()[0])
df['duration'] = df['duration'].fillna('0 min')

# Forward-fill missing dates; Series.ffill() replaces the deprecated
# fillna(method='ffill').
date_added = df['date_added'].ffill()

# NOTE(review): assumes some raw date strings may carry stray surrounding
# whitespace, which strict datetime parsing rejects -- confirm against the data.
# Stripping is a no-op for clean values, and the dtype check skips it when the
# column is already parsed, so the cell can be re-run safely.
if pd.api.types.is_object_dtype(date_added) or pd.api.types.is_string_dtype(date_added):
    date_added = date_added.str.strip()

# Convert 'date_added' to datetime and derive the year each title was added.
df['date_added'] = pd.to_datetime(date_added)
df['year_added'] = df['date_added'].dt.year


In [None]:
# Number of movies vs TV Shows
sns.countplot(data=df, x='type', palette='pastel')
plt.title('Content Type Distribution')
plt.show()

# Top 10 genres (from 'listed_in')
from collections import Counter

# Split every 'listed_in' value on ', ' into its individual genre labels.
# Rows with a missing 'listed_in' are dropped first: str.split leaves them as
# NaN, and iterating over a NaN in the comprehension below raises TypeError.
genre_list = df['listed_in'].dropna().str.split(', ')

# Flatten the per-title lists and tally how many titles carry each label.
all_genres = [genre for sublist in genre_list for genre in sublist]
genre_count = Counter(all_genres)

# Keep the ten most frequent labels and draw them as a bar chart.
top_genres = pd.Series(genre_count).nlargest(10)
top_genres.plot(kind='bar', color='teal')
plt.title('Top 10 Genres on Netflix')
plt.ylabel('Number of Titles')
plt.show()


In [None]:
# Number of titles per release year, sorted by year so the line runs
# left to right in time order.
titles_per_year = df['release_year'].value_counts().sort_index()

# Draw the line chart and label it through the returned Axes.
ax = titles_per_year.plot(kind='line', figsize=(12, 5))
ax.set_title('Content Released per Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Titles')
ax.grid(True)
plt.show()


In [None]:
# Count titles per country.
# NOTE(review): presumably a 'country' value can list several comma-separated
# countries (co-productions) -- verify against the data. Counting the raw
# strings would then rank each combination as if it were a single country, so
# every value is split and exploded to credit the title to each listed country.
# Values without a comma are counted exactly as before.
countries = df['country'].dropna().str.split(',').explode().str.strip()
countries = countries[countries != '']  # drop blanks left by stray commas
top_countries = countries.value_counts().head(10)

# Horizontal bars: country names on the y axis, title counts on the x axis.
sns.barplot(y=top_countries.index, x=top_countries.values, palette='magma')
plt.title("Top 10 Countries Producing Netflix Content")
plt.xlabel("Number of Titles")
plt.ylabel("Country")
plt.show()


In [None]:
# Ten most frequent 'director' values, ignoring titles with no director.
known_directors = df['director'].dropna()
top_directors = known_directors.value_counts().head(10)

# Horizontal bar chart; the y axis is inverted so the most frequent
# director is drawn at the top.
ax = top_directors.plot(kind='barh', color='coral')
ax.set_title("Top 10 Directors on Netflix")
ax.set_xlabel("Number of Titles")
ax.invert_yaxis()
plt.show()


In [None]:
from wordcloud import WordCloud

# Seed for the word cloud's random number generator. Without a fixed
# random_state the rendered cloud differs on every run, so the figure could
# not be reproduced with Restart & Run All.
WORDCLOUD_SEED = 42

# Join all non-missing descriptions into one string for the word cloud.
text = ' '.join(df['description'].dropna())
wc = WordCloud(
    width=800,
    height=400,
    background_color='black',
    random_state=WORDCLOUD_SEED,
).generate(text)

# Render the cloud as an image with the axes hidden.
plt.figure(figsize=(12, 6))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.title("Word Cloud of Descriptions")
plt.show()
