# Import the "Pandas" library

In [None]:
import pandas as pd

# Load the Netflix data from the downloaded file

In [None]:
# Location of the downloaded Netflix titles CSV.
csv_path = "B:\\My Data Projects\\netflix_titles.csv"
# Use the show_id column as the row index of the DataFrame.
netflix = pd.read_csv(csv_path, index_col="show_id")
netflix

# Check for missing data

In [None]:
# Number of null entries in each column.
missing_counts = netflix.isnull().sum()
print(missing_counts)

# Fill missing directors with "Unknown"

In [None]:
# fillna() returns a new Series, so assign it back for the change to stick.
netflix["director"] = netflix["director"].fillna("Unknown")

# Do the same for missing countries

In [None]:
# fillna() returns a new Series, so assign it back for the change to stick.
netflix["country"] = netflix["country"].fillna("Unknown")

# Fix dates

In [None]:
# Parse the date strings; anything that cannot be parsed becomes NaT.
netflix["date_added"] = pd.to_datetime(netflix["date_added"], errors="coerce")
# Missing dates stay as NaT rather than the string "Unknown": a string would
# stop the column from holding datetime values, which the year extraction
# further down relies on. Report how many dates are missing instead.
print(netflix["date_added"].isna().sum())

# Count Movies Vs TV Shows

In [None]:
# The column is named 'type' (singular), as used by the other cells.
print(netflix['type'].value_counts())

# Oldest movie on netflix

In [None]:
# Restrict to movies, order by release year ascending and keep the first row.
movies = netflix[netflix['type'] == 'Movie']
oldest_movie = movies.sort_values('release_year').head(1)
print(oldest_movie[['title', 'release_year']])

# Top five(5) Countries producing content

In [None]:
# value_counts() orders by frequency, so the first five rows are the top five.
country_counts = netflix['country'].value_counts()
top_countries = country_counts.iloc[:5]
print(top_countries)

# Most common Genre

In [None]:
# Turn the comma-separated 'listed_in' text into a list of genres per title.
netflix['genre_split'] = netflix['listed_in'].str.split(pat=', ')
# One row per (title, genre) pair.
genre = netflix.explode('genre_split')
genre_counts = genre['genre_split'].value_counts()
print(genre_counts.head())

# How much content added each year

In [None]:
# Year component of the date each title was added.
added_on = pd.to_datetime(netflix['date_added'])
netflix['year_added'] = added_on.dt.year
# Titles per year, ordered chronologically rather than by count.
year_counts = netflix["year_added"].value_counts()
yearly_content = year_counts.sort_index()
print(yearly_content)

# Visualizations

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's 'dark' theme to the plots that follow.
sns.set_style(style='dark')

# Pie Chart

In [None]:
sns.set_palette("colorblind")
# Number of titles per content type; the index supplies the wedge labels.
type_counts = netflix['type'].value_counts()
wedge_colors = ['#FF6B6B', '#4ECDC4']

plt.figure(figsize=(6, 6))
plt.pie(
    type_counts,
    labels=type_counts.index,
    autopct='%.2f%%',  # percentage with two decimals on each wedge
    colors=wedge_colors,
)
plt.title("Netflix Content: Movies VS TV Shows", fontsize=24)
plt.legend()
plt.show()

# Most common Genre

In [None]:
# Split the comma-separated genre text and flatten to one genre per row.
genre_lists = netflix['listed_in'].str.split(', ')
genres = genre_lists.explode()
# value_counts() is ordered by frequency, so the first ten are the top ten.
top_genres = genres.value_counts().iloc[:10]

# Horizontal bar plot: counts on the x axis, genre names on the y axis.
label_style = dict(fontsize=20, weight="bold")
plt.figure(figsize=(10, 6))
sns.barplot(x=top_genres.values, y=top_genres.index)
plt.title('Top 10 Genres on Netflix', fontsize=24, weight="bold")
plt.xlabel('Number of Titles', **label_style)
plt.ylabel('Genres', **label_style)
plt.show()

# Content Added Over Time

In [None]:
# Line chart of titles added per year, taken from yearly_content.
years = yearly_content.index
titles_added = yearly_content.values
label_style = dict(fontsize=20, weight="bold")

plt.figure(figsize=(10, 5))
sns.lineplot(x=years, y=titles_added, color='#9370DB')
plt.title('Netflix Content Added by Year', fontsize=24, weight="bold")
plt.xlabel('Year', **label_style)
plt.ylabel('Number of Titles Added', **label_style)
plt.grid(True)
plt.show()

# Release Year Distribution

In [None]:
plt.figure(figsize=(10, 5))
sns.set_style('darkgrid')

# Histogram of release years with a KDE curve drawn over it.
release_years = netflix['release_year']
label_style = dict(fontsize=20, weight="bold")
sns.histplot(release_years, bins=30, kde=True, color='#ffa07a')
plt.title('Distribution of Release Years', fontsize=24, weight="bold")
plt.xlabel('Release Year', **label_style)
plt.ylabel('Number of Titles', **label_style)

# Dashed vertical marker at the median release year.
median_year = release_years.median()
plt.axvline(x=median_year, color='red', linestyle='--', label='Median Year')
plt.legend()
plt.show()

# THANK YOU