# In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Apply seaborn's "whitegrid" theme to the figures created below.
sns.set_style("whitegrid")

# --- 1. Data Loading and Preprocessing ---
# The path is relative, so the CSV is looked up in the current working directory.
df = pd.read_csv("Netflix Dataset.csv")

# Parse dates of the form "<Month name> <day>, <year>" after trimming surrounding
# whitespace. errors='coerce' turns any value that does not match into NaT
# instead of raising.
release_dates = pd.to_datetime(
    df['Release_Date'].str.strip(),
    format="%B %d, %Y",
    errors='coerce',
)
df['Release_Date'] = release_dates
# Nullable 'Int64' keeps the year integral even while unparsed dates are still missing.
df['Release_Year'] = release_dates.dt.year.astype('Int64')

# Rows whose date could not be parsed have no year; remove them from df in place.
df.dropna(subset=['Release_Year'], inplace=True)

# 'Country' may list several comma-separated countries; the country analysis
# uses only the first one, with surrounding whitespace removed.
first_listed_country = df['Country'].str.split(',').str[0]
df['Country_Main'] = first_listed_country.str.strip()


# --- 2. Objective 1: Movies vs. TV Shows Distribution Over Years ---
# Count titles per (release year, category) and pivot the categories into
# columns; year/category combinations that never occur are filled with 0.
content_over_time = df.groupby(['Release_Year', 'Category']).size().unstack(fill_value=0)
content_over_time = content_over_time.sort_index()

# Select the plotted categories with reindex rather than [[...]]: if the data
# contains no rows for one of them, it becomes an all-zero column instead of
# raising KeyError. When both are present the result is unchanged.
plotted_categories = ['Movie', 'TV Show']
category_counts = content_over_time.reindex(columns=plotted_categories, fill_value=0)

# Draw on an explicit figure/axes pair so nothing depends on pyplot's
# "current figure" state.
fig, ax = plt.subplots(figsize=(12, 6))
category_counts.plot(kind='area', stacked=True, ax=ax,
                     color=['#e50914', '#221f1f'])
ax.set_title('Netflix Content Production Trends: Movies vs. TV Shows by Release Year', fontsize=16)
ax.set_xlabel('Release Year', fontsize=12)
ax.set_ylabel('Number of Titles Added', fontsize=12)
ax.legend(title='Category', loc='upper left')
fig.tight_layout()
fig.savefig('netflix_content_trends_by_year.png')
# Close the figure once it is written so it does not stay open in memory.
plt.close(fig)


# --- 3. Objective 2: Top 10 Most Common Genres ---
# 'Type' holds a comma-separated list of genres per title. Split on the bare
# comma and strip each piece afterwards, so "A,B" and "A, B" are both
# separated correctly; explode then gives every genre its own row.
df_genres = df.assign(Genre=df['Type'].str.split(',')).explode('Genre')
df_genres['Genre'] = df_genres['Genre'].str.strip()
# value_counts ignores missing genres; keep the ten most frequent.
top_genres = df_genres['Genre'].value_counts().nlargest(10)

# Draw on an explicit figure/axes pair so nothing depends on pyplot's
# "current figure" state.
fig, ax = plt.subplots(figsize=(12, 6))
# hue mirrors x (with the legend suppressed) so the palette can be applied
# per bar without seaborn's FutureWarning about palette-without-hue.
sns.barplot(x=top_genres.index, y=top_genres.values, hue=top_genres.index,
            palette='viridis', legend=False, ax=ax)
ax.set_title('Top 10 Most Common Genres on Netflix', fontsize=16)
ax.set_xlabel('Genre', fontsize=12)
ax.set_ylabel('Number of Titles', fontsize=12)
# Slant the genre names so long labels do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set(rotation=45, ha='right')
fig.tight_layout()
fig.savefig('netflix_top_10_genres.png')
plt.close(fig)


# --- 4. Objective 3: Top 10 Country-wise Contributions ---
# Keep only titles that have a primary country, then rank the countries by
# how many titles each one has and keep the ten largest.
has_main_country = df['Country_Main'].notna()
country_contributions = df[has_main_country]
top_countries = country_contributions['Country_Main'].value_counts().nlargest(10)

fig, ax = plt.subplots(figsize=(12, 6))
# hue mirrors x (with the legend suppressed) so the palette can be applied
# per bar without seaborn's FutureWarning about palette-without-hue.
sns.barplot(
    x=top_countries.index,
    y=top_countries.values,
    hue=top_countries.index,
    palette='rocket',
    legend=False,
    ax=ax,
)
ax.set_title('Top 10 Countries Contributing to Netflix Content', fontsize=16)
ax.set_xlabel('Country', fontsize=12)
ax.set_ylabel('Number of Titles', fontsize=12)
# Slant the country names so long labels do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set(rotation=45, ha='right')
fig.tight_layout()
fig.savefig('netflix_top_10_countries.png')
plt.close(fig)