# Exploring Netflix Titles: Trends and Insights

This notebook walks through loading, cleaning, exploring, and visualizing the **Netflix Titles** dataset.

In [None]:
# Setup
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt

# Show every column when a DataFrame is displayed (None = no column limit).
pd.options.display.max_columns = None
print('Libraries loaded!')

In [None]:
# Load data
# Relative location of the raw CSV -- adjust if needed.
PATH = '../data/netflix_titles.csv'
df = pd.read_csv(filepath_or_buffer=PATH)
# Preview the first five rows to confirm the file parsed as expected.
df.head(n=5)

In [None]:
# Quick explore
# (rows, columns) of the loaded frame.
n_rows_cols = df.shape
print(n_rows_cols)
# Count of missing values in each column.
df.isnull().sum()

In [None]:
# Basic cleaning
# Normalise column names: trim, lower-case, and replace spaces with underscores.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Parse `date_added`. Surrounding whitespace is stripped first: with
# errors='coerce', any value that does not match the parsed format (for example
# because of a leading space) would otherwise be silently turned into NaT.
date_added = df['date_added']
if not pd.api.types.is_datetime64_any_dtype(date_added):
    # Only strip raw text; on a re-run the column is already datetime and the
    # `.str` accessor would not be available.
    date_added = date_added.astype('string').str.strip()
df['date_added'] = pd.to_datetime(date_added, errors='coerce')

# Values that cannot be read as a number become NaN instead of raising.
df['release_year'] = pd.to_numeric(df['release_year'], errors='coerce')
df.head()

## Questions

In [None]:
# 1) Movies vs TV Shows
# dropna=False keeps rows with a missing `type` visible as their own category.
counts = df['type'].value_counts(dropna=False)

# Draw on a dedicated figure/axes: without an explicit `ax`, a Series plot
# reuses whatever figure is currently open, which can overlay earlier charts
# when figures are not closed between cells.
fig, ax = plt.subplots()
counts.plot(kind='bar', ax=ax, title='Movies vs TV Shows')
ax.set_xlabel('Type')
ax.set_ylabel('Count')
fig.tight_layout()

# Create the output folder first; savefig raises if the directory is missing.
out_path = Path('../images/movies_vs_tvshows.png')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=150)
counts

In [None]:
# 2) Top genres (from `listed_in`)
# `listed_in` holds several comma-separated genres per title, so each genre is
# given its own row before counting.
genres = (
    df.assign(listed_in=df['listed_in'].fillna(''))
      # The lambda reads the NaN-filled column produced by the previous step;
      # referencing `df['listed_in']` here would bypass the fillna.
      .assign(genre=lambda d: d['listed_in'].str.split(','))
      .explode('genre')
      # Trim whitespace so "A, B" and "A,B" (or a trailing comma) yield the
      # same clean labels instead of near-duplicate variants.
      .assign(genre=lambda d: d['genre'].str.strip())
      .query("genre != ''")
)
top_genres = genres['genre'].value_counts().head(15)

# Draw on a dedicated figure/axes: without an explicit `ax`, a Series plot
# reuses whatever figure is currently open.
fig, ax = plt.subplots()
top_genres.plot(kind='bar', ax=ax, title='Top Genres (Top 15)')
ax.set_xlabel('Genre')
ax.set_ylabel('Count')
fig.tight_layout()

# Create the output folder first; savefig raises if the directory is missing.
out_path = Path('../images/top_genres.png')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=150)
top_genres

In [None]:
# 3) Content over time by release year
# Number of titles per release year, ordered chronologically.
by_year = df.groupby('release_year').size().sort_index()

# Draw on a dedicated figure/axes: without an explicit `ax`, a Series plot
# reuses whatever figure is currently open.
fig, ax = plt.subplots()
by_year.plot(kind='line', ax=ax, title='Titles by Release Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Titles')
fig.tight_layout()

# Create the output folder first; savefig raises if the directory is missing.
out_path = Path('../images/titles_by_release_year.png')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=150)

# Show the ten most recent release years.
by_year.tail(10)

In [None]:
# 4) Top producing countries (using the `country` column)
# A title can list several comma-separated countries, so each country is given
# its own row before counting.
countries = (
    df.assign(country=df['country'].fillna(''))
      # The lambda reads the NaN-filled column produced by the previous step;
      # referencing `df['country']` here would bypass the fillna.
      .assign(cc=lambda d: d['country'].str.split(','))
      .explode('cc')
      # Trim whitespace so irregular separators or a leading/trailing comma
      # cannot produce labels such as "France," or an empty entry.
      .assign(cc=lambda d: d['cc'].str.strip())
      .query("cc != ''")
)
top_countries = countries['cc'].value_counts().head(15)

# Draw on a dedicated figure/axes: without an explicit `ax`, a Series plot
# reuses whatever figure is currently open.
fig, ax = plt.subplots()
top_countries.plot(kind='bar', ax=ax, title='Top Producing Countries (Top 15)')
ax.set_xlabel('Country')
ax.set_ylabel('Count')
fig.tight_layout()

# Create the output folder first; savefig raises if the directory is missing.
out_path = Path('../images/top_countries.png')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=150)
top_countries

## Summary (fill in your insights)
- Add 3–5 bullets summarizing what you found.
- Example: "Movies are more common than TV Shows."