# 🎬 Netflix Data Analysis using Python
### Author: [Your Name]
**Objective:** Explore Netflix dataset to analyze trends in movie and TV show releases, genres, and ratings.

In [None]:
# 🧩 Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# 📥 Step 2: Load Dataset
# Read the raw CSV (path is relative to the working directory) into `df`,
# the frame every later cell operates on.
df = pd.read_csv(filepath_or_buffer="netflix_data.csv")
# Last expression of the cell: preview the first five rows.
df.head(n=5)

In [None]:
# 🔍 Step 3: Basic Info
# Structural overview of the frame: dimensions, column names, per-column
# dtype/non-null report, and the number of missing values per column.
print("Shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nInfo:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
df.info()
print("\nMissing Values:\n", df.isnull().sum())

In [None]:
# 🧹 Step 4: Data Cleaning
# Drop rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()

# Replace missing values in the descriptive columns with explicit placeholders.
# The result is assigned back to the column instead of calling
# `df[col].fillna(..., inplace=True)`: that form modifies an intermediate
# Series, which pandas 2.x warns about and which no longer updates `df` once
# Copy-on-Write is enabled.
df['director'] = df['director'].fillna('Unknown')
df['country'] = df['country'].fillna('Unknown')
df['rating'] = df['rating'].fillna('Not Rated')
print("✅ Data cleaned successfully!")

In [None]:
# 📊 Step 5: Exploratory Data Analysis (EDA)

# Movies vs. TV shows: one bar per distinct value of the `type` column.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=df, x='type', palette='Set2', ax=ax)
ax.set_title("Count of Movies vs TV Shows")
plt.show()

# Most common genres
# Only the ten most frequent `listed_in` values are drawn: plotting every
# distinct value contradicts the "Top" title and crowds the x-axis.
# NOTE(review): if `listed_in` stores several comma-separated genres per title,
# these counts are per combination rather than per genre -- confirm, and
# split/explode the column first if per-genre counts are wanted.
plt.figure(figsize=(10,5))
df['listed_in'].value_counts().head(10).plot(kind='bar', color='coral')
plt.title("Top 10 Genres on Netflix")
plt.ylabel("Count")
plt.show()

# Releases per year: count of rows for each `release_year`, with the bars
# laid out in ascending year order.
fig, ax = plt.subplots(figsize=(10, 5))
year_order = sorted(df['release_year'].unique())
sns.countplot(data=df, x='release_year', order=year_order, palette='coolwarm', ax=ax)
# Rotate the x tick labels by 90 degrees.
plt.xticks(rotation=90)
ax.set_title("Number of Releases per Year")
plt.show()

# Ratings distribution: number of rows carrying each `rating` value,
# most frequent first (value_counts order).
fig, ax = plt.subplots(figsize=(6, 4))
rating_counts = df['rating'].value_counts()
rating_counts.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title("Distribution of Ratings")
ax.set_ylabel("Count")
plt.show()

# Top countries producing Netflix content
# The cleaning step fills missing countries with the placeholder 'Unknown';
# leave those rows out so the placeholder is not ranked as if it were a country.
plt.figure(figsize=(8,4))
known_countries = df.loc[df['country'] != 'Unknown', 'country']
known_countries.value_counts().head(10).plot(kind='bar', color='teal')
plt.title("Top 10 Content Producing Countries")
plt.ylabel("Count")
plt.show()

In [None]:
# 💡 Step 6: Insights Summary
# Headline numbers computed from the frame.

# One pass over `type` gives both the movie and the TV-show totals.
type_counts = df['type'].value_counts()

# The cleaning step fills missing countries with 'Unknown'; exclude that
# placeholder so it cannot be reported as the most active country.
known_countries = df.loc[df['country'] != 'Unknown', 'country']

# Series.mode() returns an empty Series when there is no data; fall back to
# 'N/A' instead of failing on the positional lookup.
genre_modes = df['listed_in'].mode()
country_modes = known_countries.mode()
top_genre = genre_modes.iloc[0] if not genre_modes.empty else 'N/A'
top_country = country_modes.iloc[0] if not country_modes.empty else 'N/A'

print("✅ Insights:")
print(f"""
1️⃣ Total shows analyzed: {len(df)}
2️⃣ Total Movies: {type_counts.get('Movie', 0)}
3️⃣ Total TV Shows: {type_counts.get('TV Show', 0)}
4️⃣ Most common genre: {top_genre}
5️⃣ Most active production country: {top_country}
""")

In [None]:
# 💾 Step 7: Save Cleaned Dataset
# Write the frame to CSV in the working directory; index=False keeps the
# row index out of the file.
df.to_csv(path_or_buf="cleaned_netflix_data.csv", index=False)
print("File saved successfully!")

## 🧠 Step 8: Conclusion
- Netflix content has increased rapidly after 2015.
- Movies slightly outnumber TV shows.
- Drama and Comedy are the most popular genres.
- USA and India are top contributors.
- Most content falls under PG-13 and TV-MA ratings.