In [None]:
# Task 1: Load and Explore the Dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset. The untouched frame is kept in `df_raw` so the cleaning
# step below can be re-run or changed without re-reading the CSV, and so the
# effect of cleaning can be measured.
df_raw = pd.read_csv("Netflix.csv")

# Display the first few rows
print("First 5 rows of the dataset:")
display(df_raw.head())

# Check data structure and types
print("\nData types:")
print(df_raw.dtypes)

# Check for missing values
print("\nMissing values:")
print(df_raw.isnull().sum())

# Clean dataset: drop every row that has a missing value in any column.
# Forward-filling with df_raw.ffill() is an alternative if appropriate
# (the older fillna(method='ffill') spelling is deprecated in recent pandas).
# `df` is the cleaned frame used by the cells that follow.
df = df_raw.dropna()

# Report how many rows the cleaning step discarded, since dropna() removes a
# row for a gap in any column.
rows_dropped = len(df_raw) - len(df)
print(f"\nDropped {rows_dropped} of {len(df_raw)} rows containing missing values.")

: 

In [None]:
# Task 2: Basic Data Analysis

# Summary statistics as produced by DataFrame.describe()
print("\nDescriptive statistics:")
summary_stats = df.describe()
display(summary_stats)

# Mean IMDB_Rating within each value of the Rating column
print("\nAverage IMDB Rating by Rating Category:")
mean_imdb_by_rating = df.groupby('Rating')['IMDB_Rating'].mean()
print(mean_imdb_by_rating)

# Row count for each distinct value of the Netflix column
print("\nNumber of Shows on Netflix vs Not on Netflix:")
netflix_counts = df['Netflix'].value_counts()
print(netflix_counts)

In [None]:
# Task 3: Data Visualization
# Each chart is drawn on its own explicitly created figure/axes pair.

# Apply seaborn's "whitegrid" style to the figures below
sns.set(style="whitegrid")

# 1. Line Chart: number of rows per Year
shows_per_year = df.groupby('Year').size()
fig, ax = plt.subplots(figsize=(8, 5))
shows_per_year.plot(ax=ax, marker='o', color='teal')
ax.set_title("Shows Released Per Year")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Shows")
fig.tight_layout()
plt.show()

# 2. Bar Chart: mean IMDB_Rating per Rating value, sorted ascending
mean_imdb_sorted = df.groupby('Rating')['IMDB_Rating'].mean().sort_values()
fig, ax = plt.subplots(figsize=(7, 5))
mean_imdb_sorted.plot(kind='bar', ax=ax, color='coral')
ax.set_title("Average IMDB Rating by Age Rating")
ax.set_xlabel("Rating")
ax.set_ylabel("IMDB Rating")
fig.tight_layout()
plt.show()

# 3. Histogram: distribution of IMDB_Rating in 10 bins with a KDE overlay
fig, ax = plt.subplots(figsize=(7, 5))
sns.histplot(df['IMDB_Rating'], bins=10, kde=True, color='purple', ax=ax)
ax.set_title("Distribution of IMDB Ratings")
ax.set_xlabel("IMDB Rating")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()

# 4. Scatter Plot: Year vs IMDB_Rating, coloured by the Netflix column
# The palette maps Netflix value 1 to red and 0 to blue.
netflix_palette = {1: "red", 0: "blue"}
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="Year",
    y="IMDB_Rating",
    hue="Netflix",
    palette=netflix_palette,
    ax=ax,
)
ax.set_title("IMDB Ratings Over Time (Netflix = Red)")
ax.set_xlabel("Year")
ax.set_ylabel("IMDB Rating")
ax.legend(title="On Netflix")
fig.tight_layout()
plt.show()
