# 1. Importing Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Data Loading

In [None]:
# Read the Netflix CSV into the DataFrame that every later cell works on
data = pd.read_csv(filepath_or_buffer='netflix1.csv')

# Preview the first five rows as a quick sanity check of the load
print(data.head(n=5))


# 3. Exploring Data

In [None]:
# Get a summary of the dataset (columns, non-null counts, dtypes).
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() emits a stray "None".
data.info()

# Get basic statistics (describe() covers numeric columns by default)
print(data.describe())

# Count the missing values in each column
print(data.isnull().sum())


# 4. Data Cleaning

In [None]:
# Parse the date column first. errors='coerce' turns unparseable values into
# NaT, so converting before dropna() lets those rows be removed together with
# the other incomplete rows instead of leaving missing dates in the cleaned
# frame (which would later surface as NaN years/months).
# NOTE(review): assumes the CSV has a 'release_date' column — confirm against
# the file's header.
data['release_date'] = pd.to_datetime(data['release_date'], errors='coerce')

# Drop any rows with missing values (including dates that failed to parse)
data = data.dropna()

# Check the cleaned data. info() prints its own report and returns None,
# so it is not wrapped in print() (that would output an extra "None").
data.info()


# Total Content on Netflix

In [None]:
# Total number of content entries = number of rows in the frame
total_content = len(data.index)
print(f"Total Content on Netflix: {total_content}")


# Top 10 Countries with Netflix Content

In [None]:
# Tally entries per country (value_counts orders most frequent first)
# and keep the first ten tallies
top_10_countries = data['country'].value_counts().iloc[:10]

# Show the heading and the ranking, one per line
print("Top 10 Countries with Netflix Content:", top_10_countries, sep="\n")


# Monthly Trend of Movie and TV Show Releases

In [None]:
# Derive calendar year and month columns from the release date
data['year'] = data['release_date'].dt.year
data['month'] = data['release_date'].dt.month

# One row per (year, month) pair holding the number of entries in that pair
monthly_trend = (
    data.groupby(['year', 'month'])
    .size()
    .rename('count')
    .reset_index()
)

# Draw the counts against month, coloured by year, then label the axes
plt.figure(figsize=(12, 6))
sns.lineplot(x='month', y='count', hue='year', data=monthly_trend).set(
    title='Monthly Trend of Movie and TV Show Releases',
    xlabel='Month',
    ylabel='Number of Releases',
)
plt.show()


# Yearly Trend of Movie and TV Show Releases

In [None]:
# Group by year and count the number of releases
yearly_trend = data.groupby('year').size().reset_index(name='count')

# 'year' is float when any release date is missing (NaT). groupby has already
# dropped those keys, so casting to int is safe and keeps the axis labels as
# e.g. 2019 rather than 2019.0.
yearly_trend['year'] = yearly_trend['year'].astype(int)

# Plot the yearly trend. seaborn >= 0.13 deprecates passing `palette` without
# `hue`, so the x variable is also assigned to hue; dodge=False keeps a single
# full-width bar per year on older seaborn releases as well.
plt.figure(figsize=(12, 6))
ax = sns.barplot(data=yearly_trend, x='year', y='count', hue='year',
                 palette='viridis', dodge=False)
# Colour only repeats the x axis here, so the hue legend is redundant
if ax.get_legend() is not None:
    ax.get_legend().remove()
plt.title('Yearly Trend of Movie and TV Show Releases')
plt.xlabel('Year')
plt.ylabel('Number of Releases')
plt.show()


# Top 10 Movies

In [None]:
# Filter for movies and count the number of entries per title, keeping the
# ten most frequent. The visible code never adds a 'count' column to `data`
# (only the trend tables have one), so ranking with nlargest(10, 'count')
# fails with KeyError; the count is derived here instead, the same way the
# country and director rankings are built.
# NOTE(review): if a different ranking metric was intended, swap it in here.
top_10_movies = (
    data.loc[data['type'] == 'Movie', 'title']
    .value_counts()
    .head(10)
    .rename_axis('title')        # index holds the titles
    .reset_index(name='count')   # -> columns: 'title', 'count'
)

# Display the results
print("Top 10 Movies:")
print(top_10_movies[['title', 'count']])


# Top 10 TV Shows

In [None]:
# Filter for TV shows and count the number of entries per title, keeping the
# ten most frequent. The visible code never adds a 'count' column to `data`
# (only the trend tables have one), so ranking with nlargest(10, 'count')
# fails with KeyError; the count is derived here instead, the same way the
# country and director rankings are built.
# NOTE(review): if a different ranking metric was intended, swap it in here.
top_10_tv_shows = (
    data.loc[data['type'] == 'TV Show', 'title']
    .value_counts()
    .head(10)
    .rename_axis('title')        # index holds the titles
    .reset_index(name='count')   # -> columns: 'title', 'count'
)

# Display the results
print("Top 10 TV Shows:")
print(top_10_tv_shows[['title', 'count']])


# Top 10 Directors

In [None]:
# Tally how many entries each director has (most frequent first)
# and keep the first ten
top_10_directors = data['director'].value_counts().iloc[:10]

# Show the heading and the ranking, one per line
print("Top 10 Directors:", top_10_directors, sep="\n")
