In [None]:
# Movie rating analysis using pandas


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Generate fake movie data
# A fixed seed makes every "Restart Kernel -> Run All" build the same table, so
# the printed summaries and the plots stay comparable from one run to the next.
SEED = 42
N_MOVIES = 20
rng = np.random.default_rng(SEED)

df = pd.DataFrame({
    'Title': [f"Movie {i}" for i in range(1, N_MOVIES + 1)],
    'Genre': rng.choice(['Action', 'Drama', 'Comedy'], N_MOVIES),
    # Uniform draws from [5, 10), rounded to two decimals.
    'Rating': np.round(rng.uniform(5, 10, N_MOVIES), 2),
    # Upper bounds are exclusive: votes fall in 100..1999, years in 2000..2023.
    'Votes': rng.integers(100, 2000, N_MOVIES),
    'Year': rng.integers(2000, 2024, N_MOVIES)
})

print(df.head())

# 1️⃣ Average rating by genre
# Mean of 'Rating' within each 'Genre'; the result is a Series indexed by genre.
genre_rating = df.groupby('Genre')['Rating'].mean()
# The Series is embedded in the f-string on purpose: passing it as a second
# print() argument inserts a separator space after the newline, which pushes
# the first line of the table one column to the right.
print(f"\nAverage Rating by Genre:\n{genre_rating}")

# 2️⃣ Highly rated and popular
# "Popular" means strictly more than 1000 votes; the matching movies are then
# ordered from the highest rating to the lowest (no rating cutoff is applied).
is_popular = df['Votes'] > 1000
popular = df[is_popular].sort_values('Rating', ascending=False)
# f-string instead of a second print() argument, so no separator space is
# inserted in front of the column header and the header stays aligned.
print(f"\nPopular & Highly Rated:\n{popular}")

# 3️⃣ Add weighted score
# WeightedScore = Votes * Rating
# NOTE: this is a plain popularity-weighted product, so it grows with the vote
# count and heavily voted movies dominate. It is not the weighted average that
# IMDb publishes, which damps the influence of titles with few votes.
# The column is added to df itself (not a copy), so df carries it from here on.
df['WeightedScore'] = df['Votes'] * df['Rating']
# f-strings keep the column header aligned; a second print() argument would be
# preceded by a separator space.
print(f"\nWith Weighted Score:\n{df.head()}")

# 4️⃣ Top movies by weighted score
# Sort descending by the new column and keep the first five rows.
top_weighted = df.sort_values('WeightedScore', ascending=False).head(5)
print(f"\nTop 5 Movies by Weighted Score:\n{top_weighted}")

# 5️⃣ Ratings distribution histogram
# Five equal-width bins spanning the observed ratings; the black edge colour
# keeps neighbouring bars visually separate.
fig, ax = plt.subplots()
ax.hist(df['Rating'], bins=5, edgecolor='black')
ax.set(title='Ratings Distribution', xlabel='Rating', ylabel='Frequency')
plt.show()

# 6️⃣ Votes vs. Rating scatter plot
# One point per movie: vote count on the x-axis, rating on the y-axis.
fig, ax = plt.subplots()
ax.scatter(df['Votes'], df['Rating'])
ax.set(title='Votes vs. Rating', xlabel='Votes', ylabel='Rating')
plt.show()

# 7️⃣ Average votes per genre
# Mean of 'Votes' within each 'Genre'; the result is a Series indexed by genre.
genre_votes = df.groupby('Genre')['Votes'].mean()
# f-string instead of a second print() argument: the default separator would
# put a space in front of the first output line and misalign it.
print(f"\nAverage Votes by Genre:\n{genre_votes}")

# 8️⃣ Number of movies per year
# size() counts the rows in each 'Year' group. Only years that occur in the
# data appear in the result, so a year without any movie gets no entry (and
# therefore no bar in the chart below).
yearly_count = df.groupby('Year').size()
# f-string instead of a second print() argument, so the first output line is
# not shifted by print()'s separator space.
print(f"\nMovies per Year:\n{yearly_count}")

# Series.plot returns the Axes it drew on; label that Axes directly.
ax = yearly_count.plot(kind='bar', figsize=(10, 4))
ax.set(
    title='Number of Movies Released per Year',
    xlabel='Year',
    ylabel='Number of Movies',
)
plt.show()

# 9️⃣ Highest rated movie per genre
# idxmax() yields, for every genre, the row label of its highest rating (the
# first such row when ratings tie); .loc then pulls those full rows from df.
best_row_labels = df.groupby('Genre')['Rating'].idxmax()
best_per_genre = df.loc[best_row_labels]
best_summary = best_per_genre[['Genre', 'Title', 'Rating']]
# f-string instead of a second print() argument, so the column header is not
# pushed out of alignment by print()'s separator space.
print(f"\nBest Rated Movie in Each Genre:\n{best_summary}")
