In [None]:
import pandas as pd

# Load the movie table from the CSV file (path is relative to the working directory).
CSV_PATH = 'imbd.csv'
df = pd.read_csv(CSV_PATH)

# Show the first rows as a quick check that the file parsed.
df.head()

In [None]:
import matplotlib.pyplot as plt

# Coerce 'Year' to a numeric dtype; entries that cannot be parsed become NaN.
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')

# Mean 'imdbRating' for each year, flattened back into a two-column frame
# (Year, imdbRating). groupby skips rows whose key is NaN by default.
mean_rating_per_year = df.groupby('Year')['imdbRating'].mean()
avg_ratings_by_year = mean_rating_per_year.reset_index()

# Line chart of the yearly averages, one marker per year.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    avg_ratings_by_year['Year'],
    avg_ratings_by_year['imdbRating'],
    marker='o',
)
ax.set_title('Average IMDB Ratings Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Average IMDB Rating')
ax.grid(True)
plt.show()

In [None]:
from collections import Counter

# Count how often each genre label occurs across all rows.
# Each 'Genre' entry is split on ', ' into individual labels. Missing entries
# (NaN) would make ', '.join(...) raise TypeError, so they are counted under
# 'Unknown' here; df itself is left untouched.
genre_labels = (
    df['Genre']
    .fillna('Unknown')
    .astype(str)          # guard against stray non-string values
    .str.split(', ')
    .explode()            # one row per individual genre label
)
genre_counts = Counter(genre_labels)

# Two-column frame (Genre, Count), most frequent genre first.
# Passing the column names to the constructor also works when there are no
# rows at all, unlike renaming the columns after from_dict(...).reset_index().
df_genre_counts = pd.DataFrame(
    list(genre_counts.items()), columns=['Genre', 'Count']
).sort_values('Count', ascending=False)

# Horizontal bar chart of the genre frequencies.
plt.figure(figsize=(10, 6))
plt.barh(df_genre_counts['Genre'], df_genre_counts['Count'], color='skyblue')
plt.title('Most Common Genres in Top 250 Movies')
plt.xlabel('Count')
plt.ylabel('Genre')
# barh draws the first row at the bottom; flip so the largest count is on top.
plt.gca().invert_yaxis()
plt.show()

In [None]:
# Replace missing 'Genre' entries with the placeholder 'Unknown' so that every
# entry can be joined as text below. Note: this overwrites the column in df.
df['Genre'] = df['Genre'].fillna('Unknown')

# Glue all entries into one ', '-separated string, cut it back into individual
# genre labels, and tally how often each label occurs.
all_genres_text = ', '.join(df['Genre'])
genre_counts = Counter(all_genres_text.split(', '))

# Two-column frame (Genre, Count), ordered from most to least frequent.
df_genre_counts = pd.DataFrame(
    list(genre_counts.items()),
    columns=['Genre', 'Count'],
).sort_values(by='Count', ascending=False)

# Horizontal bar chart of the genre frequencies.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(df_genre_counts['Genre'], df_genre_counts['Count'], color='skyblue')
ax.set_title('Most Common Genres in Top 250 Movies')
ax.set_xlabel('Count')
ax.set_ylabel('Genre')
# barh draws the first row at the bottom; flip so the largest count is on top.
ax.invert_yaxis()
plt.show()

In [None]:
# pprint is not imported anywhere in the visible notebook; import it here so
# this cell does not raise NameError on a fresh kernel.
from pprint import pprint

# Fetch data for one movie and pretty-print the result.
# NOTE(review): fetch_movie_data is not defined in the visible part of this
# notebook; confirm it is defined or imported in a cell that runs before this one.
movie_data = fetch_movie_data('The Shawshank Redemption')
pprint(movie_data)