In [None]:
from collections import Counter

import pandas as pd

# Read the movie table from the CSV file in the working directory
df = pd.read_csv('imbd.csv')

# Preview the first five rows as a quick sanity check
df.head(n=5)

In [None]:
# Coerce 'Year' to a numeric dtype; values that cannot be parsed become NaN
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')

# Mean 'imdbRating' per 'Year', returned as a flat two-column frame
avg_ratings_by_year = df.groupby('Year', as_index=False)['imdbRating'].mean()

# Plotting
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    avg_ratings_by_year['Year'],
    avg_ratings_by_year['imdbRating'],
    marker='o',
)
ax.set_title('Average IMDB Ratings Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Average IMDB Rating')
ax.grid(True)
plt.show()

In [None]:
# Replace missing 'Genre' entries with a placeholder so every row is a string
df['Genre'] = df['Genre'].fillna('Unknown')

# Each row holds a comma-separated list of genres; count every individual one.
# Splitting on ',' and stripping tolerates inconsistent spacing around commas,
# and empty fragments (e.g. from a trailing comma) are skipped.
genre_counts = Counter(
    name
    for entry in df['Genre']
    for name in (part.strip() for part in entry.split(','))
    if name
)

# Convert to DataFrame for easier manipulation.
# Building from (genre, count) pairs also works when the counter is empty.
df_genre_counts = pd.DataFrame(list(genre_counts.items()), columns=['Genre', 'Count'])

# Sort by count, most frequent first
df_genre_counts = df_genre_counts.sort_values('Count', ascending=False)

# Plotting
plt.figure(figsize=(10, 6))
plt.barh(df_genre_counts['Genre'], df_genre_counts['Count'], color='skyblue')
plt.title('Most Common Genres in Top 250 Movies')
plt.xlabel('Count')
plt.ylabel('Genre')
# barh draws the first row at the bottom; flip so the largest bar is on top
plt.gca().invert_yaxis()
plt.show()

In [None]:
import requests
import os

def fetch_movie_data(title, timeout=10):
    """Look up a single movie by title through the OMDb API.

    Args:
        title: Movie title, sent as the ``t`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the ``OMDB_API_KEY`` environment variable is unset
            or empty.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Retrieve API key from environment variables; fail early with a clear
    # message instead of sending a request that carries no key
    api_key = os.getenv('OMDB_API_KEY')
    if not api_key:
        raise RuntimeError(
            "OMDB_API_KEY environment variable is not set; "
            "cannot query the OMDb API"
        )

    # Use HTTPS so the API key is not sent in clear text
    base_url = 'https://www.omdbapi.com/'

    # Define the parameters for the API request
    params = {
        'apikey': api_key,
        't': title
    }

    # Send GET request to the OMDB API
    response = requests.get(base_url, params=params, timeout=timeout)

    # Surface HTTP-level failures here rather than as a confusing JSON
    # decoding error on a non-JSON error page
    response.raise_for_status()

    # Return the JSON response
    return response.json()

# Try the helper on a single title and display the payload it returns
sample_title = 'The Shawshank Redemption'
movie_data = fetch_movie_data(sample_title)
movie_data

In [None]:
# Parse 'Runtime' into whole minutes.
# Casting to str first makes this cell safe to re-run: on a second run the
# column is already integer and has no .str accessor. Extracting the leading
# digits also copes with non-numeric placeholders (e.g. 'N/A'), which become
# NaN here instead of crashing astype(int).
runtime_minutes = pd.to_numeric(
    df['Runtime']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.extract(r'(\d+)', expand=False),
    errors='coerce',
)

# Keep the column as int with 0 standing in for "missing", as before
df['Runtime'] = runtime_minutes.fillna(0).astype(int)

# Plotting
# Leave the 0 placeholders out so missing runtimes do not show up as a
# spurious bar of 0-minute movies
known_runtimes = df.loc[df['Runtime'] > 0, 'Runtime']

plt.figure(figsize=(10, 6))
plt.hist(known_runtimes, bins=20, color='skyblue', edgecolor='black')
plt.title('Distribution of Runtimes in Top 250 Movies')
plt.xlabel('Runtime (minutes)')
plt.ylabel('Count')
plt.show()

In [None]:
# Treat missing 'Country' entries as 'Unknown' (mirrors the Genre handling);
# a NaN here would otherwise make the string operations below fail.
# A local series is used so the original column is left untouched.
countries = df['Country'].fillna('Unknown')

# Split 'Country' into separate countries and count occurrences.
# Splitting on ',' and stripping tolerates inconsistent spacing around commas,
# and empty fragments (e.g. from a trailing comma) are skipped.
country_counts = Counter(
    name
    for entry in countries
    for name in (part.strip() for part in entry.split(','))
    if name
)

# Convert to DataFrame for easier manipulation.
# Building from (country, count) pairs also works when the counter is empty.
df_country_counts = pd.DataFrame(list(country_counts.items()), columns=['Country', 'Count'])

# Sort by count, most frequent first
df_country_counts = df_country_counts.sort_values('Count', ascending=False)

# Plotting
plt.figure(figsize=(10, 6))
plt.barh(df_country_counts['Country'], df_country_counts['Count'], color='skyblue')
plt.title('Most Common Countries of Origin in Top 250 Movies')
plt.xlabel('Count')
plt.ylabel('Country')
# barh draws the first row at the bottom; flip so the largest bar is on top
plt.gca().invert_yaxis()
plt.show()