In [10]:
import pandas as pd
import requests
import csv
from urllib.parse import quote

# Your OMDb API Key
from api_keys import omdb_api_key

In [6]:
# Read the Netflix catalogue and collect every distinct, non-missing title
df = pd.read_csv('Netflix files.csv')  # Adjust path if needed
# Rows whose title is NaN are filtered out first; unique() keeps first-seen order.
movie_titles = df.loc[df['title'].notna(), 'title'].unique()

In [7]:
# Function to fetch data from OMDb
def fetch_movie_data(title, timeout=10):
    """Look up one title on OMDb and return its headline rating fields.

    Parameters
    ----------
    title : str
        Title to search for; sent as the ``t`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout a single stalled connection would block the
        whole batch indefinitely.

    Returns
    -------
    dict or None
        A dict with the keys ``'Title'``, ``'Year'``, ``'IMDb Rating'`` and
        ``'IMDb Votes'`` copied from the JSON payload when its ``'Response'``
        field equals ``'True'``. ``None`` (after printing a message) when the
        request fails, the status code is not 200, the body is not valid
        JSON, or the payload does not report success.
    """
    try:
        # Let requests build and escape the query string, and use HTTPS so the
        # API key is not sent in clear text.
        response = requests.get(
            'https://www.omdbapi.com/',
            params={'apikey': omdb_api_key, 't': title},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts skip this title instead of aborting
        # the caller's loop.
        print(f"Error fetching {title}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching {title}: Status code {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        # A 200 response with a non-JSON body (e.g. an HTML error page).
        print(f"Error fetching {title}: response was not valid JSON")
        return None

    if data.get('Response') != 'True':
        print(f"Movie not found: {title}")
        return None

    return {
        'Title': data.get('Title'),
        'Year': data.get('Year'),
        'IMDb Rating': data.get('imdbRating'),
        'IMDb Votes': data.get('imdbVotes')
    }

In [9]:

# Query OMDb for each title and stream the successful lookups into a CSV
with open('omdb_movie_data.csv', mode='w', newline='', encoding='utf-8') as out_file:
    csv_writer = csv.DictWriter(
        out_file,
        fieldnames=['Title', 'Year', 'IMDb Rating', 'IMDb Votes'],
    )
    csv_writer.writeheader()

    # map() fetches lazily, so each row is written right after its lookup;
    # filter(None, ...) drops the falsy results returned for failed lookups.
    csv_writer.writerows(filter(None, map(fetch_movie_data, movie_titles)))

print("Done! Movie data written to 'omdb_movie_data.csv'")


Movie not found: Little Singham - Black Shadow
Movie not found: Tughlaq Durbar (Telugu)
Movie not found: Firedrake the Silver Dragon
Movie not found: Untold: Crime & Penalties
Movie not found: Boomika (Hindi)
Movie not found: Boomika (Malayalam)
Movie not found: Boomika (Telugu)
Movie not found: I missed you: Director's Cut
Movie not found: Rurouni Kenshin: The Beginning
Movie not found: Fear Street Part 3: 1666
Movie not found: Bridgerton - The Afterparty
Movie not found: Fear Street Part 2: 1978
Movie not found: How I Became a Superhero
Movie not found: Fear Street Part 1: 1994
Movie not found: Rurouni Kenshin: The Final
Movie not found: Ali & Ratu Ratu Queens
Movie not found: Bling Empire - The Afterparty
Movie not found: Cobra Kai - The Afterparty
Movie not found: To All the Boys: Always and Forever - The Afterparty
Movie not found: Black Holes | The Edge of All We Know
Movie not found: Dog Gone Trouble
Movie not found: 99 Songs (Tamil)
Movie not found: 99 Songs (Telugu)
Movie not 

In [None]:
# Clean and merge Netflix data and OMDb data

# Load CSV files
omdb_df = pd.read_csv("omdb_movie_data.csv")
netflix_df = pd.read_csv("Netflix files.csv")

# Standardize column names: trimmed, lower-case, spaces -> underscores
# (e.g. "IMDb Votes" -> "imdb_votes")
omdb_df.columns = omdb_df.columns.str.strip().str.lower().str.replace(" ", "_")
netflix_df.columns = netflix_df.columns.str.strip().str.lower().str.replace(" ", "_")

# Clean and convert OMDB data.
# Vote counts are written with thousands separators ("12,345"). Missing values
# ("N/A" is parsed as NaN by read_csv) would make a plain .astype(int) raise,
# and a column with no commas at all is inferred as numeric and has no .str
# accessor. Casting to str first and coercing handles both; the nullable
# Int64 dtype keeps integers while allowing missing entries.
omdb_df['imdb_votes'] = pd.to_numeric(
    omdb_df['imdb_votes'].astype(str).str.replace(",", "", regex=False),
    errors="coerce",
).astype("Int64")
# Unparseable ratings become NaN instead of aborting the cell.
omdb_df['imdb_rating'] = pd.to_numeric(omdb_df['imdb_rating'], errors="coerce")

# Convert Netflix duration to numeric (minutes only)
def extract_minutes(duration):
    """Return the leading minute count from a duration such as ``"90 min"``.

    Parameters
    ----------
    duration : object
        Raw duration value. Only strings containing ``"min"`` are parsed.

    Returns
    -------
    int or None
        The integer formed by the first whitespace-separated token, or
        ``None`` when ``duration`` is not a string, does not contain
        ``"min"``, or its first token is not an integer.
    """
    if not isinstance(duration, str) or "min" not in duration:
        return None
    try:
        # split() with no argument ignores leading and repeated whitespace,
        # so " 90 min" parses the same as "90 min".
        return int(duration.split()[0])
    except ValueError:
        # First token is not a number (e.g. "ninety min").
        return None

# Derive a numeric duration column; entries extract_minutes cannot parse become missing
netflix_df['duration_minutes'] = netflix_df['duration'].apply(extract_minutes)

# If the OMDb file lists the same title more than once, an inner merge would
# emit one output row per duplicate and inflate the result. Keep the first
# occurrence of each title so every Netflix row matches at most one OMDb row.
omdb_unique_df = omdb_df.drop_duplicates(subset='title')

# Only keep rows in Netflix that are also in OMDB
merged_df = pd.merge(netflix_df, omdb_unique_df, how='inner', on='title')

# Keep only Netflix columns that match with OMDB (plus useful ones)
columns_to_keep = ['title', 'director', 'cast', 'rating', 'duration_minutes', 'description']
merged_df = merged_df[columns_to_keep + ['year', 'imdb_rating', 'imdb_votes']]

# Save the cleaned and merged data
merged_df.to_csv("cleaned_merged_movies.csv", index=False)

print("✅ Cleaned & merged CSV saved as 'cleaned_merged_movies.csv'")


✅ Cleaned & merged CSV saved as 'cleaned_merged_movies.csv'
