In [1]:
# 📦 Install required libraries (if not already)
!pip install scikit-learn pandas --quiet

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load IMDb Data
def load_imdb_data():
    # Download from IMDb: https://datasets.imdbws.com/
    basics = pd.read_csv('title.basics.tsv.gz', sep='\t', na_values='\\N', compression='gzip')
    ratings = pd.read_csv('title.ratings.tsv.gz', sep='\t', na_values='\\N', compression='gzip')

    # Filter only TV series and clean
    tv_shows = basics[(basics['titleType'] == 'tvSeries') & (basics['isAdult'] == 0)]
    tv_shows = tv_shows[['tconst', 'primaryTitle', 'genres', 'startYear']].dropna()

    # Merge with ratings
    merged = pd.merge(tv_shows, ratings, on='tconst')
    return merged

# Step 2: Recommend shows using TF-IDF on genres
def recommend_shows(df, user_genre, mood, top_n=10):
    # Combine genre and mood into one pseudo-genre string
    mood_genres = {
        "happy": "Comedy Family",
        "dark": "Thriller Crime Mystery",
        "romantic": "Romance Drama",
        "excited": "Action Adventure",
        "thoughtful": "Sci-Fi Fantasy"
    }
    mood_extension = mood_genres.get(mood.lower(), "")
    user_profile = user_genre + " " + mood_extension

    # Vectorize genres
    tfidf = TfidfVectorizer()
    genre_matrix = tfidf.fit_transform(df['genres'].fillna(""))

    # Transform user input
    user_vec = tfidf.transform([user_profile])

    # Compute cosine similarity
    similarity_scores = cosine_similarity(user_vec, genre_matrix).flatten()
    df['similarity'] = similarity_scores

    # Top matches
    top_matches = df.sort_values(by=['similarity', 'averageRating'], ascending=False).head(top_n)
    return top_matches[['primaryTitle', 'genres', 'averageRating', 'startYear']]

# Step 3: User Interaction
def main():
    print("🎬 TV Show Recommender (IMDb-Based)\n")
    genre = input("Enter your favorite genre (e.g., Comedy, Drama, Sci-Fi): ")
    mood = input("What is your mood? (happy, dark, romantic, excited, thoughtful): ")

    print("\n🔍 Loading and processing IMDb data...")
    df = load_imdb_data()

    print("\n📺 Top TV Show Recommendations for you:\n")
    recommendations = recommend_shows(df, genre, mood)
    for i, row in recommendations.iterrows():
        print(f"{i+1}. {row['primaryTitle']} ({row['startYear']}) - {row['genres']} [IMDb: {row['averageRating']}]")

# Run the recommender
main()


🎬 TV Show Recommender (IMDb-Based)

Enter your favorite genre (e.g., Comedy, Drama, Sci-Fi): Drama
What is your mood? (happy, dark, romantic, excited, thoughtful): Dark

🔍 Loading and processing IMDb data...


  basics = pd.read_csv('title.basics.tsv.gz', sep='\t', na_values='\\N', compression='gzip')



📺 Top TV Show Recommendations for you:

56082. From High Up (2022.0) - Crime,Mystery,Thriller [IMDb: 8.8]
39236. Taqdeer (2020.0) - Crime,Mystery,Thriller [IMDb: 8.6]
49883. The Bridge (2011.0) - Crime,Mystery,Thriller [IMDb: 8.6]
75783. 9th Floor (2024.0) - Crime,Mystery,Thriller [IMDb: 8.6]
54767. Special Squad (2005.0) - Crime,Mystery,Thriller [IMDb: 8.3]
17526. ¿Es usted el asesino? (1967.0) - Crime,Mystery,Thriller [IMDb: 8.2]
56078. Kaiser (2022.0) - Crime,Mystery,Thriller [IMDb: 8.2]
57975. The Children of Evil (2023.0) - Crime,Mystery,Thriller [IMDb: 8.2]
62191. The Twisted Doll (2024.0) - Crime,Mystery,Thriller [IMDb: 8.2]
34023. Leverage: Redemption (2021.0) - Crime,Mystery,Thriller [IMDb: 8.1]
