In [7]:
""" Import Pandas and Scikit Learn to process data, and to construct the model """

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
""" Load in the data set """

file_path = '/Users/mazenalziq/PycharmProject/AI/Lumaa_AI/imdb_top_500.csv'
df = pd.read_csv(file_path)

In [9]:
""" Filter and extract relevant data """

required_columns = {'Series_Title', 'Overview', 'Genre'}
if not required_columns.issubset(df.columns):
    raise ValueError("Dataset must contain 'Series_Title', 'Overview', and 'Genre' columns.")
df = df[['Series_Title', 'Overview', 'Genre']].dropna()

# Rename columns for clarity
df.columns = ['title', 'description', 'genre']

# Combine description and genre into a single text field
df['combined_text'] = df['description'] + " " + df['genre']

In [10]:
""" Vectorize text using TF-IDF """

vectorizer = TfidfVectorizer(stop_words='english')
text_vectors = vectorizer.fit_transform(df['description'])

In [11]:
""" Use cosine similarity to determine movie recommendations """

def recommend_movies(user_input, top_n=5):
    """Given a user query, return the top N most similar movies based on description & genre."""
    user_vector = vectorizer.transform([user_input])
    similarities = cosine_similarity(user_vector, text_vectors).flatten()
    top_indices = similarities.argsort()[-top_n:][::-1]  # Get top matches
    
    recommendations = [(df.iloc[i]['title'], df.iloc[i]['genre'], similarities[i] * 100) for i in top_indices]  # Convert to percentage
    return recommendations

In [12]:
""" Get user input/query, display recommendations and accuracy """

user_query = input("Enter a movie preference (description or genre): ")
recommendations = recommend_movies(user_query)

print("\nTop Movie Recommendations:")
for title, genre, score in recommendations:
    print(f"{title} ({genre}): {score:.2f}% similarity")


Top Movie Recommendations:
The Message (Biography, Drama, History): 13.54% similarity
Dil Chahta Hai (Comedy, Drama, Romance): 13.24% similarity
Gone Girl (Drama, Mystery, Thriller): 12.41% similarity
Requiem for a Dream (Drama): 12.36% similarity
Togo (Adventure, Biography, Drama): 11.81% similarity
