<a href="https://colab.research.google.com/github/SaiKrishnaReddyDaka14/lumaa-spring-2025-ai-ml/blob/main/Simple_Content_Based_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Name:** Sai Krishna Reddy Daka

**Role:** Lumaa AI/Machine Learning Intern


In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Location of the movie dataset (CSV, relative to the notebook's working directory)
file_path = "imdb_top_250.csv"

# Read the CSV and keep only the two columns the recommender uses:
# 'Title' for display and 'Genre' as the text that gets vectorized.
df = pd.read_csv(file_path).loc[:, ['Title', 'Genre']]

In [3]:
# Ask the user for a free-text description of what they want to watch
query_prompt = "Enter your query (e.g., 'I love thriller movies'):\n"
user_input = input(query_prompt)

# Echo the query back so it is recorded in the cell output
print("\nUser Input:", user_input)

Enter your query (e.g., 'I love thriller movies'):
I love thriller movies

User Input: I love thriller movies


In [4]:
# Collect the genre text of every movie, in dataset order.
# Missing genres are replaced with an empty string because TfidfVectorizer
# rejects NaN documents; when no genre is missing this is a no-op.
genre_list = df['Genre'].fillna('').tolist()

# Append the user's query as the LAST entry, so the query and the movie genres
# can be vectorized together and the query is always found at position -1.
all_texts = genre_list + [user_input]

In [5]:
# Create a TF-IDF vectorizer with its default settings
vectorizer = TfidfVectorizer()

# Learn the vocabulary from all texts (movie genres followed by the query) and
# encode each text as one row of TF-IDF weights; row order matches `all_texts`.
tfidf_matrix = vectorizer.fit_transform(raw_documents=all_texts)

In [6]:
# Score every movie against the query: the query is the last TF-IDF row and the
# movies are all the rows before it.
similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])

# Maximum number of recommendations to return
top_n = 5

# Scores of the (single) query against each movie
query_scores = similarity_scores[0]

# argsort orders ascending, so the last `top_n` positions hold the best matches;
# reversing them puts the most similar movie first.
top_indices = query_scores.argsort()[-top_n:][::-1]

# Drop movies whose similarity is zero: they share no term with the query and
# would otherwise pad the list with arbitrary titles whenever fewer than `top_n`
# movies actually match. The result may therefore hold fewer than `top_n`
# indices (or none at all when nothing matches).
top_indices = top_indices[query_scores[top_indices] > 0]

In [7]:
# Look up the selected movies by row position, keeping the ranking order
recommended_movies = df.iloc[top_indices]

# Show the title and genre of each recommendation
print("\nRecommended Movies:")
print(recommended_movies.loc[:, ['Title', 'Genre']])


Recommended Movies:
                     Title             Genre
153           Blade Runner          Thriller
212          Amores perros  Drama | Thriller
224           Donnie Darko  Drama | Thriller
57   Das Leben der Anderen  Drama | Thriller
148                   Room  Drama | Thriller
