In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


In [2]:
df = pd.read_csv(r"C:\Users\INDIAN\Downloads\movies.csv")
df.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bondâ€™s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [3]:
# Fill NaN in overview
df['overview'] = df['overview'].fillna('')

# Combine genres and overview for better recommendations
df['combined_features'] = df['genres'].fillna('') + ' ' + df['overview']


In [5]:
# Convert text to TF-IDF features
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Check matrix shape
print("TF-IDF Matrix shape:", tfidf_matrix.shape)


TF-IDF Matrix shape: (4803, 20978)


In [6]:
# Compute cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)


In [7]:
def recommend_movies(title, num_recommendations=5):
    if title not in indices:
        print(f"Movie '{title}' not found in dataset.")
        return []
    
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
    movie_indices = [i[0] for i in sim_scores]
    
    return df['title'].iloc[movie_indices]


In [11]:
def precision_at_k(query_title, recommended_titles, k=5):
    try:
        query_genres = set(df[df['title'] == query_title]['genres'].iloc[0].split(', '))
        precision_hits = 0
        for title in recommended_titles:
            rec_genres = set(df[df['title'] == title]['genres'].iloc[0].split(', '))
            if query_genres & rec_genres:
                precision_hits += 1
        return precision_hits / k
    except:
        return 0.0


In [12]:
# This is the step you missed!
indices = pd.Series(df.index, index=df['title']).drop_duplicates()


In [13]:
movie = "Inception"
recs = recommend_movies(movie, 5)
print("Recommendations:", list(recs))
print("Precision@5:", precision_at_k(movie, recs))


Recommendations: ['Cypher', 'The Helix... Loaded', 'Mission: Impossible - Rogue Nation', 'Flatliners', 'Renaissance']
Precision@5: 0.0


In [14]:
# Test with a movie
movie_name = "The Dark Knight"
print(f"\nRecommended movies for: {movie_name}")
print(recommend_movies(movie_name, num_recommendations=5))



Recommended movies for: The Dark Knight
3                         The Dark Knight Rises
428                              Batman Returns
3854    Batman: The Dark Knight Returns, Part 2
299                              Batman Forever
119                               Batman Begins
Name: title, dtype: object


In [15]:
pip install gradio


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
import gradio as gr

def movie_suggester(input_movie):
    if input_movie not in indices:
        return f"Movie '{input_movie}' not found. Please check the spelling or try another title."
    
    recommendations = recommend_movies(input_movie, 5)
    output = f"Top 5 movies similar to '{input_movie}':\n\n"
    for i, title in enumerate(recommendations, 1):
        output += f"{i}. {title}\n"
    
    return output

# Launch UI
gr.Interface(fn=movie_suggester,
             inputs=gr.Textbox(label="Enter a Movie Title"),
             outputs="text",
             title="ðŸŽ¬ Samyuktha's Movie Recommendation System",
             description="Enter a movie name to get 5 similar recommendations (Content-Based)").launch()


* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.


