In [17]:
# Notebook title banner.
print("Project on Movie Prediction")

Project on Movie Prediction


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# --- Configuration ---------------------------------------------------------
# Location of the movie metadata CSV; it must provide the columns
# "movieId", "title" and "genres". Edit this path to match your machine.
MOVIES_CSV = "C:/Users/hp/movie.csv"
# Only the first MAX_MOVIES rows are used, to keep memory usage low.
MAX_MOVIES = 10_000

# --- Load data -------------------------------------------------------------
# Read the required columns only, and stop after MAX_MOVIES rows instead of
# loading the entire file and truncating it afterwards.
movies = pd.read_csv(
    MOVIES_CSV,
    usecols=["movieId", "title", "genres"],
    nrows=MAX_MOVIES,
)

# Missing genres become empty strings so the vectorizer can process them.
movies['genres'] = movies['genres'].fillna('')

# --- Features --------------------------------------------------------------
# Convert the genre strings into a sparse TF-IDF matrix (one row per movie).
# max_features caps the vocabulary at 5000 terms to save memory.
tfidf = TfidfVectorizer(stop_words="english", max_features=5000)
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# --- Model -----------------------------------------------------------------
# Brute-force nearest neighbours with cosine distance: neighbours are looked
# up per query, so no full movie-by-movie similarity matrix is ever built.
model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6)
model.fit(tfidf_matrix)

# --- Title lookup ----------------------------------------------------------
# Map each title to its row in `movies`. When a title occurs more than once,
# keep the first occurrence so that indices[title] is always a single row.
# (Series.drop_duplicates() would de-duplicate the row numbers, which are
# already unique, and leave duplicated titles in place.)
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Define Efficient Recommendation Function
def recommend_movies(title, num_recommendations=5):
    """Return titles of the movies whose genre vectors are closest to `title`.

    Relies on the module-level objects built earlier in the notebook:
    `indices` (title -> row), `tfidf_matrix` (one feature row per movie),
    `model` (fitted nearest-neighbours estimator) and `movies` (DataFrame).

    Parameters
    ----------
    title : str
        Exact movie title to look up in `indices`.
    num_recommendations : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    list of str
        Up to `num_recommendations` titles, nearest first, never including
        the queried movie itself. Returns ["Movie not found!"] when `title`
        is not present in `indices`.
    """
    if title not in indices:
        return ["Movie not found!"]

    idx = indices[title]
    # A title that occurs several times makes the lookup return a Series;
    # fall back to its first match so exactly one row is queried.
    if hasattr(idx, "iloc"):
        idx = idx.iloc[0]
    idx = int(idx)

    # Ask for one extra neighbour because the queried movie is normally among
    # its own neighbours, but never request more rows than were fitted.
    n_query = min(num_recommendations + 1, tfidf_matrix.shape[0])
    _, neighbor_rows = model.kneighbors(tfidf_matrix[idx], n_neighbors=n_query)

    # Movies with identical genre vectors all sit at distance 0, so the
    # queried movie is not guaranteed to come back first (or at all). Remove
    # it by row position instead of blindly dropping the first hit.
    # NOTE: assumes row labels in `indices` equal row positions in `movies`
    # and `tfidf_matrix` (true for the default RangeIndex used here).
    picks = [int(row) for row in neighbor_rows[0] if int(row) != idx]
    picks = picks[:num_recommendations]

    return movies['title'].iloc[picks].tolist()

# Ask the user for a movie title.
movie_name = input("\n🎬 Enter a movie name: ")
recommended_movies = recommend_movies(movie_name)

# Display the result. recommend_movies signals an unknown title with the
# sentinel list ["Movie not found!"]; report that on its own rather than
# presenting it as recommendation number 1.
if recommended_movies == ["Movie not found!"]:
    print(f"\n{recommended_movies[0]}")
else:
    print(f"\n🔥 If you liked '{movie_name}', you might also like:")
    for i, movie in enumerate(recommended_movies, start=1):
        print(f"{i}. {movie}")
