In [29]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the sample dataset
movies_en = pd.read_csv('English_movies.csv')

# A missing Genre/MovieName/tags value would turn the concatenated feature
# string into NaN, and TfidfVectorizer refuses NaN documents. Treat missing
# text as empty and make sure every text column really holds strings.
text_columns = ['Genre', 'MovieName', 'tags']
movies_en[text_columns] = movies_en[text_columns].fillna('').astype(str)

# Clean the data (strip extra spaces; titles are lowercased for lookups)
movies_en['MovieName'] = movies_en['MovieName'].str.strip().str.lower()
movies_en['tags'] = movies_en['tags'].str.strip()

# Combine Genre, MovieName, and tags into a single string for each movie
movies_en['combined_features'] = (
    movies_en['Genre'] + " " + movies_en['MovieName'] + " " + movies_en['tags']
)

# Build the TF-IDF matrix (English stop words removed) over the combined text
tfidf_en = TfidfVectorizer(stop_words='english')
tfidf_matrix_en = tfidf_en.fit_transform(movies_en['combined_features'])

# Pairwise cosine similarity between all movies; row i / column j are the
# positional row numbers of movies_en
cosin_sim_en = cosine_similarity(tfidf_matrix_en, tfidf_matrix_en)

def get_recommendation_en(title, top_n=10):
    """Return the movies most similar to ``title``.

    Looks the title up in ``movies_en['MovieName']`` (case-insensitive,
    surrounding whitespace ignored) and ranks every other movie by its
    score in the matching row of ``cosin_sim_en``.

    Parameters
    ----------
    title : str
        Movie title to look up.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    list of str or str
        Up to ``top_n`` movie names ordered from most to least similar, or
        the string ``"English Movie not found."`` when no title matches.
    """
    title_lower = title.strip().lower()  # Normalize the same way the dataset titles are

    # Boolean mask of rows whose title matches; computed once and reused
    matches = (
        (movies_en['MovieName'].str.lower() == title_lower)
        .fillna(False)
        .to_numpy(dtype=bool)
    )
    if not matches.any():
        return "English Movie not found."

    # Positional row of the first match. The similarity matrix and .iloc are
    # both positional, so an index *label* must not be used here.
    idx = int(matches.argmax())

    # Rank every other movie by similarity. The queried movie is excluded by
    # position rather than by assuming it sorts first: ties at the top score
    # (duplicate features) or an all-zero row would otherwise leak it into
    # the results and drop a genuine neighbour.
    scores = cosin_sim_en[idx]
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != idx),
        key=lambda pos: scores[pos],
        reverse=True,
    )
    movie_indices = ranked[:max(top_n, 0)]

    # Return the movie names of the most similar movies
    return movies_en['MovieName'].iloc[movie_indices].tolist()


# Ask for a title and look up its recommendations
title = input("Enter the movie title: ").strip()
recommendations = get_recommendation_en(title)

if not isinstance(recommendations, list):
    # Anything that is not a list is a message from the lookup; show it as-is
    print(recommendations)
elif not recommendations:
    print("No recommendations found.")
else:
    print("Recommended Movies:")
    # One recommendation per line
    print(*recommendations, sep="\n")


Recommended Movies:
memento
the matrix reloaded
the matrix revolutions
primer
the dark knight rises
g.i. joe: the rise of cobra
looper
mad max: fury road
star trek: nemesis
interstellar


In [27]:
import pickle

# Persist the English movie table together with its cosine similarity matrix
# as a single (DataFrame, matrix) tuple so both can be restored in one load
with open('English_Movie_Recommendation.pkl', 'wb') as file:
    pickle.Pickler(file).dump((movies_en, cosin_sim_en))