In [208]:
import pandas as pd

In [215]:
data = pd.read_parquet("small_dataset.parquet")

In [313]:
import numpy as np
import time
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import Pipeline
import joblib
import os

In [314]:
def build_recommendations_model(data):

    try:
        start_time = time.time()
        print("Building recommendation model...")

        os.makedirs('models', exist_ok=True)
    
        #Removes common English words like "the," "and," "is" 
        tfidf = TfidfVectorizer(stop_words='english')
    
        tfidf_matrix = tfidf.fit_transform(data['combined_features'])
    
        joblib.dump(tfidf,'models/tfidf_vectorizer.joblib')
    
        indices = pd.Series(data.index,index=data['title'].str.lower()).drop_duplicates()
    
        joblib.dump(indices,'models/indices_mapping.joblib')
    
        print("Computing cosine similarity matrix...")
        cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    
        # Save the similarity matrix
        print("Saving model files...")
        joblib.dump(cosine_sim, 'models/cosine_sim_matrix.joblib')
    
        print(f"Model building completed in {time.time() - start_time:.2f} seconds")
    
        load_recommendation_model()

        return cosine_sim, indices
        
    except Exception as e:
        print(f"Error building model: {e}")
        return None, None, None

In [315]:
def load_recommendation_model():

    # Load the movie data
    data = pd.read_parquet('small_dataset.parquet')
    
    try:
        
        # Load the similarity matrix
        cosine_sim = joblib.load('models/cosine_sim_matrix.joblib')

         # Load the indices mapping
        indices = joblib.load('models/indices_mapping.joblib')

        return cosine_sim, indices, data

    except FileNotFoundError:
        
        print("Model files not found. Building model...")
        return build_recommendations_model(data)
        

In [328]:
def get_recommendations(title, top_n=5):

    cosine_sim,indices,data = load_recommendation_model()

     # Check if any component is missing
    if cosine_sim is None or indices is None or movie_data is None:
        return ("Model not found. Please build the model first.")

    title = title.lower()

    if title not in indices:
        print(f"Movie '{title}' not found in the database!! Sorry for the inconvenience\nHere are other top movies you can enjoy :)")

        # Return popular movies as fallback
        return data.sort_values('vote_average', ascending=False)['title'].head(top_n)
    
    index = indices[title]

    sim_scores= list(enumerate(cosine_sim[index]))

    sim_scores = sorted(sim_scores,key=lambda x:x[1],reverse=True)

    sim_scores = sim_scores[1:11]

    movies_indices = [i[0] for i in sim_scores]

    return data['title'].iloc[movies_indices]

In [329]:
def main(movie):

    try:
        print(f"\nRecommendations for movie '{movie}': ")
        recommendations = get_recommendations(movie,top_n=5)
        print(recommendations)

        
           
    except Exception as e:
        print(f"Error getting recommendations for '{movie}': {e}")

In [333]:
main("Inception")


Recommendations for movie 'Inception': 
8914                                         The Cell
7213                             The Thirteenth Floor
16517                                  The Good Night
8025     The Human Mind Is A Beautiful Place at Night
24071                              Virtual Revolution
13694                                          Cypher
10691                        Inception: The Cobol Job
8282                                         Altitude
883                                        The Matrix
11805                                      Homunculus
Name: title, dtype: object
