In [1]:
import h5py
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
import joblib

In [2]:
# Load the cleaned data
df = pd.read_csv('cleaned_data_final.csv')

# Load the TF-IDF matrix
with h5py.File('tfidf_matrix.h5', 'r') as hdf:
    tfidf_matrix = hdf['tfidf_matrix'][:]

# Load the Cosine Similarity matrix
with h5py.File('cosine_sim_matrix.h5', 'r') as hdf:
    cosine_sim = hdf['cosine_sim_matrix'][:]

# Load the SVD matrix
with h5py.File('svd_matrix.h5', 'r') as hdf:
    svd_matrix = hdf['svd_matrix'][:]

# Load the SVD model
svd = joblib.load('svd_model.pkl')

  df = pd.read_csv('cleaned_data_final.csv')


In [3]:
# Function for Hybrid Recommendation: Combine content-based and collaborative filtering
def hybrid_recommendation(title, df, svd_matrix, cosine_sim_matrix, alpha=0.5, top_n=10):
    # Find the index of the movie that matches the title
    idx = df.index[df['title'].str.lower() == title.lower()].tolist()[0]
    
    # Compute cosine similarity in the reduced space (using SVD matrix)
    svd_sim = cosine_similarity(svd_matrix[idx].reshape(1, -1), svd_matrix).flatten()
    
    # Compute hybrid score by combining content-based similarity with SVD-based similarity
    hybrid_scores = alpha * cosine_sim_matrix[idx] + (1 - alpha) * svd_sim
    
    # Get top n recommendations, ignoring the movie itself
    top_indices = np.argsort(hybrid_scores)[-top_n-1:-1][::-1]
    
    return df.iloc[top_indices]

In [5]:
 # Example: Recommend movies using hybrid method
recommendations = hybrid_recommendation('anbe sivam', df, svd_matrix, cosine_sim, alpha=0.7)
print(recommendations[['title', 'audienceScore', 'tomatoMeter', 'genre', 'sentiment_score']])

                           title  audienceScore  tomatoMeter  \
23018  as in heaven, so on earth       0.583491     0.657638   
23032  all's well ends well 2012       0.260000     0.657638   
23031               jour de fete       0.830000     1.000000   
23030           ephraim's rescue       0.830000     0.657638   
23029             the destroyers       0.750000     0.657638   
23028         down with the king       0.583491     0.657638   
23027        butterfly and sword       0.500000     0.657638   
23026          run for the money       0.220000     0.000000   
23025     shattering the silence       0.583491     0.657638   
23024        cantantes en guerra       0.583491     0.657638   

                               genre  sentiment_score  
23018  animation, mystery & thriller         0.550000  
23032                         comedy         0.511111  
23031                         comedy         0.266667  
23030                          drama         0.500000  
23029          