In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load movie data from CSV files
movies_data = pd.read_csv('movies.csv')
ratings_data = pd.read_csv('ratings.csv')  # Assuming you have a ratings dataset

# Surprise requires a specific format for ratings data
reader = Reader(rating_scale=(1, 5))  # Ratings scale from 1 to 5
data = Dataset.load_from_df(ratings_data[['userId', 'movieId', 'rating']], reader)

In [2]:
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Initialize and train the SVD model
model_svd = SVD()
model_svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x15b7a7cf290>

In [3]:
# Function to recommend movies based on a given movie title
def Recommend_movies_svd(movie_title, top_n=11):
   
    # Find the movie ID corresponding to the input movie title
    movie_id = movies_data.loc[movies_data['title'] == movie_title, 'movieId'].iloc[0]
    
    # Get the latent factors (features) for the input movie from the trained SVD model
    movie_factors = model_svd.qi[model_svd.trainset.to_inner_iid(movie_id)]
    
    similarities = []
    # Iterate over all items (movies) in the training set of the SVD model
    for inner_id in model_svd.trainset.all_items():
        # Get the latent factors (features) for the current movie
        other_movie_factors = model_svd.qi[inner_id]
        
        # Calculate the cosine similarity between the input movie and the current movie
        similarity = sum(movie_factors * other_movie_factors)
        
        # Append the movie ID and its similarity to the input movie to the list
        similarities.append((model_svd.trainset.to_raw_iid(inner_id), similarity))
    
    # Sort movies by similarity in descending order (higher similarity means more similar)
    similarities.sort(key=lambda x: x[1], reverse=True)
    
     # Extract the top recommended movie IDs (excluding the input movie itself)
    recommended_movie_ids = [sim[0] for sim in similarities[:top_n] if sim[0] != movie_id]
    
    # Get movie titles based on recommended IDs
    recommended_movies = movies_data[movies_data['movieId'].isin(recommended_movie_ids)]['title'].values
    
    return recommended_movies


In [4]:
movies_data

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [5]:
import pickle
filename = 'model_svd.pkl'

with open(filename, 'wb') as file:
    pickle.dump(model_svd, file)

filename2 = 'movies_data.pkl'

with open(filename2, 'wb') as file:
    pickle.dump(movies_data, file)



In [6]:
movie_title = 'Ghost in the Shell (2017)'
recommended_movies = Recommend_movies_svd(movie_title)
print(f"Recommended movies for '{movie_title}':")
for movie in recommended_movies:
    print(movie)


Recommended movies for 'Ghost in the Shell (2017)':
Crying Game, The (1992)
Indian Summer (a.k.a. Alive & Kicking) (1996)
Interiors (1978)
International, The (2009)
My Life in Ruins (2009)
Sex and the City 2 (2010)
Tammy (2014)
Exodus: Gods and Kings (2014)
Insurgent (2015)
My Scientology Movie (2016)


In [7]:
trainset, testset = train_test_split(data, test_size=0.2)

model_svd = SVD()

model_svd.fit(trainset)

predictions = model_svd.test(testset)

rmse = accuracy.rmse(predictions)

print(f"RMSE: {rmse}")

RMSE: 0.8729
RMSE: 0.8728899317493092


In [8]:
def calculate_accuracy(predictions, tolerance=1):
    correct_predictions = 0
    total_predictions = len(predictions)

    for prediction in predictions:
        actual_rating = prediction.r_ui
        predicted_rating = prediction.est

        if abs(predicted_rating - actual_rating) <= tolerance:
            correct_predictions += 1

    accuracy_percentage = (correct_predictions / total_predictions) * 100
    return accuracy_percentage


In [9]:
predictions = model_svd.test(testset)
accuracy_percentage = calculate_accuracy(predictions, tolerance=1)

print(f"Accuracy: {accuracy_percentage:.2f}%")


Accuracy: 77.43%
