<a href="https://colab.research.google.com/github/CarlosVick1/Recommendation-Systems/blob/main/recSys3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Import Libraries

In [None]:
pip install scikit-surprise



In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from surprise import Dataset, Reader, SVDpp
from surprise.model_selection import KFold, cross_validate
from surprise import accuracy

## 2. Dataset (Load, Display and Clean)

In [None]:
# Read the cleaned MovieLens ratings sample from the Excel workbook into a DataFrame
DATA_PATH = '/content/movieRatings_cleaned_sample.xlsx'
df = pd.read_excel(DATA_PATH)

In [None]:
# Preview the first five rows to check which columns were loaded
df.head(n=5)

Unnamed: 0,userId,movieId,title,genres,rating
0,1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.0
1,1,110,Braveheart (1995),Action|Drama|War,4.0
2,1,158,Casper (1995),Adventure|Children,4.0
3,1,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.5
4,1,356,Forrest Gump (1994),Comedy|Drama|Romance|War,5.0


In [None]:
# Keep only the rows whose 'genres' value is present; 'genres' is later fed to the
# TF-IDF vectorizer, which expects a string for every row
has_genres = df['genres'].notna()
df = df[has_genres]

## 3. Train/Test Split

In [None]:
# Hold out 25% of the rating rows for evaluation; the fixed seed keeps the split repeatable
train_data, test_data = train_test_split(
    df,
    random_state=42,
    test_size=0.25,
)

In [None]:
# Surprise: wrap the training ratings in a Surprise dataset.
# The rating scale is read from the data instead of being hard-coded to (1, 5): the sample
# contains half-star ratings (e.g. 4.5), and Surprise clips every prediction to the declared
# scale, so a wrong lower bound would make the lowest ratings impossible to predict.
rating_scale = (float(df['rating'].min()), float(df['rating'].max()))
reader = Reader(rating_scale=rating_scale)
data = Dataset.load_from_df(train_data[['userId', 'movieId', 'rating']], reader)

## 4. Build and Train the Models

In [None]:
# Build and train the SVD++ model.
# Both the factor initialisation and the cross-validation fold shuffling are seeded so that
# re-running the notebook reproduces the same RMSE table and the same fitted model
# (the seed matches the one used for the train/test split).
model_svdpp = SVDpp(random_state=42)
cv_folds = KFold(n_splits=5, random_state=42, shuffle=True)
cross_validate(model_svdpp, data, measures=['RMSE'], cv=cv_folds, verbose=True)

# cross_validate only reports fold metrics; refit on the complete training set so the
# model used for predictions below has seen all training ratings
model_svdpp.fit(data.build_full_trainset())

Evaluating RMSE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8916  0.8917  0.9014  0.9083  0.8937  0.8973  0.0066  
Fit time          38.16   38.01   38.07   38.84   39.36   38.49   0.53    
Test time         4.33    5.32    4.33    4.44    5.24    4.73    0.45    


<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x7db864f96e90>

In [None]:
# TF-IDF features built from the 'genres' text of each rating row
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')

# Learn the vocabulary and IDF weights from the training rows only, then encode both splits
train_genres = train_data['genres']
vectorizer.fit(train_genres)
X_train = vectorizer.transform(train_genres)
X_test = vectorizer.transform(test_data['genres'])

In [None]:
# Content-based model: regress the rating on the TF-IDF genre features
y_train = train_data['rating']
lr_model = LinearRegression().fit(X_train, y_train)
lr_model

## 5. Merge and Evaluate Models  

In [None]:
# Make predictions using both models.
# The two id columns are iterated directly instead of via DataFrame.iterrows(), which
# builds a full Series object for every test row just to read two values from it.
svdpp_predictions = [
    model_svdpp.predict(uid, iid).est
    for uid, iid in zip(test_data['userId'], test_data['movieId'])
]
tfidf_predictions = lr_model.predict(X_test)

In [None]:
# Blend the two models with equal weights (plain average).
# tfidf_predictions comes from lr_model.predict and svdpp_predictions is a Python list;
# the addition is carried out element-wise.
prediction_sum = tfidf_predictions + svdpp_predictions
combined_predictions = prediction_sum / 2

In [None]:
# Score the blended predictions against the held-out ratings
from sklearn.metrics import mean_squared_error

y_true = test_data['rating']
mse = mean_squared_error(y_true, combined_predictions)
print(f'Combined Model Mean Squared Error: {mse}')

Combined Model Mean Squared Error: 0.827604867158851


## 6. Hybrid Recommendation Function

In [None]:
import random

# Function to get hybrid recommendations

def get_hybrid_recommendations(user_id, top_n=10):
    """Return the titles of the best-scoring movies the user has not rated yet.

    Every movie in ``df`` that ``user_id`` has no rating for is scored by both models
    and the two scores are averaged with equal weights:

    * collaborative score: ``model_svdpp.predict(user_id, movie_id).est``
    * content score: ``lr_model.predict`` on the TF-IDF encoding of the movie's genres

    Relies on the notebook-level objects ``df``, ``model_svdpp``, ``vectorizer`` and
    ``lr_model`` being defined and fitted.

    Args:
        user_id: Raw user id as stored in ``df['userId']``.
        top_n: Maximum number of titles to return.

    Returns:
        A list of movie titles ordered from highest to lowest hybrid score. The list is
        empty when the user has already rated every movie in ``df``.
    """
    seen_movie_ids = set(df.loc[df['userId'] == user_id, 'movieId'])

    # One row per movie: df holds one row per rating, so the same movie appears many times
    catalog = df.drop_duplicates(subset='movieId')[['movieId', 'title', 'genres']]

    # Recommend only movies the user has not rated
    candidates = catalog[~catalog['movieId'].isin(seen_movie_ids)]
    if candidates.empty:
        return []

    # Score each candidate for THIS user with both models
    svdpp_scores = pd.Series(
        [model_svdpp.predict(user_id, movie_id).est for movie_id in candidates['movieId']],
        index=candidates.index,
    )
    lr_scores = pd.Series(
        lr_model.predict(vectorizer.transform(candidates['genres'])),
        index=candidates.index,
    )

    # Equal-weight average of the two model scores
    ranked = candidates.assign(hybrid_score=(svdpp_scores + lr_scores) / 2).sort_values(
        'hybrid_score', ascending=False, kind='mergesort'  # stable: ties keep catalog order
    )
    return ranked['title'].head(top_n).tolist()


## 7. Recommendation

In [None]:
from IPython.display import display

# Fetch and show the hybrid recommendations for one example user
user_id = 816
recommendations = get_hybrid_recommendations(user_id)

# Heading followed by one blank line, then the list itself
print(f"Top 10 recommendations for User with ID {user_id}", end="\n\n")
display(recommendations)


Top 10 recommendations for User with ID 816



['Forrest Gump (1994)',
 'Die Hard (1988)',
 'Star Wars: Episode VI - Return of the Jedi (1983)',
 'Indiana Jones and the Last Crusade (1989)',
 'Gandhi (1982)',
 'Last Emperor, The (1987)',
 'Matrix, The (1999)',
 'Notting Hill (1999)',
 'Shrek (2001)',
 'Monsters, Inc. (2001)']

In [None]:
import timeit

def measure_recommendation_time():
    """Compute the hybrid recommendations for user 1 and display them.

    Takes no arguments so that it can be handed directly to ``timeit``.
    """
    display(get_hybrid_recommendations(1))

# Time one run of the recommendation call (the measured time includes displaying the result)
timer = timeit.Timer(measure_recommendation_time)
execution_time = timer.timeit(number=1)

print()
print(f"Recommendation system execution time: {execution_time} seconds")

['Toy Story (1995)',
 'Braveheart (1995)',
 'Casper (1995)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Forrest Gump (1994)',
 'When a Man Loves a Woman (1994)',
 'Pinocchio (1940)',
 'Die Hard (1988)',
 'Ghost and the Darkness, The (1996)',
 'Shall We Dance (1937)']


Recommendation system execution time: 6.985137040000154 seconds


In [None]:
import random

# Replace this function with your actual recommendation logic
def get_movie_recommendations(user_id, k=5):
    """Return up to ``k`` randomly chosen movie ids from the movies the user has rated.

    Placeholder recommender used by the serendipity demo. Reads the notebook-level
    ``df`` for the user's rated movies.

    Args:
        user_id: Raw user id as stored in ``df['userId']``.
        k: Number of movie ids to draw (default 5).

    Returns:
        A list of movie ids. It holds fewer than ``k`` entries (possibly none) when the
        user has rated fewer than ``k`` movies.
    """
    all_movies = df[df['userId'] == user_id]['movieId'].tolist()

    # random.sample raises ValueError when asked for more items than are available,
    # so cap the sample size at the number of movies this user actually has
    sample_size = min(k, len(all_movies))
    recommendations = random.sample(all_movies, k=sample_size)

    return recommendations


In [None]:
def serendipity_test(user_id):
    """Print a user's regular picks, a fixed set of surprise picks, and a shuffled mix.

    The regular picks come from ``get_movie_recommendations``; the surprise picks are
    three hard-coded placeholder names. Nothing is returned.
    """
    baseline = get_movie_recommendations(user_id)
    surprises = ["SurpriseMovie1", "SurpriseMovie2", "SurpriseMovie3"]

    # Merge both groups into a new list and shuffle it in place
    mixed = [*baseline, *surprises]
    random.shuffle(mixed)

    report = (
        ("Regular Recommendations for User", baseline),
        ("\nUnexpected Recommendations for User", surprises),
        ("\nAll Recommendations (Shuffled) for User", mixed),
    )
    for heading, items in report:
        print(heading, user_id, ":")
        print(items)

# Demo: run the serendipity report for a sample user
user_id = 1
serendipity_test(user_id=user_id)

Regular Recommendations for User 1 :
[1036, 110, 8969, 4993, 2908]

Unexpected Recommendations for User 1 :
['SurpriseMovie1', 'SurpriseMovie2', 'SurpriseMovie3']

All Recommendations (Shuffled) for User 1 :
['SurpriseMovie1', 2908, 4993, 'SurpriseMovie2', 'SurpriseMovie3', 1036, 110, 8969]


```python
def movie_recommendation_system(user_id):
    # Replace this with your actual recommendation logic
    recommendations = get_hybrid_recommendations(user_id)

    # Use a set to store unique recommendations
    unique_recommendations = set(recommendations)

    # Print the unique recommendations
    print("Unique Movie Recommendations for User", user_id, ":")
    for movie in unique_recommendations:
        print(movie)

# Example usage
user_id = 1
movie_recommendation_system(user_id)
```