# In [None]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD, KNNBasic
from surprise.model_selection import train_test_split, GridSearchCV
from surprise import accuracy
from surprise import dump
from sklearn.preprocessing import MultiLabelBinarizer

# Seed for the hold-out shuffle so the train/test split is repeatable.
RANDOM_SEED = 42

# Load the MovieLens dataset
data = Dataset.load_builtin('ml-100k')

# Define a Reader to parse the data
# (nothing in this script passes `reader` anywhere; kept so the name stays defined)
reader = Reader(line_format='user item rating timestamp', sep='\t')

# Snapshot every rating before the dataset is narrowed to the training
# portion further down.
all_raw_ratings = list(data.raw_ratings)

# Load the data into a pandas DataFrame
df = pd.DataFrame(all_raw_ratings, columns=['user_id', 'movie_id', 'rating', 'timestamp'])

# Sample movie information for content-based filtering
# NOTE(review): this CSV comes from the MovieTweetings project, not from
# MovieLens. Confirm that its ids refer to the same films as the ml-100k item
# ids; otherwise the merge below pairs ratings with unrelated titles or
# returns no rows at all.
movies = pd.read_csv('https://raw.githubusercontent.com/sidooms/MovieTweetings/master/latest/movies.csv')
movies.columns = ['movie_id', 'title', 'genres']

# Surprise keeps raw ids read from a file as strings, while read_csv will
# normally parse an id column as integers. Normalise both sides to str so the
# merge keys are comparable instead of raising / matching nothing.
df['movie_id'] = df['movie_id'].astype(str)
movies['movie_id'] = movies['movie_id'].astype(str)

# A missing genre value would stay NaN after a plain .str.split and then break
# MultiLabelBinarizer (NaN is not iterable); treat it as "no genres".
movies['genres'] = movies['genres'].apply(
    lambda raw: raw.split('|') if isinstance(raw, str) and raw else []
)

# Merge ratings with movie information
df = df.merge(movies, on='movie_id')

# Display the first few rows of the dataset
print("Dataset Sample:")
print(df.head())

# One-hot encode genres
mlb = MultiLabelBinarizer()
genre_encoded = mlb.fit_transform(df['genres'])
# Reuse df's index so the column-wise concat below lines rows up by
# construction rather than by coincidence of two default indexes.
genre_df = pd.DataFrame(genre_encoded, columns=mlb.classes_, index=df.index)

# Combine the original DataFrame with genre DataFrame
df_combined = pd.concat([df[['user_id', 'movie_id', 'rating']], genre_df], axis=1)

# Create the train and test datasets
# The 20% hold-out is carved out *before* hyperparameter tuning: running the
# grid search on the full dataset would let the test ratings influence the
# chosen parameters and make the reported error optimistic.
shuffled_positions = np.random.default_rng(RANDOM_SEED).permutation(len(all_raw_ratings))
n_test = int(0.2 * len(all_raw_ratings))
test_raw_ratings = [all_raw_ratings[pos] for pos in shuffled_positions[:n_test]]
train_raw_ratings = [all_raw_ratings[pos] for pos in shuffled_positions[n_test:]]

# From here on `data` only contains the training ratings.
data.raw_ratings = train_raw_ratings
trainset = data.build_full_trainset()
testset = data.construct_testset(test_raw_ratings)

# Hyperparameter tuning for the SVD model
param_grid = {
    'n_factors': [50, 100, 150],
    'n_epochs': [20, 30],
    'lr_all': [0.005, 0.01],
    'reg_all': [0.02, 0.1],
}
# 3-fold cross-validation over the training ratings only.
grid_search = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)
grid_search.fit(data)

# Best SVD model
best_svd = grid_search.best_estimator['rmse']
print(f"Best SVD Parameters: {grid_search.best_params['rmse']}")

# Fit the best model on the full training set
best_svd.fit(trainset)

# Predictions on the test set
predictions = best_svd.test(testset)

# Compute and print RMSE and MAE
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)
print(f"\nRoot Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")

# Function to get movie recommendations using collaborative filtering
def get_cf_recommendations(user_id, num_recommendations=5):
    """Rank movies the user has not rated by the model's predicted rating.

    Args:
        user_id: Raw user id. It is normalised to ``str`` before being compared
            with ``df['user_id']`` and before being handed to ``best_svd``,
            because Surprise keeps raw ids read from a file as strings; an
            integer such as ``196`` would otherwise match no rows and be
            treated by the model as an unknown user.
        num_recommendations: Maximum number of results to return.

    Returns:
        A list of ``(movie_id, predicted_rating)`` tuples sorted by predicted
        rating, highest first, containing at most ``num_recommendations``
        entries. ``movie_id`` values are taken from ``df['movie_id']`` as-is.
    """
    raw_uid = str(user_id)

    # Build the user's rated-movie set once; the lookup inside the loop is
    # then a constant-time membership test instead of a DataFrame filter per
    # candidate movie.
    belongs_to_user = df['user_id'].astype(str) == raw_uid
    already_rated = set(df.loc[belongs_to_user, 'movie_id'])

    recommendations = [
        (m_id, best_svd.predict(raw_uid, str(m_id)).est)
        for m_id in df['movie_id'].unique()
        if m_id not in already_rated
    ]

    recommendations.sort(key=lambda pair: pair[1], reverse=True)
    return recommendations[:num_recommendations]

# Function to get movie recommendations using content-based filtering
def get_cb_recommendations(user_id, num_recommendations=5):
    """Recommend unrated movies whose genres match the user's rated genres.

    The user's genre profile is the mean of the one-hot genre columns of
    ``df_combined`` over the rows that user rated. Every movie the user has
    not rated is scored by the dot product of its genre vector with that
    profile, and the highest-scoring movies are returned.

    Args:
        user_id: Raw user id; compared as a string against
            ``df_combined['user_id']``.
        num_recommendations: Maximum number of movies to return.

    Returns:
        A DataFrame with columns ``movie_id`` and ``title``, one row per
        recommended movie, best match first. Empty when the user has no rows
        in ``df_combined``.
    """
    # Everything in df_combined except these three columns is a genre flag.
    non_genre_cols = ('user_id', 'movie_id', 'rating')
    genre_cols = [col for col in df_combined.columns if col not in non_genre_cols]

    user_rows = df_combined[df_combined['user_id'].astype(str) == str(user_id)]
    if user_rows.empty:
        # No rating history means no genre profile to score against.
        return df[['movie_id', 'title']].iloc[0:0]

    user_genres = user_rows[genre_cols].mean()

    # Get movies that the user hasn't rated yet. The frame holds one row per
    # rating, so keep a single row per movie to avoid recommending the same
    # title several times.
    rated_ids = set(user_rows['movie_id'])
    unrated_movies = df_combined[~df_combined['movie_id'].isin(rated_ids)]
    unrated_movies = unrated_movies.drop_duplicates(subset='movie_id')

    # Score unrated movies based on genre preference
    scores = unrated_movies[genre_cols].dot(user_genres)

    # nlargest returns index *labels*, so look them up with .loc. df_combined
    # is built column-wise from df, so the same labels address the matching
    # rows (and titles) in df.
    top_labels = scores.nlargest(num_recommendations).index
    return df.loc[top_labels, ['movie_id', 'title']]

# Main recommendation function
def get_recommendations(user_id, num_recommendations=5):
    """Print both kinds of recommendations for a single user.

    First prints the collaborative-filtering results returned by
    ``get_cf_recommendations`` (movie id and predicted rating), then the
    content-based results returned by ``get_cb_recommendations`` (movie id
    and title).

    Args:
        user_id: User to recommend for; forwarded unchanged to both helpers.
        num_recommendations: Number of results requested from each helper.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\nGetting recommendations for User ID: {user_id}")

    # Collaborative filtering: a sequence of (movie id, predicted rating) pairs.
    cf_pairs = get_cf_recommendations(user_id, num_recommendations)
    print("\nCollaborative Filtering Recommendations:")
    for movie_id, predicted_rating in cf_pairs:
        print(f"Movie ID: {movie_id}, Predicted Rating: {predicted_rating:.2f}")

    # Content-based filtering: a frame with 'movie_id' and 'title' columns.
    cb_frame = get_cb_recommendations(user_id, num_recommendations)
    print("\nContent-Based Filtering Recommendations:")
    for movie_id, title in zip(cb_frame['movie_id'], cb_frame['title']):
        print(f"Movie ID: {movie_id}, Title: {title}")

# Example usage
# The id is written as a string: Surprise keeps raw ids that were read from a
# file as strings, so an integer 196 would not equal any value in
# df['user_id'] and the model would see it as an unknown user.
user_id = '196'  # Replace with any valid user ID
get_recommendations(user_id, num_recommendations=5)

# Saving the model
dump.dump('best_model_file', algo=best_svd)