In [2]:
pip install numpy pandas scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357247 sha256=be15074c293b36936019f9b8cb94097a28351fd2050279d3d72466db75dd8dcd
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [9]:
import numpy as np
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate, GridSearchCV
from surprise.accuracy import rmse

In [10]:
# Creating a synthetic dataset
data = {
    'user_id': np.repeat(np.arange(1, 21), 10),
    'movie_id': np.tile(np.arange(1, 11), 20),
    'rating': np.random.randint(1, 6, size=200),
    'user_gender': np.repeat(['M', 'F'], 100),
    'movie_genre': np.tile(['Action', 'Sci-Fi', 'Drama', 'Romance', 'Comedy', 'Action', 'Sci-Fi', 'Thriller', 'Comedy', 'Drama'], 20),
    'director_gender': np.tile(['M', 'M', 'F', 'F', 'F', 'F', 'F', 'M', 'M', 'M'], 20),
    'movie_name': np.tile(['Movie A', 'Movie B', 'Movie C', 'Movie D', 'Movie E', 'Movie F', 'Movie G', 'Movie H', 'Movie I', 'Movie J'], 20)
}

df = pd.DataFrame(data)

In [11]:
# Preparing the data for the Surprise library
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'movie_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.25)

In [12]:
# Hyperparameter tuning for SVD model
param_grid = {
    'n_factors': [50, 100, 150],
    'n_epochs': [20, 30],
    'lr_all': [0.005, 0.01, 0.02],
    'reg_all': [0.02, 0.1]
}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=5)
gs.fit(data)

In [13]:
# Best parameters found by GridSearchCV
best_params = gs.best_params['rmse']
print(f"Best parameters: {best_params}")

# Training the best SVD model
model = SVD(**best_params)
model.fit(trainset)
predictions = model.test(testset)
accuracy = rmse(predictions)
print(f'RMSE: {accuracy}')

Best parameters: {'n_factors': 50, 'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.1}
RMSE: 1.3723
RMSE: 1.3723470240850248


In [14]:
# Cross-validation to check accuracy
cv_results = cross_validate(model, data, measures=['RMSE'], cv=5, verbose=False)
mean_rmse = np.mean(cv_results['test_rmse'])
print(f'Cross-validated RMSE: {mean_rmse}')


Cross-validated RMSE: 1.4101344117121015


In [16]:
# Function to get top N recommendations
def get_top_n(predictions, n=10):
    top_n = {}
    for uid, iid, true_r, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))

    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

top_n = get_top_n(predictions, n=5)

In [17]:
# Fairness adjustments
def adjust_for_fairness(top_n, df, n=5):
    adjusted_recommendations = {}
    for user_id, user_recommendations in top_n.items():
        user_gender = df[df['user_id'] == user_id]['user_gender'].iloc[0]
        female_directed_movies = df[(df['director_gender'] == 'F')]['movie_id'].unique()

        adjusted_user_recommendations = []
        female_count = 0

        for movie_id, rating in user_recommendations:
            if movie_id in female_directed_movies and female_count < n//2:
                adjusted_user_recommendations.append((movie_id, rating))
                female_count += 1
            elif movie_id not in female_directed_movies:
                adjusted_user_recommendations.append((movie_id, rating))

        while len(adjusted_user_recommendations) < n:
            for movie_id in female_directed_movies:
                if movie_id not in [m[0] for m in adjusted_user_recommendations]:
                    adjusted_user_recommendations.append((movie_id, 0))
                    if len(adjusted_user_recommendations) >= n:
                        break

        adjusted_recommendations[user_id] = adjusted_user_recommendations[:n]

    return adjusted_recommendations

adjusted_top_n = adjust_for_fairness(top_n, df)

In [20]:
# Function to display recommendations
def display_recommendations(user_ids, recommendations, df):
    for user_id in user_ids:
        if user_id in recommendations:
            user_name = f"User {user_id}"
            print(f"{user_name}'s Recommendations:")
            recs = [(df[df['movie_id'] == movie_id]['movie_name'].values[0],
                     df[df['movie_id'] == movie_id]['movie_genre'].values[0],
                     df[df['movie_id'] == movie_id]['director_gender'].values[0],
                     rating) for movie_id, rating in recommendations[user_id]]
            for movie in recs:
                print(f"{movie[0]} ({movie[1]}, {movie[2]} Director)")
            print()
        else:
            print(f"User {user_id} has no recommendations.")

# Input list of user IDs
user_ids = input("Enter a list of user IDs separated by commas: ")
user_ids = [int(uid.strip()) for uid in user_ids.split(',')]

# Display recommendations for the selected users
display_recommendations(user_ids, adjusted_top_n, df)


Enter a list of user IDs separated by commas: 5,9
User 5's Recommendations:
Movie F (Action, F Director)
Movie H (Thriller, M Director)
Movie C (Drama, F Director)
Movie D (Romance, F Director)
Movie E (Comedy, F Director)

User 9's Recommendations:
Movie B (Sci-Fi, M Director)
Movie G (Sci-Fi, F Director)
Movie C (Drama, F Director)
Movie D (Romance, F Director)
Movie E (Comedy, F Director)

