In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load the synthetic data
user_data = pd.read_csv('../users.csv')
item_data = pd.read_csv('../items.csv')
ratings_data = pd.read_csv('../ratings.csv')

In [4]:
# Aggregate duplicate entries by calculating the mean rating
ratings_data = ratings_data.groupby(['user_id', 'item_id'], as_index=False).mean()

In [5]:
# Pivot the ratings matrix
ratings_matrix = ratings_data.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

In [6]:
# Compute the cosine similarity matrix
user_similarity = cosine_similarity(ratings_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=ratings_matrix.index, columns=ratings_matrix.index)

In [8]:
# def predict_ratings(user_id):
#     user_ratings = ratings_matrix.loc[user_id]
#     similar_users = user_similarity_df[user_id].sort_values(ascending=False)
#     similar_users_ratings = ratings_matrix.loc[similar_users.index]
#     weighted_ratings = similar_users_ratings.T.dot(similar_users) / similar_users.sum()
#     predicted_ratings = weighted_ratings.loc[user_id]
#     return predicted_ratings

In [7]:
def predict_ratings(user_id):
    user_ratings = ratings_matrix.loc[user_id]
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)
    similar_users_ratings = ratings_matrix.loc[similar_users.index]
    weighted_ratings = similar_users_ratings.T.dot(similar_users) / similar_users.sum()
    return weighted_ratings

In [8]:
# Print unique user IDs
unique_user_ids = ratings_data['user_id'].unique()
print("Unique user IDs in the dataset:", unique_user_ids)

Unique user IDs in the dataset: ['user_1' 'user_10' 'user_100' 'user_11' 'user_12' 'user_13' 'user_14'
 'user_15' 'user_16' 'user_17' 'user_18' 'user_19' 'user_2' 'user_20'
 'user_21' 'user_22' 'user_23' 'user_24' 'user_25' 'user_26' 'user_27'
 'user_28' 'user_29' 'user_3' 'user_30' 'user_31' 'user_32' 'user_33'
 'user_34' 'user_35' 'user_36' 'user_37' 'user_38' 'user_39' 'user_4'
 'user_40' 'user_41' 'user_42' 'user_43' 'user_44' 'user_45' 'user_46'
 'user_47' 'user_48' 'user_49' 'user_5' 'user_50' 'user_51' 'user_52'
 'user_53' 'user_54' 'user_55' 'user_56' 'user_57' 'user_58' 'user_59'
 'user_6' 'user_60' 'user_61' 'user_62' 'user_63' 'user_64' 'user_65'
 'user_66' 'user_67' 'user_68' 'user_69' 'user_7' 'user_70' 'user_71'
 'user_72' 'user_73' 'user_74' 'user_75' 'user_76' 'user_77' 'user_78'
 'user_79' 'user_8' 'user_80' 'user_81' 'user_82' 'user_83' 'user_84'
 'user_85' 'user_86' 'user_87' 'user_88' 'user_89' 'user_9' 'user_90'
 'user_91' 'user_92' 'user_93' 'user_94' 'user_95' 'u

In [15]:
# Example: Predict ratings for a user
user_id = 'user_60'
predicted_ratings = predict_ratings(user_id)
print(f"Predicted ratings for {user_id}:\n", predicted_ratings)

Predicted ratings for user_60:
 item_id
item_1     0.354709
item_10    0.532000
item_11    0.695174
item_12    1.550825
item_13    0.591430
item_14    0.506999
item_15    1.515992
item_16    0.254611
item_17    0.457028
item_18    0.898200
item_19    0.375253
item_2     0.592405
item_20    0.464924
item_21    0.458689
item_22    0.307106
item_23    0.503186
item_24    0.309129
item_25    0.561923
item_26    0.634494
item_27    0.218553
item_28    0.321888
item_29    0.601933
item_3     0.342626
item_30    0.477969
item_31    0.508772
item_32    0.561255
item_33    0.565450
item_34    0.576010
item_35    0.676770
item_36    0.317638
item_37    0.626617
item_38    0.515114
item_39    0.393061
item_4     0.417544
item_40    0.347818
item_41    0.385907
item_42    0.259980
item_43    0.475131
item_44    0.352781
item_45    0.642301
item_46    0.685964
item_47    0.573355
item_48    0.388975
item_49    0.987558
item_5     0.291853
item_50    0.609406
item_6     0.477401
item_7     0.355743


In [12]:
# # Verify that user_1 is still present after aggregation
# if 'user_1' not in ratings_data['user_id'].unique():
#     raise ValueError("user_1 is not present in the dataset after aggregation")

In [None]:
# from sklearn.metrics.pairwise import cosine_similarity

# # Compute user similarity matrix
# user_similarity = cosine_similarity(user_item_matrix)

# # Make recommendations for a user
# def recommend_items(user_id, user_item_matrix, user_similarity, top_n=5):
#     user_idx = user_id - 1
#     similar_users = user_similarity[user_idx]
#     similar_users[user_idx] = 0  # Exclude the user itself
    
#     similar_users_idx = similar_users.argsort()[::-1][:top_n]
#     recommended_items = set()
    
#     for idx in similar_users_idx:
#         recommended_items.update(user_item_matrix[idx].nonzero()[0])
    
#     return list(recommended_items)[:top_n]

# # Example recommendation for user 1
# print(recommend_items(1, user_item_matrix, user_similarity))


In [None]:
# from flask import Flask, request, jsonify

# app = Flask(__name__)

# @app.route('/recommend', methods=['GET'])
# def recommend():
#     user_id = int(request.args.get('user_id'))
#     recommendations = recommend_items(user_id, user_item_matrix, user_similarity)
#     return jsonify(recommendations)

# if __name__ == '__main__':
#     app.run(debug=True)


In [16]:
from flask import Flask, jsonify, request

In [17]:
app = Flask(__name__)

@app.route('/')
def home():
    return 'Welcome to the Recommender System API!'

@app.route('/predict', methods=['POST'])
def predict():
    user_id = request.json['user_id']
    predicted_ratings = predict_ratings(user_id)
    return jsonify(predicted_ratings.to_dict())

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [18]:
%tb

SystemExit: 1