In [25]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import pairwise_distances

In [61]:
# reading DataFrame
user_info_df = pd.read_csv('user_info.dat', sep='\t', names=['New_ID', 'Real_ID'], skiprows=1)
item_info_df = pd.read_csv('item_info.dat', sep='\t', names=['Game_ID', 'Game_Name'], skiprows=1)
game_play_df = pd.read_csv('game_play.dat', sep='\t', names=['User_ID', 'Game_ID', 'Hours'], skiprows=1)
game_purchase_df = pd.read_csv('game_purchase.dat', sep='\t', names=['User_ID', 'Game_ID', 'Purchase'], skiprows=1)

In [99]:
data = pd.merge(game_play_df, game_purchase_df, on=['User_ID', 'Game_ID'], how='outer')
data = pd.merge(data, user_info_df, left_on='User_ID', right_on='New_ID')
data = pd.merge(data, item_info_df, on='Game_ID')

In [116]:
# Create user-item matrix
data_pivot = data.pivot_table(index='Real_ID', columns='Game_Name', values='Hours')
data_matrix = data_pivot.fillna(0).values

#data_pivot.to_csv('data_pivot.csv')
#data_matrix_df = pd.DataFrame(data_matrix, index=data_pivot.index, columns=data_pivot.columns)
#data_matrix_df.to_csv('data_matrix.csv')

#n=5
#print(data_pivot.head(n))
#print(data_pivot.shape[1]) # Total no. of games
#print(data_matrix[:n])
#n_users = data_matrix.shape[0]
#print("Total number of users: ", n_users)
#n_users = data["Real_ID"].nunique()
#print("Total number of unique users: ", n_users)

In [115]:
# Calculate user similarity
user_similarity = pairwise_distances(data_matrix, metric='cosine')

In [117]:
# Predict ratings
def predict(ratings, similarity):
    mean_user_rating = ratings.mean(axis=1)
    ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
    pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    return pred

# Make predictions
user_prediction = predict(data_matrix, user_similarity)

In [118]:
# Index of the user in the data_matrix variable
# 0 - 11349
user_index = 11349

# Get top n recommendations
n = 5
top_n_indices = np.argsort(user_prediction[user_index])[::-1][:n]
top_n_games = data_pivot.columns[top_n_indices]

# Print recommendations
print(f'Top {n} recommendations for user {data_pivot.index[user_index]}:')
for game in top_n_games:
    print(f'- {game}')

Top 5 recommendations for user 309903146:
- Counter-Strike Global Offensive
- Team Fortress 2
- Counter-Strike
- Sid Meier's Civilization V
- Counter-Strike Source
