Found this project notebook on Kaggle where I sourced my dataset.  I used his reduce memory function quite often in my project to make manipulating the data easier.  Additionally, I felt like if I could implement a user game matrix into my existing algorithm I could refine the output.  As the output of this recommender is rather linear and focuses primarily on items and users that are similar to each other.  

Might implement a version of this at a later date.

Source is https://www.kaggle.com/code/thakursankalp/steam-game-recommendation-engine by user Sankalp Thakur

In [2]:
%pip install mlxtend

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from scipy.sparse import csr_matrix
from mlxtend.frequent_patterns import apriori, association_rules

# Function to reduce the memory usage of a DataFrame.
def reduce_memory(df):
    for col in df.columns:
        if df[col].dtype == 'float64':
            df[col] = df[col].astype('float32')
        if df[col].dtype == 'int64':
            df[col] = df[col].astype('int32')
    return df

# Generator function to load data in chunks.
def data_generator(df, chunksize=10000):
    for i in range(0, df.shape[0], chunksize):
        yield df.iloc[i:i+chunksize]


games = reduce_memory(pd.read_csv('../Data/games.csv'))
users = reduce_memory(pd.read_csv('../Data/users.csv'))
recommendations = reduce_memory(pd.read_csv('../Data/recommendations.csv'))
        
        
    

In [12]:
from scipy.sparse import coo_matrix

# map each user and item to a unique numeric value
user_ids = recommendations['user_id'].astype('category').cat.codes
item_ids = recommendations['app_id'].astype('category').cat.codes

# Get the unique user and game ids
unique_user_ids = recommendations['user_id'].astype('category').cat.categories
unique_item_ids = recommendations['app_id'].astype('category').cat.categories

# create a sparse matrix
user_game_matrix = coo_matrix((recommendations['hours'], (user_ids, item_ids)))

# Fit the model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(user_game_matrix)


# Get top 5 recommendations for first user
distances, indices = model_knn.kneighbors(user_game_matrix.getrow(0), n_neighbors=6)
recommended_users = [unique_user_ids[i] for i in indices.flatten()[1:]]
print(f'Recommended users for the first user are: {recommended_users}')

Recommended users for the first user are: [11422900, 7483536, 9284221, 6590596, 11161013]


In [3]:
# Content-Based Filtering
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(games['title'])
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Get top 5 recommendations for first game
similar_indices = cosine_similarities[0].argsort()[:-6:-1]
recommended_games = [games['title'].iloc[i] for i in similar_indices]
print(f'Recommended games for the first game are: {recommended_games}')

Recommended games for the first game are: ['Prince of Persia: Warrior Within™', 'Prince of Persia®', 'Prince of Persia: The Two Thrones™', 'Prince of Persia®: The Sands of Time', 'Prince of Persia: The Forgotten Sands™']


In [4]:
import numpy as np

def get_similar_users(user_id, user_game_matrix, model_knn, n_neighbors=6):
    distances, indices = model_knn.kneighbors(user_game_matrix.getrow(user_id), n_neighbors=n_neighbors)
    similar_users = [unique_user_ids[i] for i in indices.flatten()[1:]]
    return similar_users

# Get the unique game ids
unique_game_ids = recommendations['app_id'].astype('category').cat.categories

def get_similar_games(game_id, tfidf_matrix, n_neighbors=6):
    # Find the positional index of the game_id
    game_index = np.where(unique_game_ids == game_id)[0][0]
    
    cosine_similarities = linear_kernel(tfidf_matrix[game_index], tfidf_matrix).flatten()
    similar_indices = cosine_similarities.argsort()[:-n_neighbors:-1]
    similar_games = [games['title'].iloc[i] for i in similar_indices]
    return similar_games

def recommend_games(user_id):
    similar_users = get_similar_users(user_id, user_game_matrix, model_knn)
    similar_games = []
    for user in similar_users:
        user_games = recommendations[recommendations['user_id'] == user]['app_id'].unique()
        for game_id in user_games:
            similar_games.extend(get_similar_games(game_id, tfidf_matrix))
    return similar_games

In [6]:
recommend_games(0)

['Deus Ex: Game of the Year Edition',
 'Fallout 3: Game of the Year Edition',
 'BADLAND: Game of the Year Edition',
 'ConflictCraft 2 - Game of the Year Edition',
 'Rising Storm Game of the Year Edition',
 'Super High Ball: Pinball Platformer',
 'Almost There: The Platformer',
 'The Ball',
 '8 Ball 2',
 'THE E BALL',
 'Outward Soundtrack',
 'Outward: The Three Brothers',
 'Outward - The Soroboreans',
 'Soundtrack',
 'Out There Somewhere - Soundtrack',
 'HeapVR',
 'FatalZone',
 'Hunt: Showdown - Double or Nothing',
 'Horror Bar VR',
 'Gigant',
 'Super High Ball: Pinball Platformer',
 'Almost There: The Platformer',
 'The Ball',
 '8 Ball 2',
 'THE E BALL',
 'Age of Mythology: Extended Edition',
 'Anna - Extended Edition',
 'Transient: Extended Edition',
 'Age of Mythology EX: Tale of the Dragon',
 'D.N.Age',
 'TerraTech - Weapons of War Pack',
 'TerraTech',
 'Too Many Weapons',
 'Kold War Pack',
 'Steel Division 2 - Back To War Pack',
 'Borderlands 2: Headhunter 4: Wedding Day Massacre',