In [1]:
# Author: Michael Jeremy Treacy
# Content: This file contains item-based and user-based recommenders for Steam.
# Sources: http://www.salemmarafi.com/code/collaborative-filtering-with-python/#:~:text=Collaborative%20Filtering%20with%20Python%201%20Refresher%3A%20The%20Last.FM,collaborative%20Filtering.%20...%204%20Entire%20Code%205%20Referenence
#          https://medium.com/@sam.mail2me/recommendation-systems-collaborative-filtering-just-with-numpy-and-pandas-a-z-fa9868a95da2
#          https://en.wikipedia.org/wiki/Collaborative_filtering

In [2]:
import random
import numpy as np
import pandas as pd

main_df = pd.read_csv(filepath_or_buffer='steam-200k.csv', names=['player', 'game', 'behavior', 'quantity', 'other'])

In [3]:
main_df.head()

Unnamed: 0,player,game,behavior,quantity,other
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0
1,151603712,The Elder Scrolls V Skyrim,play,273.0,0
2,151603712,Fallout 4,purchase,1.0,0
3,151603712,Fallout 4,play,87.0,0
4,151603712,Spore,purchase,1.0,0


In [4]:
main_df.dtypes

player        int64
game         object
behavior     object
quantity    float64
other         int64
dtype: object

In [5]:
main_df['other'].sum()

0

In [6]:
main_df.isnull()

Unnamed: 0,player,game,behavior,quantity,other
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
199995,False,False,False,False,False
199996,False,False,False,False,False
199997,False,False,False,False,False
199998,False,False,False,False,False


In [7]:
main_df = main_df.drop(labels='other', axis=1)
main_df = main_df.dropna()
main_df = main_df[main_df.behavior != 'purchase']
main_df = main_df.drop(labels='behavior', axis=1)
main_df = main_df[main_df.quantity != 0]
main_df = main_df.rename(columns={'quantity': 'hours'})
main_df = main_df.reset_index(drop=True)

In [8]:
main_df.head()

Unnamed: 0,player,game,hours
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9


In [9]:
main_df['hours'].mean()

48.878063243911484

In [10]:
len(main_df['player'].unique())

11350

In [11]:
len(main_df['game'].unique())

3600

In [12]:
# Remove players who've played less than 5 games,
# played one game over 100 hours more than their second favorite,
# or played less than a total of 50 hours.
players_df = main_df[['player']]
players_df = players_df.drop_duplicates()

for each_player in players_df['player'].tolist():
    player_df = main_df.loc[main_df['player'] == each_player]
    player_df = player_df.sort_values(by='hours', ascending=False)
    player_df = player_df.reset_index(drop=True)
    if (player_df.shape[0] < 5 or
        player_df.iloc[0][2] - player_df.iloc[1][2] > 100 or
        player_df['hours'].sum() < 50):
        main_df = main_df[main_df.player != each_player]
    
main_df = main_df.reset_index(drop=True)

In [13]:
main_df['player'].value_counts()

138941587    299
24469287     284
51557405     210
17530772     209
22301321     207
            ... 
161140815      5
163415496      5
187588985      5
192044880      5
133234931      5
Name: player, Length: 1016, dtype: int64

In [14]:
main_df.head()

Unnamed: 0,player,game,hours
0,53875128,Grand Theft Auto V,86.0
1,53875128,Insurgency,72.0
2,53875128,Left 4 Dead 2,71.0
3,53875128,METAL GEAR SOLID V THE PHANTOM PAIN,59.0
4,53875128,S.T.A.L.K.E.R. Shadow of Chernobyl,54.0


In [15]:
main_df['hours'].mean()

21.364821255257258

In [16]:
len(main_df['player'].unique())

1016

In [17]:
len(main_df['game'].unique())

2846

In [18]:
game_hours_df = main_df.drop(labels='player', axis=1)
game_hours_df = game_hours_df.groupby(by=['game']).sum()
game_hours_df = game_hours_df.rename(columns={'hours': 'total_hours'})
game_hours_df = game_hours_df.sort_values(by='total_hours', ascending=False)

In [19]:
game_hours_df.head()

Unnamed: 0_level_0,total_hours
game,Unnamed: 1_level_1
Counter-Strike Global Offensive,24782.5
Team Fortress 2,20108.5
The Elder Scrolls V Skyrim,19301.1
Dota 2,13581.5
Sid Meier's Civilization V,13447.0


In [20]:
game_hours_df.tail()

Unnamed: 0_level_0,total_hours
game,Unnamed: 1_level_1
Ethan Meteor Hunter,0.1
Urban Trial Freestyle,0.1
Urizen Shadows of the Cold,0.1
CRYENGINE,0.1
Slip,0.1


In [21]:
players_max_df = main_df.groupby(by=['player'], as_index=False, sort=False).max()
players_max_df = players_max_df.rename(columns={'hours': 'max'})
players_max_df = players_max_df.drop(labels='game', axis=1)

In [22]:
players_max_df.head()

Unnamed: 0,player,max
0,53875128,86.0
1,26122540,92.0
2,97298878,44.0
3,92107940,110.0
4,226212066,94.0


In [23]:
players_min_df = main_df.groupby(by=['player'], as_index=False, sort=False).min()
players_min_df = players_min_df.rename(columns={'hours': 'min'})
players_min_df = players_min_df.drop(labels='game', axis=1)

In [24]:
players_min_df.head()

Unnamed: 0,player,min
0,53875128,0.1
1,26122540,1.1
2,97298878,0.1
3,92107940,0.5
4,226212066,0.2


In [25]:
# Create game rating (sentiment) by player for his or her own games.
main_df = main_df.merge(right=players_max_df, how='left', on='player')
main_df = main_df.merge(right=players_min_df, how='left', on='player')
main_df['rating'] = ((main_df['hours'] - main_df['min']) / (main_df['max'] - main_df['min'])) * (10 - 1) + 1

In [26]:
main_df.head()

Unnamed: 0,player,game,hours,max,min,rating
0,53875128,Grand Theft Auto V,86.0,86.0,0.1,10.0
1,53875128,Insurgency,72.0,86.0,0.1,8.533178
2,53875128,Left 4 Dead 2,71.0,86.0,0.1,8.428405
3,53875128,METAL GEAR SOLID V THE PHANTOM PAIN,59.0,86.0,0.1,7.171129
4,53875128,S.T.A.L.K.E.R. Shadow of Chernobyl,54.0,86.0,0.1,6.647264


In [27]:
main_df = main_df.drop(labels=['hours', 'max', 'min'], axis=1)
players_by_games_df = main_df.pivot_table(index='player', columns='game', values='rating')
players_by_games_df = players_by_games_df.fillna(0)

In [28]:
players_by_games_df.head()

game,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,140,15 Days,...,ibb & obb,liteCam Game 100 FPS Game Capture,planetarian ~the reverie of a little planet~,rFactor 2,realMyst,resident evil 4 / biohazard 4,sZone-Online,the static speaks my name,theHunter,theHunter Primal
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5250,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
229911,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
561758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
games_by_games_df = players_by_games_df.transpose().dot(players_by_games_df)

In [30]:
games_by_games_df.head()

game,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,140,15 Days,...,ibb & obb,liteCam Game 100 FPS Game Capture,planetarian ~the reverie of a little planet~,rFactor 2,realMyst,resident evil 4 / biohazard 4,sZone-Online,the static speaks my name,theHunter,theHunter Primal
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0RBITALIS,2.084812,0.0,1.062825,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.041005,0.0
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),0.0,7.171383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Second Ninja,1.062825,0.0,1.05576,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100% Orange Juice,0.0,0.0,0.0,3.052755,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000 Amps,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# Create a df of each game's most similar games.
games_to_rec = 20
rec_by_game_df = pd.DataFrame(index=games_by_games_df.columns, columns=range(1, games_to_rec + 1))
games_slice = slice(0, games_to_rec)

for each_game in range(len(games_by_games_df.columns)):
    games_value_col = games_by_games_df.iloc[:, each_game]
    sorted_values_to_games = games_value_col.sort_values(ascending=False)[games_slice].index
    rec_by_game_df.iloc[each_game, games_slice] = sorted_values_to_games

In [32]:
rec_by_game_df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0RBITALIS,Terraria,Elite Dangerous,Garry's Mod,Counter-Strike Global Offensive,Team Fortress 2,Fallout 4,Don't Starve,Sid Meier's Civilization V,Dying Light,Grand Theft Auto V,The Witcher 3 Wild Hunt,Borderlands The Pre-Sequel,Watch_Dogs,H1Z1,Dishonored,Far Cry 4,Batman Arkham Knight,Metro Last Light,Call of Duty 4 Modern Warfare,Half-Life 2
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),Spiral Knights,Blacklight Retribution,Dungeon Defenders,Terraria,Defense Grid The Awakening,Borderlands 2,Unturned,Defiance,Call of Duty Modern Warfare 2 - Multiplayer,Saints Row The Third,Dota 2,Deus Ex Human Revolution,Call of Duty Ghosts - Multiplayer,Left 4 Dead 2,Counter-Strike Global Offensive,FINAL FANTASY XIV A Realm Reborn,Sid Meier's Civilization V,Garry's Mod,Portal 2,Dirty Bomb
10 Second Ninja,Elite Dangerous,Don't Starve,Grand Theft Auto V,The Witcher 3 Wild Hunt,Counter-Strike Global Offensive,Fallout 4,Watch_Dogs,H1Z1,Far Cry 4,Batman Arkham Knight,Terraria,Dying Light,Borderlands The Pre-Sequel,DayZ,Alien Isolation,Evolve,Dishonored,Batman Arkham Origins,Metro Last Light,Dead Island
100% Orange Juice,Garry's Mod,The Elder Scrolls V Skyrim,Grand Theft Auto V,NARUTO SHIPPUDEN Ultimate Ninja STORM 3 Full B...,Prison Architect,Portal 2,Arma 2 Operation Arrowhead,Elsword,The Sims(TM) 3,Euro Truck Simulator 2,NBA 2K16,Plague Inc Evolved,Team Fortress 2,Borderlands 2,Arma 3,Counter-Strike Global Offensive,DayZ,Fallout New Vegas,HuniePop,Left 4 Dead
1000 Amps,DARK SOULS II,Dungeon Defenders,Kerbal Space Program,War Thunder,Borderlands,Left 4 Dead 2,PAYDAY 2,The Witcher 2 Assassins of Kings Enhanced Edition,Battlefield Bad Company 2,Killing Floor,Divinity II Developer's Cut,Dragon Age Origins,The Witcher Enhanced Edition,Borderlands 2,Portal 2,Dungeon Defenders II,Age of Empires II HD Edition,Fallout New Vegas,Warframe,Sleeping Dogs


In [33]:
rec_by_game_df.loc[['Counter-Strike Global Offensive']]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Counter-Strike Global Offensive,Counter-Strike Global Offensive,Team Fortress 2,Garry's Mod,Dota 2,Unturned,Left 4 Dead 2,Counter-Strike Source,Terraria,The Elder Scrolls V Skyrim,PAYDAY 2,Borderlands 2,Sid Meier's Civilization V,Grand Theft Auto V,Counter-Strike,Portal 2,Rust,Trove,DayZ,Rocket League,Robocraft


In [34]:
rec_by_game_df.loc[['Sid Meier\'s Civilization V']]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Sid Meier's Civilization V,Sid Meier's Civilization V,The Elder Scrolls V Skyrim,Team Fortress 2,Terraria,Dota 2,Fallout New Vegas,Counter-Strike Global Offensive,Borderlands 2,Left 4 Dead 2,Garry's Mod,Portal 2,XCOM Enemy Unknown,Total War SHOGUN 2,Empire Total War,Total War ROME II - Emperor Edition,Sid Meier's Civilization IV Beyond the Sword,Counter-Strike Source,Don't Starve,BioShock Infinite,The Witcher 2 Assassins of Kings Enhanced Edition


In [35]:
# Create an array filled with each player's ratings.
players_arr = players_by_games_df.values

In [36]:
players_arr

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [37]:
players_df = main_df[['player']]
players_df = players_df.drop_duplicates()
player_id = players_df.iloc[random.randint(0, len(main_df['player'].unique()) - 1)][0]

In [38]:
player_id

55426012

In [39]:
main_df.loc[main_df['player'] == player_id]

Unnamed: 0,player,game,rating
7722,55426012,DARK SOULS II,10.000000
7723,55426012,Dungeon Defenders,8.351339
7724,55426012,Kerbal Space Program,5.688116
7725,55426012,War Thunder,5.603570
7726,55426012,Borderlands,5.561296
...,...,...,...
7845,55426012,Star Wars Republic Commando,1.004227
7846,55426012,Starbound,1.004227
7847,55426012,1000 Amps,1.000000
7848,55426012,Superbrothers Sword & Sworcery EP,1.000000


In [40]:
def get_player_index(player_id):
    """Get players-by-games df reg index according to player id passed in and return it."""
    x = 0
    for each_player_index in players_by_games_df.index:
        if each_player_index == player_id:
            return x
        x = x + 1

In [41]:
# Make a players array without this player's row.
other_players_arr = players_arr.copy()
player_index = get_player_index(player_id)
other_players_arr = np.delete(other_players_arr, player_index, axis=0)

In [42]:
def get_player_row(player_id):
    """Get players-by-games df value row according to player id passed in and return it."""
    x = 0
    for each_player_index in players_by_games_df.index:
        if each_player_index == player_id:
            return players_arr[x]
        x = x + 1

In [43]:
# Find the square, sum, and root of this player.
player = get_player_row(player_id)
denom_rx = [np.square(denom_rx) for denom_rx in player]
denom_rx = sum(denom_rx)
denom_rx = np.sqrt(denom_rx)

In [44]:
def get_other_index(regular_index):
    """Get players-by-games df index according to reg index passed in and return it."""
    x = 0
    for each_player_index in players_by_games_df.index:
        if x == regular_index:
            return each_player_index
        x = x + 1

In [45]:
# Create and fill a similarity matrix for the specified player beginning
# with the (player, cos) as the first in the similarity list.
similar_players = [(player_id, 1)]

x = 1
for each_players_hours in other_players_arr:
    # Find the square, sum, and root of the other player.
    denom_ry = [np.square(denom_ry) for denom_ry in each_players_hours]
    denom_ry = sum(denom_ry)
    denom_ry = np.sqrt(denom_ry)

    # Find the sum of products for this player and the other.
    prod = [numer_rx * numer_ry for numer_rx, numer_ry in zip(player, each_players_hours)]
    sum_of_prod = sum(prod)

    # Find the square, sum, and root of this player times the square, sum, and root of the other.
    square_sum_root_prod = denom_rx * denom_ry

    # Add (other, cos) to the similarity list.
    i = get_other_index(x)
    similar_players.append((i, sum_of_prod / square_sum_root_prod))
    x = x + 1
    
# Get the most similar players.
similar_players = sorted(similar_players, key=lambda player_cos: player_cos[1])
similar_players.reverse()
num_similar_players = 20
high_slice = slice(0, num_similar_players)
similar_players_high = similar_players[high_slice]

In [46]:
similar_players_high

[(55426012, 1),
 (22371742, 0.4287609983524755),
 (44370809, 0.40360669959495205),
 (80128229, 0.3985484163386727),
 (153097805, 0.3778744596204129),
 (42198457, 0.36647130229414393),
 (77194086, 0.3597208487540517),
 (176225382, 0.34862298744597436),
 (55906572, 0.3454680506917032),
 (198346312, 0.3324429087281142),
 (117531196, 0.3138811863550809),
 (24841093, 0.3121400825849001),
 (18066817, 0.3101379857087061),
 (58345543, 0.30988926008299544),
 (127311699, 0.30980276526762596),
 (8949216, 0.3092648119384884),
 (91627755, 0.3012668011702978),
 (69009454, 0.2890637533365894),
 (7163917, 0.2850013215586081),
 (2531540, 0.28459387416389403)]

In [47]:
# Create a df of similar players game ratings.
similar_players_high_df = pd.DataFrame()

for each_similar_player in similar_players_high:
    similar_players_high_df = similar_players_high_df.append(players_by_games_df.loc[each_similar_player[0]])
    
# Add the cos column.
similar_players_high_df['cos'] = [each_player[1] for each_player in similar_players_high]

In [48]:
similar_players_high_df

Unnamed: 0,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,140,15 Days,...,liteCam Game 100 FPS Game Capture,planetarian ~the reverie of a little planet~,rFactor 2,realMyst,resident evil 4 / biohazard 4,sZone-Online,the static speaks my name,theHunter,theHunter Primal,cos
55426012,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
22371742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.428761
44370809,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.403607
80128229,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.398548
153097805,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.377874
42198457,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.366471
77194086,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.359721
176225382,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348623
55906572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.036532,0.0,0.345468
198346312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.332443


In [49]:
# Sum the cosines.
cos_sum = sum([each_player_cos[1] for each_player_cos in similar_players_high])

# Fill the rating of non-rated games for the specified player with the weighted average.
x = 0
for each_game_rating in similar_players_high_df.loc[player_id]:
    rating_cos_total = 0
    if each_game_rating == 0.0:
        for each_player in range(1, num_similar_players):
            their_rating = similar_players_high_df.iloc[each_player][x]
            their_cos = similar_players_high_df.iloc[each_player][len(similar_players_high_df.columns) - 1]
            rating_cos_total = rating_cos_total + their_rating * their_cos
        similar_players_high_df.loc[player_id][x] = rating_cos_total / cos_sum
    x = x + 1

In [50]:
similar_players_high_df

Unnamed: 0,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,140,15 Days,...,liteCam Game 100 FPS Game Capture,planetarian ~the reverie of a little planet~,rFactor 2,realMyst,resident evil 4 / biohazard 4,sZone-Online,the static speaks my name,theHunter,theHunter Primal,cos
55426012,0.0,0.101457,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.053956,0.0,0.0,0.0,0.0,0.0,0.048478,0.0,1.0
22371742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.428761
44370809,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.403607
80128229,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.398548
153097805,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.377874
42198457,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.366471
77194086,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.359721
176225382,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348623
55906572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.036532,0.0,0.345468
198346312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.332443


In [51]:
player_games = list(main_df.loc[main_df['player'] == player_id].game)
# cos is added so it does not appear in the recommended list.
player_games.append('cos')

In [52]:
player_games

['DARK SOULS II',
 'Dungeon Defenders',
 'Kerbal Space Program',
 'War Thunder',
 'Borderlands',
 'Left 4 Dead 2',
 'PAYDAY 2',
 'The Witcher 2 Assassins of Kings Enhanced Edition',
 'Battlefield Bad Company 2',
 "Divinity II Developer's Cut",
 'Killing Floor',
 'Dragon Age Origins',
 'Borderlands 2',
 'The Witcher Enhanced Edition',
 'Portal 2',
 'Dungeon Defenders II',
 'Age of Empires II HD Edition',
 'Fallout New Vegas',
 'Warframe',
 'Sleeping Dogs',
 'Neverwinter Nights 2 Platinum',
 'Counter-Strike Global Offensive',
 'Legend of Grimrock',
 'PlanetSide 2',
 'FTL Faster Than Light',
 'Sanctum',
 'Eets Munchies',
 'Batman Arkham Asylum GOTY Edition',
 'Terraria',
 'Halo Spartan Assault',
 'Torchlight II',
 'The Walking Dead',
 'Beat Hazard',
 'Batman Arkham City GOTY',
 'SpeedRunners',
 'Rogue Legacy',
 'Magicka',
 "Don't Starve Together Beta",
 'Darksiders',
 'Saints Row The Third',
 'Mount Your Friends',
 'Half-Life 2',
 'Trine 2',
 'Batman Arkham Origins',
 'The Walking Dead Se

In [53]:
# Create the recommendation list.
rec_by_player = []

# Look for unplayed games with the highest calculated rating.
for game_index, rating in enumerate(similar_players_high_df.loc[player_id].values):
    if rating > 0.0:
        game_name = similar_players_high_df.columns[game_index]
        if game_name not in player_games:
            rec_by_player.append((game_name, rating))
            
# Sort the recommendations from best to worst.
rec_by_player.sort(key=lambda game_rating: game_rating[1], reverse=True)

In [54]:
main_df.loc[main_df['player'] == player_id]

Unnamed: 0,player,game,rating
7722,55426012,DARK SOULS II,10.000000
7723,55426012,Dungeon Defenders,8.351339
7724,55426012,Kerbal Space Program,5.688116
7725,55426012,War Thunder,5.603570
7726,55426012,Borderlands,5.561296
...,...,...,...
7845,55426012,Star Wars Republic Commando,1.004227
7846,55426012,Starbound,1.004227
7847,55426012,1000 Amps,1.000000
7848,55426012,Superbrothers Sword & Sworcery EP,1.000000


In [55]:
rec_by_player

[('Counter-Strike Source', 1.2797389789235183),
 ("Sid Meier's Civilization V", 1.1750437265403966),
 ('Dark Souls Prepare to Die Edition', 0.8003128038210632),
 ("Garry's Mod", 0.7579721823393155),
 ('Mass Effect', 0.6275970700483713),
 ('Football Manager 2012', 0.5464069618221463),
 ('Blacklight Retribution', 0.49074837273607474),
 ('Mass Effect 2', 0.4428698819965098),
 ('Football Manager 2014', 0.4225785011975611),
 ('Spiral Knights', 0.4195313142056184),
 ('Ultra Street Fighter IV', 0.41941421662193895),
 ('Tomb Raider', 0.4163829155328908),
 ('Rocket League', 0.40434329847017986),
 ('Deus Ex Human Revolution', 0.3857967803938051),
 ('Call of Duty 4 Modern Warfare', 0.38528615677377737),
 ('ARK Survival Evolved', 0.3848868867706504),
 ('Arma 3', 0.3817218332121256),
 ('Call of Duty Modern Warfare 2 - Multiplayer', 0.365950302513838),
 ('Robocraft', 0.3562367756453571),
 ('Defiance', 0.35535231455289934),
 ('Far Cry 3', 0.30135168488127706),
 ('PAYDAY The Heist', 0.3009957174507954