In [49]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Location of the CSV file this notebook reads its input from.
DATA_PATH = "sample_output_preprocessing.csv"

# Load the file into the working DataFrame used by the cells below.
df = pd.read_csv(DATA_PATH)

### Bonus: Recommend games similar to a given game (using engine, genre1, genre2, genre3)
#### Input: One Game --> Output: List of 5 similar games

In [2]:
# Preview the first rows of the loaded data to check which columns are available.
df.head()

Unnamed: 0.1,Unnamed: 0,user_id,game_name,time_is_play,searchingName,collection,follows,game_engines,genre1,genre2,...,09_count_plus,10_count_100,11_count_allstyle,12_count_completed,13_count_speedrun,14_count_backlog,15_count_playing,16_count_retired,17_review_score,18_count_review
0,0,5250,Counter-Strike,1.0,Counter-Strike,Counter-Strike,116.0,GoldSrc,Shooter,,...,9.0,5.0,42.0,764.0,1.0,2031.0,64.0,1905.0,81.0,231.0
1,1,76767,Counter-Strike,366.0,Counter-Strike,Counter-Strike,116.0,GoldSrc,Shooter,,...,9.0,5.0,42.0,764.0,1.0,2031.0,64.0,1905.0,81.0,231.0
2,2,86540,Counter-Strike,1.0,Counter-Strike,Counter-Strike,116.0,GoldSrc,Shooter,,...,9.0,5.0,42.0,764.0,1.0,2031.0,64.0,1905.0,81.0,231.0
3,3,103360,Counter-Strike,1.0,Counter-Strike,Counter-Strike,116.0,GoldSrc,Shooter,,...,9.0,5.0,42.0,764.0,1.0,2031.0,64.0,1905.0,81.0,231.0
4,4,144736,Counter-Strike,1.1,Counter-Strike,Counter-Strike,116.0,GoldSrc,Shooter,,...,9.0,5.0,42.0,764.0,1.0,2031.0,64.0,1905.0,81.0,231.0


# Content-Based Approach (For New Users)

### Generate a list of 5 recommended games based on one input game

In [31]:
# Columns that describe a game for the content-based comparison.
features = ["game_name", "game_engines", "genre1", "genre2", "genre3"]

# Missing values become empty strings so these columns can later be
# concatenated as plain text.
for feature in features:
    filled_column = df[feature].fillna('')
    df[feature] = filled_column

# Keep only the descriptive columns.
df_important = df[features]


In [32]:
# The same game appears on many rows (df.head() shows one row per user_id),
# so keep one row per distinct feature combination and renumber the rows from 0.
df_important = df_important.drop_duplicates().reset_index(drop=True)


In [33]:
# Preview the de-duplicated feature table.
df_important.head()

Unnamed: 0,game_name,game_engines,genre1,genre2,genre3
0,Counter-Strike,GoldSrc,Shooter,,
1,Worms Armageddon,,Shooter,Platform,Strategy
2,Left 4 Dead 2,Source,Shooter,,
3,Overlord,,Role-playing (RPG),,
4,BattleBlock Theater,,Platform,Puzzle,Adventure


In [34]:
#Combine relevant features
def combined_features(row):
    """Concatenate a row's engine and three genre fields into one string.

    The four values are always joined with single spaces, so an empty field
    still contributes its separator (e.g. trailing spaces when genre2 and
    genre3 are empty).
    """
    parts = [row[column] for column in ('game_engines', 'genre1', 'genre2', 'genre3')]
    return " ".join(parts)
# Build one feature string per game; axis=1 passes each row to combined_features.
df_important["combined_features"] = df_important.apply(combined_features, axis=1)

In [35]:
# (rows, columns) of the feature table after adding combined_features.
df_important.shape

(115, 6)

In [36]:
# Inspect the first 15 rows, including the new combined_features column.
df_important.head(15)

Unnamed: 0,game_name,game_engines,genre1,genre2,genre3,combined_features
0,Counter-Strike,GoldSrc,Shooter,,,GoldSrc Shooter
1,Worms Armageddon,,Shooter,Platform,Strategy,Shooter Platform Strategy
2,Left 4 Dead 2,Source,Shooter,,,Source Shooter
3,Overlord,,Role-playing (RPG),,,Role-playing (RPG)
4,BattleBlock Theater,,Platform,Puzzle,Adventure,Platform Puzzle Adventure
5,Borderlands,Unreal Engine,Shooter,Role-playing (RPG),,Unreal Engine Shooter Role-playing (RPG)
6,Dead Island,Chrome Engine,Shooter,Hack and slash/Beat 'em up,,Chrome Engine Shooter Hack and slash/Beat 'em up
7,Jazzpunk,Unity,Adventure,Indie,,Unity Adventure Indie
8,Left 4 Dead,Source,Shooter,,,Source Shooter
9,Legend of Grimrock,In-house engine,Puzzle,Role-playing (RPG),Adventure,In-house engine Puzzle Role-playing (RPG) Adve...


In [47]:
# Extract features: turn each game's combined feature string into a row of
# token counts (one row per row of df_important).
# NOTE(review): with its default settings CountVectorizer lowercases the text
# and splits it into word tokens, so multi-word values such as "Unreal Engine",
# "Chrome Engine" and "In-house engine" appear to share an "engine" token and
# would count as partially similar — confirm this is intended.
cv = CountVectorizer()
count_matrix = cv.fit_transform(df_important["combined_features"])
# Convert to a dense array only for display.
print("Count Matrix:", count_matrix.toarray())

Count Matrix: [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [48]:
# Pairwise cosine similarity between the rows of count_matrix.
# cosine_sim[i][j] is the similarity between the games at rows i and j of
# df_important; the diagonal is each game compared with itself.
cosine_sim = cosine_similarity(count_matrix)
cosine_sim

array([[1.        , 0.40824829, 0.5       , ..., 0.        , 0.        ,
        0.        ],
       [0.40824829, 1.        , 0.40824829, ..., 0.40824829, 0.57735027,
        0.        ],
       [0.5       , 0.40824829, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.40824829, 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.57735027, 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [39]:
def get_index_from_name(name):
    """Return the row label in ``df_important`` of the first game named ``name``.

    Parameters
    ----------
    name : str
        Value compared for exact equality against the ``game_name`` column.

    Returns
    -------
    The index label of the first matching row.

    Raises
    ------
    IndexError
        If no row has that ``game_name``. This is the same exception type the
        bare ``.values[0]`` lookup raised; only the message is more specific.
    """
    matching_labels = df_important[df_important["game_name"] == name].index.values
    if len(matching_labels) == 0:
        # Fail with the offending name instead of "index 0 is out of bounds".
        raise IndexError(
            "No game named {!r} found in df_important['game_name']".format(name)
        )
    return matching_labels[0]



In [87]:
#Generating Similar Games Matrix
def get_game_from_index(index):
    """Return the ``game_name`` of the row in ``df_important`` labelled ``index``.

    Raises ``IndexError`` when no row carries that label.
    """
    row_mask = df_important.index == index
    matching_rows = df_important[row_mask]
    matching_names = matching_rows['game_name'].values
    return matching_names[0]

def generate_similar_games(game_user_likes, top_n=5):
    """Print the games most similar to ``game_user_likes``.

    Similarities are read from the row of the module-level ``cosine_sim``
    matrix that corresponds to the game's index in ``df_important``.

    Parameters
    ----------
    game_user_likes : str
        Exact ``game_name`` of the reference game.
    top_n : int, default 5
        Number of recommendations to print.

    Returns
    -------
    None
        The recommended names and a Name/Score table are printed.

    Raises
    ------
    IndexError
        Propagated from ``get_index_from_name`` when the game is unknown.
    """
    game_index = get_index_from_name(game_user_likes)

    # Exclude the query game by its index rather than by dropping the first
    # sorted entry: another game with identical features also scores 1.0 and,
    # if it sits earlier in the table, would sort ahead of the query game.
    candidates = [
        (index, similarity)
        for index, similarity in enumerate(cosine_sim[game_index])
        if index != game_index
    ]
    # list.sort is stable, so ties keep their original table order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    top_games = candidates[:top_n]

    print('The ' + str(top_n) + ' most recommended Games (Based on Genre and Engine) to '+game_user_likes +' are: \n')

    # Collect plain records and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and copied the frame on every call.
    records = []
    for index, similarity in top_games:
        recommended_name = get_game_from_index(index)
        print(recommended_name)
        records.append({'Name': recommended_name, 'Score': similarity})

    score = pd.DataFrame(records, columns=['Name', 'Score'])
    print(score)

# Testing

In [88]:
# Check the recommender on Counter-Strike (engine GoldSrc, genre Shooter).
generate_similar_games("Counter-Strike")

The 5 most recommended Games (Based on Genre and Engine) to Counter-Strike are: 

Left 4 Dead 2
Left 4 Dead
Iron Brigade
Fuse
Worms Armageddon
               Name     Score
0     Left 4 Dead 2  0.500000
1       Left 4 Dead  0.500000
2      Iron Brigade  0.500000
3              Fuse  0.500000
4  Worms Armageddon  0.408248


In [89]:
# Check the recommender on Worms Armageddon (no engine; Shooter, Platform, Strategy).
generate_similar_games("Worms Armageddon")

The 5 most recommended Games (Based on Genre and Engine) to Worms Armageddon are: 

Iron Brigade
The Swapper
Braid
Toki Tori
Gravilon
           Name     Score
0  Iron Brigade  0.816497
1   The Swapper  0.666667
2         Braid  0.666667
3     Toki Tori  0.666667
4      Gravilon  0.666667


In [90]:
# Check the recommender on Left 4 Dead 2 (engine Source, genre Shooter).
generate_similar_games("Left 4 Dead 2")

The 5 most recommended Games (Based on Genre and Engine) to Left 4 Dead 2 are: 

Left 4 Dead
Counter-Strike
Iron Brigade
Fuse
Worms Armageddon
               Name     Score
0       Left 4 Dead  1.000000
1    Counter-Strike  0.500000
2      Iron Brigade  0.500000
3              Fuse  0.500000
4  Worms Armageddon  0.408248


In [91]:
# Check the recommender on BattleBlock Theater (no engine; Platform, Puzzle, Adventure).
generate_similar_games("BattleBlock Theater")

The 5 most recommended Games (Based on Genre and Engine) to BattleBlock Theater are: 

The Swapper
Braid
Super Meat Boy
Toki Tori
BEEP
             Name     Score
0     The Swapper  0.666667
1           Braid  0.666667
2  Super Meat Boy  0.666667
3       Toki Tori  0.666667
4            BEEP  0.666667


In [45]:
#df_important[df_important.game_name == 'Starbound']

In [92]:
# Check the recommender on Fuse.
generate_similar_games("Fuse")

The 5 most recommended Games (Based on Genre and Engine) to Fuse are: 

Crysis
Indiana Jones and the Last Crusade
Mirror's Edge
Betrayer
Counter-Strike
                                 Name     Score
0                              Crysis  0.816497
1  Indiana Jones and the Last Crusade  0.707107
2                       Mirror's Edge  0.632456
3                            Betrayer  0.632456
4                      Counter-Strike  0.500000
