In [1]:
import json
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import operator
import math
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize 

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/stevenlouie/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Process data files

In [2]:
def eval_file(file):
    f = open(file, "r", encoding="utf-8")
    lst = []
    for line in f:
        obj = eval(line)
        lst.append(obj)
    json.dumps(lst)
    return lst

In [3]:
bundle_data = eval_file("../data/bundle_data.json")
users_items = eval_file("../data/australian_users_items.json")
steam_games = eval_file("../data/steam_games.json")

In [4]:
bundle_df = json_normalize(bundle_data, "items", ["bundle_final_price", "bundle_url", "bundle_price", "bundle_name", "bundle_id"])
steam_games_df = pd.DataFrame(data=steam_games)

# Data Preprocessing

In [5]:
def clean_bundle_data():    
    clean_bundle_df = bundle_df[bundle_df["genre"]!=""]
    return clean_bundle_df

In [6]:
def clean_games_data():
    steam_games_df.drop(labels=["app_name", "tags", "metascore"], axis=1, inplace=True)
    steam_games_df.dropna(subset=["title", "genres"], inplace=True)

In [7]:
def remove_idle_users():
    for i in users_items:
        if i["items_count"]==0:
            users_items.remove(i)

In [8]:
def perform_data_preprocessing():
    clean_games_data()
    remove_idle_users()

In [9]:
def filter_by_playtime(user_id, playtime):
    user_data = {}
    
    for user in users_items:
        if user["user_id"]==user_id:
            user_data = user
            break
    
    filtered_df = json_normalize(user_data, "items", ["user_id", "steam_id", "user_url"])
    filtered_df = filtered_df[filtered_df["playtime_forever"] > playtime]
    
    return filtered_df

In [166]:
def extract_genres_data_from_bundles(): 
    
    clean_bundle_df = clean_bundle_data()
    
    filter_genre_from_bundle = {}
    for id_ in clean_bundle_df["bundle_id"].unique():
        genres_list = []
#         words = ""
        for genres in clean_bundle_df[clean_bundle_df["bundle_id"]==id_]["genre"]:
#             for genre in genres.split(","):
#                 if genre not in words:
#                     words = words + genre + " "
            for genre in genres.split(", "):
                if genre not in genres_list:
                    genres_list.append(genre)
        filter_genre_from_bundle[id_] = genres_list
        
    return filter_genre_from_bundle

In [11]:
perform_data_preprocessing()

In [14]:
steam_games_df.head()

Unnamed: 0,publisher,genres,title,url,release_date,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140,Kotoshiro,
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980,Secret Level SRL,Mostly Positive
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290,Poolians.com,Mostly Positive
3,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,0.83,http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400,彼岸领域,
5,Trickjump Games Ltd,"[Action, Adventure, Simulation]",Battle Royale Trainer,http://store.steampowered.com/app/772540/Battl...,2018-01-04,,http://steamcommunity.com/app/772540/reviews/?...,"[Single-player, Steam Achievements]",3.99,False,772540,Trickjump Games Ltd,Mixed


In [167]:
all_bundles_genres_data = extract_genres_data_from_bundles()

In [16]:
filtered_playtime_df = filter_by_playtime('76561197970982479', 30)

# Recommend bundle to user based on most played genres

In [170]:
def get_all_genres_by_user(user):
    genre_count = {}
    for index, row in user.iterrows():
        if steam_games_df[steam_games_df["id"]==row["item_id"]].index.any()==True:
            game_genres = steam_games_df[steam_games_df["id"]==row["item_id"]]["genres"].values[0]
            for genre in game_genres:
                if genre not in genre_count:
                    genre_count[genre] = 1
                else:
                    genre_count[genre] = genre_count[genre] + 1

    return sorted(genre_count.items(), key=operator.itemgetter(1), reverse=True)

In [171]:
def parse_genres(genres):
#     g = ""
    lst = []
    for genre in genres[:math.ceil(len(genres)*.75)]:
#         g = g + genre[0] + " "
        lst.append(genre[0])
    return lst

In [185]:
def get_bundle_similarity_scores(user_genres, all_bundles_genres_data):
    sim = {}
#     ug = word_tokenize(user_genres)
    
    for id_ in list(all_bundles_genres_data.keys()):
        bundle_genres = all_bundles_genres_data[id_]

        lst1 = []
        lst2 = []
        cos = 0
        vector = list(set().union(user_genres, bundle_genres))
        for i in vector:
            if i in user_genres:
                lst1.append(1)
            else:
                lst1.append(0)
                
            if i in bundle_genres:
                lst2.append(1)
            else:
                lst2.append(0)
            cos += lst1[-1]*lst2[-1]

        sim[id_] = cos / float((sum(lst1)*sum(lst2))**0.5)
    
    return sorted(sim.items(), key=operator.itemgetter(1), reverse=True)

In [173]:
all_user_genres = get_all_genres_by_user(filtered_playtime_df)

In [175]:
top_rated_genres = parse_genres(all_user_genres)

In [186]:
sim = get_bundle_similarity_scores(top_rated_genres, all_bundles_genres_data)

In [187]:
sim

[('803', 0.9428090415820635),
 ('813', 0.9428090415820635),
 ('580', 0.8888888888888888),
 ('948', 0.8888888888888888),
 ('396', 0.8819171036881969),
 ('505', 0.8819171036881969),
 ('364', 0.8819171036881969),
 ('399', 0.8819171036881969),
 ('482', 0.8819171036881969),
 ('957', 0.8819171036881969),
 ('802', 0.8819171036881969),
 ('804', 0.8819171036881969),
 ('403', 0.8819171036881969),
 ('395', 0.8819171036881969),
 ('542', 0.8819171036881969),
 ('356', 0.8819171036881969),
 ('268', 0.8432740427115678),
 ('808', 0.8249579113843055),
 ('833', 0.8249579113843055),
 ('425', 0.8249579113843055),
 ('663', 0.8249579113843055),
 ('225', 0.8249579113843055),
 ('565', 0.8249579113843055),
 ('1437', 0.816496580927726),
 ('1441', 0.816496580927726),
 ('501', 0.816496580927726),
 ('801', 0.816496580927726),
 ('1206', 0.816496580927726),
 ('1143', 0.816496580927726),
 ('398', 0.816496580927726),
 ('824', 0.816496580927726),
 ('569', 0.816496580927726),
 ('646', 0.816496580927726),
 ('746', 0.81649

In [188]:
def get_bundle_recommendations(user_id, num_recommendations, sim, bundle_data=bundle_data):
    
    recommendations = []
    for id_ in sim[:num_recommendations]:
        for bundle in bundle_data:
            if bundle["bundle_id"]==id_[0]:
                recommendations.append(bundle)
                break
    
    return recommendations

In [189]:
recommendations = get_bundle_recommendations('76561197970982479', 3, sim)

In [190]:
recommendations

[{'bundle_final_price': '$4.87',
  'bundle_url': 'http://store.steampowered.com/bundle/803/?utm_source=SteamDB&utm_medium=SteamDB&utm_campaign=SteamDB%20Bundles%20Page',
  'bundle_price': '$32.41',
  'bundle_name': 'Platformer Bundle',
  'bundle_id': '803',
  'items': [{'genre': 'Action, Adventure, Indie, Simulation, Strategy',
    'item_id': '104200',
    'discounted_price': '$0.49',
    'item_url': 'http://store.steampowered.com/app/104200',
    'item_name': 'BEEP'},
   {'genre': 'Action, Adventure, Indie, Simulation, Strategy',
    'item_id': '263980',
    'discounted_price': '$1.99',
    'item_url': 'http://store.steampowered.com/app/263980',
    'item_name': 'Out There Somewhere'},
   {'genre': 'Action, Adventure, Indie',
    'item_id': '317250',
    'discounted_price': '$9.99',
    'item_url': 'http://store.steampowered.com/app/317250',
    'item_name': 'Airscape - The Fall of Gravity'},
   {'genre': 'Action, Adventure, Casual, Indie',
    'item_id': '341500',
    'discounted_pri

# Group Generation and Recommendation

In [191]:
def group_generation(user_id, num_of_users):
    user_data = {}
    
    for user in users_items:
        if user["user_id"]==user_id:
            user_data = user
            break
            
    filtered_df = json_normalize(user_data, "items", ["user_id", "steam_id", "user_url"])
    filtered_df = filtered_df[filtered_df["playtime_forever"] > 100]
    
    genres = get_all_genres_by_user(filtered_df)
    top_rated_genres = parse_genres(genres)
#     print(genres)
#     print(top_rated_genres)
    sim = {}
#     ug = word_tokenize(top_rated_genres)
    counter = 0
    
    np.random.shuffle(users_items)
    for user in users_items:
        if user["user_id"]!=user_id:
            user_df = json_normalize(user, "items", ["user_id", "steam_id", "user_url"])
            g = get_all_genres_by_user(user_df)
            parsed_genres = parse_genres(g)
#             tokens = word_tokenize(parsed_genres)
            
            lst1 = []
            lst2 = []
            cos = 0
            vector = list(set().union(top_rated_genres, parsed_genres))
            for i in vector:
                if i in top_rated_genres:
                    lst1.append(1)
                else:
                    lst1.append(0)

                if i in parsed_genres:
                    lst2.append(1)
                else:
                    lst2.append(0)
                cos += lst1[-1]*lst2[-1]
            
            if float((sum(lst1)*sum(lst2))**0.5)==0:
                continue
                
            cos = cos / float((sum(lst1)*sum(lst2))**0.5)
            if cos >= 0.75:
                counter += 1
                sim[user["user_id"]] = cos
                
            if counter == num_of_users:
                break
                    
                
    return sorted(sim, key=operator.itemgetter(1), reverse=True), top_rated_genres

In [192]:
user_group, top_genres = group_generation('76561197970982479', 100)

In [193]:
user_group

['My_Special_Place',
 'hybrid905',
 'Dythoras',
 'Uve-Seen_Nothing',
 'kummaz',
 'julezislive',
 'cronix0',
 'SpicyHolo',
 'zoobark',
 'lowanalawliet',
 'loadsamoney',
 'Torca2001',
 'potternerd',
 'Toothless_Agression',
 'anykylator',
 'flippylejeff',
 'okaygetrektscrub',
 'KiritoNZ',
 'niconicolii',
 'nicolasbrendo',
 'DidiKong',
 'shez13',
 'TheZomyo',
 'Aesirn',
 'Sentinel07',
 'Zeuaireus',
 'Deathjester96',
 'dekruz',
 'Kaka1800',
 'davo27',
 'dagster_007',
 'hammyhamm',
 'galasal',
 'Lamus',
 'Kaspa',
 'pandasmokingbamboo',
 'xScrivz',
 'LOLBITCHS',
 'FORTHMINGUTH',
 'ANIMEPUSSY',
 'ABSOLUTLY_NOTHING',
 '76561198078883335',
 '76561198000881087',
 '76561198095847205',
 '76561198068601332',
 '76561198040814960',
 '76561197989918287',
 '76561198087011672',
 '76561198090345356',
 '76561198078580381',
 '76561198046023819',
 '76561198027481760',
 '76561198079350354',
 '76561198064735291',
 '76561198030858298',
 '76561198039606680',
 '76561198058406533',
 '76561198006894551',
 '76561198

In [227]:
def get_multiplayer_games(steam_games):
    games = steam_games.copy()
    
    for i, row in games.iterrows():
        flag = False
        if type(row["specs"]) != list:
            games.drop([i], inplace=True)
            continue
            
        for spec in row["specs"]:
            if spec=="Online Multi-Player":
                flag = True
                break
                
        if flag==False:
            games.drop([i], inplace=True)
            
    games.reset_index(inplace=True)
    games["discount_price"].fillna(0, inplace=True)
    games["price"].fillna(0, inplace=True)
            
    return games

In [196]:
multiplayer_games = get_multiplayer_games(steam_games_df)

In [214]:
def get_most_similar_games(games, user_group, top_genres, num_of_games):
    sim = {}
    for i, row in games.iterrows():
        game_genres = row["genres"]
        
        lst1 = []
        lst2 = []
        cos = 0
        vector = list(set().union(top_genres, game_genres))
        for i in vector:
            if i in top_genres:
                lst1.append(1)
            else:
                lst1.append(0)
            if i in game_genres:
                lst2.append(1)
            else:
                lst2.append(0)
            cos += lst1[-1]*lst2[-1]
            
        if float((sum(lst1)*sum(lst2))**0.5)==0:
            continue
                
        sim[row["id"]] = cos / float((sum(lst1)*sum(lst2))**0.5)
    
    sim = sorted(sim.items(), key=operator.itemgetter(1), reverse=True)
    
    games_list = []
    for i in sim[:num_of_games]:
        games_list.append(i[0])
        
    return games_list

In [281]:
list_of_games = get_most_similar_games(multiplayer_games, user_group, top_genres, 10)

In [282]:
list_of_games

['413120',
 '666600',
 '515710',
 '638180',
 '462440',
 '254200',
 '434380',
 '366690',
 '562700',
 '582660']

In [286]:
def bundle_generation(games):
    bundle = {}
    bundle["name"] = "Made just for you"
    bundle["items"] = []
    discounted_total = 0
    total_price = 0
    for id_ in games:
        game = multiplayer_games[multiplayer_games["id"]==id_]
        
        item_info = {}
        item_info["item_id"] = id_
        item_info["genre"] = ', '.join(map(str, game["genres"].values[0]))
        item_info["name"] = game["title"].values[0]
        
        if type(game["price"]) != str:
            total_price += game["price"].values[0]
        
        discounted_total += game["discount_price"].values[0]
        
        bundle["items"].append(item_info)
    
    bundle["bundle_final_price"] = total_price - discounted_total
    
    return bundle

In [287]:
bundle = bundle_generation(list_of_games)

In [288]:
bundle

{'name': 'Made just for you',
 'items': [{'item_id': '413120',
   'genre': 'Action, Casual, Indie, Massively Multiplayer, Simulation, Sports, Strategy',
   'name': 'Tactics Forever'},
  {'item_id': '666600',
   'genre': 'Action, Adventure, Casual, Indie, RPG, Simulation, Strategy',
   'name': 'Zombie Town'},
  {'item_id': '515710',
   'genre': 'Action, Adventure, Casual, Free to Play, Indie, Massively Multiplayer, RPG, Simulation',
   'name': 'Creativerse - Pro'},
  {'item_id': '638180',
   'genre': 'Action, Adventure, Casual, Free to Play, Indie, Massively Multiplayer, RPG, Simulation',
   'name': 'Creativerse - Welcome Bundle'},
  {'item_id': '462440',
   'genre': 'Action, Adventure, Indie, Massively Multiplayer, RPG, Simulation, Strategy, Early Access',
   'name': 'ROKH'},
  {'item_id': '254200',
   'genre': 'Adventure, Casual, Indie, RPG, Simulation, Strategy',
   'name': 'FortressCraft Evolved!'},
  {'item_id': '434380',
   'genre': 'Action, Adventure, Casual, Indie, Massively Mul