In [1]:
# importing libraries
import json
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import operator
import math
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize 

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/stevenlouie/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Process data files

In [2]:
# function used to process the data files
def eval_file(file):
    f = open(file, "r", encoding="utf-8")
    lst = []
    for line in f:
        obj = eval(line)
        lst.append(obj)
    json.dumps(lst)
    return lst

In [3]:
# process data files into respective variables
bundle_data = eval_file("../data/bundle_data.json")
users_items = eval_file("../data/australian_users_items.json")
steam_games = eval_file("../data/steam_games.json")

In [4]:
# create a dataframe from bundle_data that consists of all the games within each bundle
bundle_df = json_normalize(bundle_data, "items", ["bundle_final_price", "bundle_url", "bundle_price", "bundle_name", "bundle_id"])
# load games data in as a dataframe
steam_games_df = pd.DataFrame(data=steam_games)

# Data Preprocessing

For data preprocessing, we will be cleaning up the datasets by removing any empty entries in the datasets. This will allow us to build a more effective recommender's system using data with non-empty entries.

We will be removing any games in the bundle's data that have no genre. Games that have no genre are not playable games and are probably media data such as movies, videos, songs, etc. These types of data will not be useful in our implementation, so it is best to filter them out.

In [None]:
# function used to remove any games in the bundle's data that have no genre
def clean_bundle_data(df):    
    clean_bundle_df = df[df["genre"]!=""]
    return clean_bundle_df

Some of the features in the steam games dataframe are duplicates of other features. For instance, 'app_name' and 'title' are two features in the dataset that mean the same thing and have the same value. Same goes for 'tags' where 'genres' mean and have the same value in the dataset. These extra features are of no use to us and will only make it harder to understand the dataset.

In [None]:
# remove unwanted features in steam games dataframe
def clean_games_data():
    steam_games_df.drop(labels=["app_name", "tags", "metascore"], axis=1, inplace=True)
    steam_games_df.dropna(subset=["title", "genres"], inplace=True)

Users that have no game history have no use in our recommender's system implementation because these users have nothing to offer, so it is best to filter them out.

In [7]:
# function used to remove any users that have not played any games
def remove_idle_users():
    # loop through users data and remove users that are idle
    for i in users_items:
        if i["items_count"]==0:
            users_items.remove(i)

In [8]:
# function used to perform the aforementioned preprocessing techniques
def perform_data_preprocessing():
    clean_games_data()
    remove_idle_users()

In the user-items dataset, there are users that have played hundreds and hundreds of games but not all data are useful and meaningful to us. For instance, users that might've downloaded a game but have not played or played long enough to show any interest in the game are not useful because users that like to play a specific type of game or game will show more playing time. Therefore, this function is dedicated to filter out any items in the active user's items list that have little to no play time at all because noisy data will only skew and affect our results in a negative way.

In [9]:
# function used to filter out any games user has played with little to no playing time at all
# returns a dataframe of relevant games user has played
def filter_by_playtime(user_id, playtime):
    user_data = {}
    
    for user in users_items:
        if user["user_id"]==user_id:
            user_data = user
            break
    
    filtered_df = json_normalize(user_data, "items", ["user_id", "steam_id", "user_url"])
    filtered_df = filtered_df[filtered_df["playtime_forever"] > playtime]
    
    return filtered_df

In [10]:
# function used to extract every genre in every bundle
def extract_genres_data_from_bundles(bundle_df): 
    
    clean_bundle_df = clean_bundle_data(bundle_df)
    
    # uses an object to hold key value pairs
    # keys will be bundle id and values will be genres of games inside that bundle
    filter_genre_from_bundle = {}
    
    # loop through every bundle and extract genres
    for id_ in clean_bundle_df["bundle_id"].unique():
        genres_list = []
        for genres in clean_bundle_df[clean_bundle_df["bundle_id"]==id_]["genre"]:
            for genre in genres.split(", "):
                if genre not in genres_list:
                    genres_list.append(genre)
        filter_genre_from_bundle[id_] = genres_list
        
    return filter_genre_from_bundle

In [11]:
perform_data_preprocessing()

In [12]:
steam_games_df.head()

Unnamed: 0,publisher,genres,title,url,release_date,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140,Kotoshiro,
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980,Secret Level SRL,Mostly Positive
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290,Poolians.com,Mostly Positive
3,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,0.83,http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400,彼岸领域,
5,Trickjump Games Ltd,"[Action, Adventure, Simulation]",Battle Royale Trainer,http://store.steampowered.com/app/772540/Battl...,2018-01-04,,http://steamcommunity.com/app/772540/reviews/?...,"[Single-player, Steam Achievements]",3.99,False,772540,Trickjump Games Ltd,Mixed


In [13]:
all_bundles_genres_data = extract_genres_data_from_bundles(bundle_df)

In [44]:
np.random.shuffle(users_items)

In [45]:
users_items[0]["user_id"]

'xshun22xx'

In [46]:
filtered_playtime_df = filter_by_playtime(users_items[0]["user_id"], 60)

# Recommend bundles to a user based on most played genres

The first use case of our project is using a content-based recommendation system to recommend bundles to the active user based on the user's top played genres. This is similar to a user logging into Steam and seeing a list of recommendations based on the history of games that user has played. We are only recommending bundles that are most similar to the active user because this would improve user experience by recommending bundles that contain games that user has previously shown interest in playing.

In [47]:
# function used to calculate the cosine similarity between two given vectors
def cosine_sim(vector1, vector2):
    
    # holds the cosine similarity score of the two vectors
    cos = 0
    
    # used to store 0s or 1s for respective vector
    lst1 = []
    lst2 = []
    
    # gets the elements that appear in both vectors and store them in a list
    vector = list(set().union(vector1, vector2))
    
    # for every element in both vectors, append 1 to lst1 if element appears in vectors otherwise append 0
    # repeat this for both vectors
    for i in vector:
        if i in vector1:
            lst1.append(1)
        else:
            lst1.append(0)    
        if i in vector2:
            lst2.append(1)
        else:
            lst2.append(0)
        cos += lst1[-1]*lst2[-1]
    
    # used to hold the magnitude of cosine which is the multiple of the length vectors
    magnitude = float((sum(lst1)*sum(lst2))**0.5)
    
    # only calculate cosine similarity if magnitude/denominator is not zero
    if magnitude != 0:
        cos = cos / magnitude
    
    return cos, magnitude

The function takes the previously filtered dataframe that has filtered out games that user have not played or have little play time to extract every type of genre of games user has played. This will return a list of sorted genres in descending order which will later be used to extract only the top genres user has played.

In [48]:
# function used to extract every genre that user has played from the previously filtered dataframe
def get_all_genres_by_user(user):
    genre_count = {}
    for index, row in user.iterrows():
        if steam_games_df[steam_games_df["id"]==row["item_id"]].index.any()==True:
            game_genres = steam_games_df[steam_games_df["id"]==row["item_id"]]["genres"].values[0]
            for genre in game_genres:
                if genre not in genre_count:
                    genre_count[genre] = 1
                else:
                    genre_count[genre] = genre_count[genre] + 1

    return sorted(genre_count.items(), key=operator.itemgetter(1), reverse=True)

In [49]:
# function used to get the top genres user has played
# this will give us a better understanding of what type of games user likes to play and only recommend relevant games
def parse_genres(genres):
    lst = []
    for genre in genres[:math.ceil(len(genres)*.7)]:
        lst.append(genre[0])
    return lst

In order to recommend relevant bundles to the user, we have to calculate the cosine similarity between user's top genres played and genres of each bundle. This function will return a list of sorted bundles in descending order which will be used later to filter out and recommend the most similar bundles based on user's top played genres.

In [50]:
# function used to calculate the similarity scores between user's top genres played and genres of each bundle
def get_bundle_similarity_scores(user_genres, all_bundles_genres_data):
    sim = {}
    
    for id_ in list(all_bundles_genres_data.keys()):
        bundle_genres = all_bundles_genres_data[id_]

        cos,_ = cosine_sim(user_genres, bundle_genres)
        
        sim[id_] = cos
    
    return sorted(sim.items(), key=operator.itemgetter(1), reverse=True)

In [51]:
all_user_genres = get_all_genres_by_user(filtered_playtime_df)

In [52]:
top_rated_genres = parse_genres(all_user_genres)

In [53]:
bundle_sims = get_bundle_similarity_scores(top_rated_genres, all_bundles_genres_data)

The following results are in the form of a tuple (bundle id, cosine similarity score) and the bundles in descending order and will be used to get the top k bundles to recommend to the users.

In [54]:
bundle_sims

[('476', 0.9128709291752769),
 ('653', 0.8164965809277261),
 ('1068', 0.8164965809277261),
 ('1130', 0.8164965809277261),
 ('474', 0.8164965809277261),
 ('302', 0.8164965809277261),
 ('336', 0.8164965809277261),
 ('385', 0.8164965809277261),
 ('428', 0.8164965809277261),
 ('450', 0.7302967433402214),
 ('1222', 0.7302967433402214),
 ('347', 0.7302967433402214),
 ('850', 0.7302967433402214),
 ('990', 0.7302967433402214),
 ('854', 0.7302967433402214),
 ('728', 0.7302967433402214),
 ('153', 0.7302967433402214),
 ('260', 0.7302967433402214),
 ('514', 0.7302967433402214),
 ('808', 0.7216878364870323),
 ('663', 0.7216878364870323),
 ('658', 0.7071067811865476),
 ('1392', 0.7071067811865476),
 ('1349', 0.7071067811865476),
 ('604', 0.7071067811865476),
 ('903', 0.7071067811865476),
 ('1330', 0.7071067811865476),
 ('869', 0.7071067811865476),
 ('1191', 0.7071067811865476),
 ('490', 0.7071067811865476),
 ('281', 0.7071067811865476),
 ('341', 0.7071067811865476),
 ('1196', 0.7071067811865476),
 (

In [55]:
# function takes the number of bundle recommendations to recommend to the active user
def get_bundle_recommendations(user_id, num_recommendations, sim, bundle_data=bundle_data):
    
    # list used to hold the top n bundles and recommend them to the user
    recommendations = []
    for id_ in sim[:num_recommendations]:
        for bundle in bundle_data:
            if bundle["bundle_id"]==id_[0]:
                recommendations.append(bundle)
                break
    
    return recommendations

In [56]:
recommendations = get_bundle_recommendations(users_items[0]["user_id"], 3, bundle_sims)

The final results our our content-based recommender system is a list of bundles that were recommended to the active user based on the top played genres by the active user. Our algorithm only recommends the most similar bundles to the active user in order to maximize the user experience because recommending a bundle that user have no interest in will not be a very useful recommender's system.

In [57]:
recommendations

[{'bundle_final_price': '$16.76',
  'bundle_url': 'http://store.steampowered.com/bundle/476/?utm_source=SteamDB&utm_medium=SteamDB&utm_campaign=SteamDB%20Bundles%20Page',
  'bundle_price': '$20.96',
  'bundle_name': 'Hero Siege Complete',
  'bundle_id': '476',
  'items': [{'genre': 'Action, Adventure, Indie, Massively Multiplayer, RPG',
    'item_id': '269210',
    'discounted_price': '$5.99',
    'item_url': 'http://store.steampowered.com/app/269210',
    'item_name': 'Hero Siege'},
   {'genre': 'Action, Adventure, Indie, Massively Multiplayer, RPG',
    'item_id': '312290',
    'discounted_price': '$4.99',
    'item_url': 'http://store.steampowered.com/app/312290',
    'item_name': "Hero Siege - The Karp of Doom (Digital Collector's Edition)"},
   {'genre': 'Action, Adventure, Indie, Massively Multiplayer, RPG',
    'item_id': '331181',
    'discounted_price': '$4.99',
    'item_url': 'http://store.steampowered.com/app/331181',
    'item_name': "Hero Siege - The Depths of Hell (Colle

# Group Generation

The second use case that we'll be covering is recommending personalized bundles to a group of users. On the Steam platform, there are community groups where users are able to interact and share their progress on a game and we wanted to implement a recommender's system that can effectively recommend personalized bundles for the group of users to enjoy and play together.

What we are doing here is creating a community of users that are most similar (users that show interest in similar genres of games) to the active user. The purpose of generating a group of users that are most similar to one another is to ensure that the personalized bundle that we will be creating will soothe and appease all users in the community. This approach is better than random selection of users because not everyone will like the same types of games.

In [117]:
# function used to generate a group of users that are most similar (users that have played similar genres of games) to the active user
# function takes two parameters:
# user_id: the id of the active user
# num_of_users: the total number of users to form a community for
def group_generation(user_id, num_of_users):
    
    # used to store the active user's data
    user_data = {}
    
    for user in users_items:
        if user["user_id"]==user_id:
            user_data = user
            break
            
    filtered_df = json_normalize(user_data, "items", ["user_id", "steam_id", "user_url"])
    filtered_df = filtered_df[filtered_df["playtime_forever"] > 100]
    
    # gets the list of top genres played by the active user
    genres = get_all_genres_by_user(filtered_df)
    top_rated_genres = parse_genres(genres)

    # used to store the most similar users to the active user
    sim = {}
    sim[user_id] = 1
    counter = 1
    
    # loop through all users and find most similar users
    for user in users_items:
        if user["user_id"]!=user_id:
            user_df = json_normalize(user, "items", ["user_id", "steam_id", "user_url"])
            g = get_all_genres_by_user(user_df)
            parsed_genres = parse_genres(g)
            
            cos, magnitude = cosine_sim(top_rated_genres, parsed_genres)
            
            if magnitude==0:
                continue
                
            if cos >= 0.85:
                counter += 1
                sim[user["user_id"]] = cos
                
            if counter == num_of_users:
                break
                    
    return sorted(sim, key=operator.itemgetter(1), reverse=True)

In [118]:
users_group = group_generation(users_items[0]["user_id"], 100)

The results of group generation based on the active user can be seen below. The user id's of similar users are stored in a list that will be used later for bundle generation and recommendation.

In [119]:
users_group

['ryzzler',
 'wyatt579',
 'aveta100',
 'turd_dumping',
 'fullysik75',
 'xshun22xx',
 'Specied',
 'Spector011',
 'Spuke134',
 'jonjon4351',
 'holidays_',
 'yoflyhoneys',
 'Wildfyre',
 'aikaholism',
 'bhSBM',
 'ShepThePotato',
 'chicken_steak',
 'jgjgjgjgjgjgjgjgjgjg',
 'kenpro',
 'deltaboy3',
 'meemsss',
 'heathhhhhhhhhhh',
 'Sekekai',
 'leejiajun',
 'galahz',
 'baconisyourmaster',
 'Casper11243',
 'parkachu',
 'MadTheck',
 'kappamillionswag',
 'jamescalimquim19',
 'Tagged69',
 '8KMMRPudgeONLY',
 'OHDAYUMMAHNIGGA',
 'MCTXgaming',
 '76561198085888060',
 '76561198069755218',
 '76561198100050147',
 '76561198061777437',
 '76561198068715658',
 '76561198062728303',
 '76561198014183294',
 '76561198257270755',
 '76561198069817222',
 '76561198092381867',
 '76561198051640600',
 '76561198075528164',
 '76561198045955978',
 '76561198015039559',
 '76561198070651185',
 '76561198093924479',
 '76561198022274007',
 '76561198091118272',
 '76561198087806857',
 '76561198054739976',
 '76561198089991627',
 '7

# Bundle Generation and Recommedation

Generating a community of users was the first part of this use case, generating a personalized bundle and recommending the group of users this bundle is the second part. How bundle generation works is because we are working with a group of users as opposed to a single user, we will only focus on the online multiplayer games in the steam games dataset for our bundle generation. This will allow users in the community to play and enjoy the games together to their fit.

In [120]:
# function used to filter out only the multiplayer games in the steam games dataset
# function parameters:
# steam_games: steam games dataset
def get_multiplayer_games(steam_games):
    games = steam_games.copy()
    
    # loop through each game in the steam games dataset to find only the online multiplayer games 
    for i, row in games.iterrows():
        flag = False
        if type(row["specs"]) != list:
            games.drop([i], inplace=True)
            continue
            
        for spec in row["specs"]:
            if spec=="Online Multi-Player":
                flag = True
                break
                
        if flag==False:
            games.drop([i], inplace=True)
            
    games.reset_index(inplace=True)
    
    # used to fill in any empty values for discount price and price b/c free to play items are string values
    games["discount_price"].fillna(0, inplace=True)
    games["price"].fillna(0, inplace=True)
    
    return games

In [121]:
multiplayer_games = get_multiplayer_games(steam_games_df)

This is done to get the top genres from the users group which will be used to generate a personalized bundle for the users group to enjoy.

In [122]:
# function used to get the top genres from the users group
# function parameters:
# users_group: the user group generated
def get_top_genres_from_users_group(users_group):
    all_genres = {}
    
    for user in users_group:
        user_data = {}
        for u in users_items:
            if u["user_id"]==user:
                user_data = u
                break
            
        filtered_df = json_normalize(user_data, "items", ["user_id", "steam_id", "user_url"])
    
        genres = get_all_genres_by_user(filtered_df)
    
        for genre in genres:
            if genre[0] not in all_genres:
                all_genres[genre[0]] = genre[1]
            else:
                all_genres[genre[0]] += genre[1]
          
    all_genres = sorted(all_genres.items(), key=operator.itemgetter(1), reverse=True)
    
    return parse_genres(all_genres)

In [123]:
top_users_group_genres = get_top_genres_from_users_group(users_group)

As you can see here are the top genres from the users group, which is not much different than the top genres of the active user. This is expected because only similar users were picked to form the users group.

In [124]:
top_users_group_genres

['Action',
 'Indie',
 'Free to Play',
 'Adventure',
 'RPG',
 'Massively Multiplayer',
 'Strategy',
 'Simulation',
 'Casual',
 'Early Access']

In [125]:
# function is used to get top n games from the multiplayer's dataframe
# function parameters are:
# games: multiplayer dataframe
# users_group: the group of users generated
# top_genres: the top genres from the users group
# num_of_games: the number of games to select for bundle generation
def get_most_similar_games(games, users_group, top_genres, num_of_games):
    sim = {}
    for i, row in games.iterrows():
        game_genres = row["genres"]
        
        cos, magnitude = cosine_sim(top_genres, game_genres)
        
        if magnitude==0:
            continue
                
        sim[row["id"]] = cos
    
    sim = sorted(sim.items(), key=operator.itemgetter(1), reverse=True)
    
    # only return the given number of games which will be used to form the bundle
    games_list = []
    for i in sim[:num_of_games]:
        games_list.append(i[0])
        
    return games_list

In [126]:
list_of_games = get_most_similar_games(multiplayer_games, users_group, top_users_group_genres, 10)

Here are the list of games that were selected based on the top genres from the users group. These games will be used to generate a personalized bundle based off of genres of games every user in the group has played or have interest in.

In [127]:
list_of_games

['515710',
 '638180',
 '462440',
 '622470',
 '496510',
 '516510',
 '666600',
 '658420',
 '746090',
 '552110']

In [128]:
# function used to generate a personalized bundle based off of games selected
def bundle_generation(games):
    
    # object used to store bundle data
    bundle = {}
    bundle["bundle_name"] = "Made just for you"
    bundle["bundle_id"] = "MADE FOR YOU"
    bundle["items"] = []
    discounted_total = 0
    total_price = 0
    
    # for each game selected, generate item data for it
    for id_ in games:
        game = multiplayer_games[multiplayer_games["id"]==id_]
        
        item_info = {}
        item_info["item_id"] = id_
        item_info["genre"] = ', '.join(map(str, game["genres"].values[0]))
        item_info["name"] = game["title"].values[0]
        
        if type(game["price"].values[0]) != str:
            total_price += game["price"].values[0]
        else:
            total_price += 0
        
        discounted_total += game["discount_price"].values[0]
        
        bundle["items"].append(item_info)
    
    # the final price of the bundle will be prices of all the games minus the discounts steam has provided
    bundle["bundle_final_price"] = total_price - discounted_total
    
    return bundle

In [129]:
personalized_bundle = bundle_generation(list_of_games)

Here is the personalized bundle generated for the group of users that were selected. These games were hand picked and the bundle was designed based on the genres of games users in the group has played the most. This personalized bundle will be used to evaluate against all the pre-existing bundles in the bundles dataset to see how well our personalized bundle perform against the existing bundles.

In [130]:
personalized_bundle

{'bundle_name': 'Made just for you',
 'bundle_id': 'MADE FOR YOU',
 'items': [{'item_id': '515710',
   'genre': 'Action, Adventure, Casual, Free to Play, Indie, Massively Multiplayer, RPG, Simulation',
   'name': 'Creativerse - Pro'},
  {'item_id': '638180',
   'genre': 'Action, Adventure, Casual, Free to Play, Indie, Massively Multiplayer, RPG, Simulation',
   'name': 'Creativerse - Welcome Bundle'},
  {'item_id': '462440',
   'genre': 'Action, Adventure, Indie, Massively Multiplayer, RPG, Simulation, Strategy, Early Access',
   'name': 'ROKH'},
  {'item_id': '622470',
   'genre': 'Action, Adventure, Free to Play, Indie, Massively Multiplayer, Racing, RPG, Strategy, Early Access',
   'name': 'Road Dogs'},
  {'item_id': '496510',
   'genre': 'Action, Adventure, Indie, RPG, Simulation, Strategy, Early Access',
   'name': 'M.EXE'},
  {'item_id': '516510',
   'genre': 'Action, Adventure, Free to Play, Indie, Massively Multiplayer, RPG, Strategy',
   'name': 'Orake 2D MMORPG'},
  {'item_id

In [131]:
genres_from_personalized_bundle = extract_genres_data_from_bundles(json_normalize(personalized_bundle, "items", ["bundle_final_price", "bundle_name", "bundle_id"]))

In [132]:
genres_from_personalized_bundle

{'MADE FOR YOU': ['Action',
  'Adventure',
  'Casual',
  'Free to Play',
  'Indie',
  'Massively Multiplayer',
  'RPG',
  'Simulation',
  'Strategy',
  'Early Access',
  'Racing']}

Now we will perform evaluation on the personalized bundle to see how the newly personalized bundle holds up against the pre-existing bundles. The results of this function will be the cosine similarity scores between top genres of the users group and all bundles including the personalized bundle in descending order because we want to visualize how well the new bundle performed.

In [137]:
# function is used to evaluate the personalized bundle against all pre-existing bundles
def evaluate_generated_bundle(users_group, all_bundles_genres_data, bundle_sim):
    sim = {}
    for user in user_group:
        user_data = {}
        
        for u in users_items:
            if u["user_id"]==user:
                user_data = u
                break

        filtered_df = json_normalize(user_data, "items", ["user_id", "steam_id", "user_url"])
        filtered_df = filtered_df[filtered_df["playtime_forever"] > 100]

        genres = get_all_genres_by_user(filtered_df)
        top_rated_genres = parse_genres(genres)
        
        for id_ in list(all_bundles_genres_data.keys()):
            if id_ not in sim:
                sim[id_] = 0
                
            bundle_genres = all_bundles_genres_data[id_]

            cos,_ = cosine_sim(top_rated_genres, bundle_genres)

            sim[id_] += cos
            
        for id_ in list(bundle_sim.keys()):
            if id_ not in sim:
                sim[id_] = 0
                
            bundle_genres = bundle_sim[id_]

            cos,_ = cosine_sim(top_rated_genres, bundle_genres)

            sim[id_] += cos
            
    return sorted(sim.items(), key=operator.itemgetter(1), reverse=True)

In [134]:
bundle_similarities = evaluate_generated_bundle(users_group, all_bundles_genres_data, genres_from_generated_bundle)

In [135]:
bundle_similarities

[('MADE FOR YOU', 73.07748555693053),
 ('476', 73.05045281539213),
 ('808', 72.98715003761247),
 ('948', 72.50834568274733),
 ('653', 69.98162542142518),
 ('1068', 69.98162542142518),
 ('1130', 69.98162542142518),
 ('474', 69.98162542142518),
 ('302', 69.98162542142518),
 ('336', 69.98162542142518),
 ('385', 69.98162542142518),
 ('428', 69.98162542142518),
 ('646', 69.9750081705524),
 ('339', 69.9750081705524),
 ('824', 69.87178459952969),
 ('623', 69.87178459952969),
 ('1222', 69.29914821200776),
 ('663', 69.20734618056426),
 ('396', 69.18878078950586),
 ('505', 69.18878078950586),
 ('364', 69.18878078950586),
 ('399', 69.18878078950586),
 ('482', 69.18878078950586),
 ('957', 69.18878078950586),
 ('804', 69.18878078950586),
 ('403', 69.18878078950586),
 ('542', 69.18878078950586),
 ('356', 69.18878078950586),
 ('347', 67.91803297990585),
 ('990', 67.91803297990585),
 ('728', 67.91803297990585),
 ('260', 67.91803297990585),
 ('450', 67.80495722327511),
 ('854', 67.80495722327511),
 ('1

As you can see in our output there, the personalized bundle performed the best out of all bundles in the bundles data. This means that our personalized bundle caters to the users group better than any other bundle in the bundles data. It is no surprise that our bundle performs well with the users group because this was generated using the top games the each user in the users group has played.

In [141]:
print("Our personalized bundle sits in the top {}th percentile of all bundles data.".format(int(100 - [x[0] for x in bundle_similarities].index("MADE FOR YOU")+1/len(bundle_similarities) * 100)))

Our personalized bundle sits in the top 100th percentile of all bundles data.
