# Imports

In [90]:
import csv
import os
import sys
# Spark imports
from pyspark.rdd import RDD
from pyspark.sql import DataFrame
from pyspark.sql.types import IntegerType
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import operator
# Dask imports
import dask.bag as db
import dask.dataframe as df  # you can use Dask bags or dataframes
from csv import reader
import numpy as np
import pandas as pd
import datetime
from pyspark.sql.functions import *
import sklearn
from sklearn import preprocessing
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
# from scipy.sparse import csr_matrix
# import scipy as sp
import heapq
# from surprise import CoClustering
# from surprise import Dataset, Reader, SVD, accuracy
# from surprise import KNNBaseline


# Packages for model evaluation
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from time import time

# from surprise.model_selection import train_test_split
# from sklearn.model_selection import train_test_split

# Package to suppress warnings
import warnings
warnings.filterwarnings("ignore")
from pyspark.sql.functions import monotonically_increasing_id
# Packages for saving models
import pickle

from re import split
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, DoubleType, IntegerType, StringType, DateType
from pyspark.sql.functions import *
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.regression import RandomForestRegressor

In [2]:
def init_spark():
    spark = SparkSession \
        .builder \
        .appName("Python Spark SQL basic example") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
    return spark

In [3]:
spark=init_spark()

22/04/11 18:19:59 WARN Utils: Your hostname, Sams-MacBook-Pro-2.local resolves to a loopback address: 127.0.0.1; using 172.30.22.156 instead (on interface en0)
22/04/11 18:19:59 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
22/04/11 18:20:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
22/04/11 18:20:01 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


## Generate Data For Base Game DataSet

In [4]:
def generateData():
    path_to_write = parentPath+'/data/datasets'
    originalDataSet = pd.read_csv(parentPath+'/data/datasets/steam.csv')
    if os.path.exists(path_to_write+'/games.csv'):
        return pd.read_csv(path_to_write+'/games.csv')
    for row in originalDataSet.iterrows():
        row =row[1]
        totalRatings = (row['positive_ratings'] + row['negative_ratings'])
        if totalRatings == 0:
            ratings = 0
        else:
            ratings = round((row['positive_ratings']/totalRatings) * 100,2)
        dictionnary[row['appid']] = {
                    "name": row['name'],
                    "price": row['price'],
                    "release_date": row['release_date'],
                    "required_age": row['required_age'],
                    "publishers": row['publisher'],
                    "developers": row['developer'],
                    "categories": row['categories'],
                    "genres": row['genres'],
                    "ratings": ratings,
                    "totalRatings": totalRatings,
                    "average_playtime": row['average_playtime'],
                    "median_playtime": row['median_playtime'],
                    "num_owners": row['owners']
        }
    
    

    df = pd.DataFrame.from_dict(dictionnary, orient='index')
    df.index.name = 'appid'
    if not os.path.exists(path_to_write):
        os.mkdir(path_to_write)
    
    df.to_csv(path_to_write+'/games.csv')
    return df

## Generate the User Steam Dataset

In [5]:
url_user_info = "https://api.steampowered.com/IPlayerService/GetOwnedGames/v1/?key=D399B9CAE07939881781DD36CC8CC442&steamid={}&include_appinfo=true&include_played_free_games=true"
def generateDataForUserSteamDataset():
    dictionary=[]
    path_to_write = parentPath+'/data/datasets'
    if os.path.exists(path_to_write+'/steam_id_games.csv'):
        return pd.read_csv(path_to_write+'/steam_id_games.csv')
    originalDataSet = pd.read_csv(parentPath+'/data/datasets/steam_id.csv')
    for row in originalDataSet.iterrows():
        row =row[1]
        try:
            games = requests.get(url_user_info.format(row['steamid_a'])).json()['response']
        except:
            continue
        if 'games' not in games:
            continue
        dataset = [{'steam_id':row['steamid_a'],"appid" :str(game['appid']), "time_played_in_minutes": game['playtime_forever'] } for game in games['games']]
        dictionary = dataset+dictionary
    df = pd.DataFrame.from_records(dictionary)
    if not os.path.exists(path_to_write):
        os.mkdir(path_to_write)
    
    df.to_csv(path_to_write+'/steam_id_games.csv')
    return df

In [6]:
filename="../data/datasets/games.csv"
filename2="../data/datasets/steam_id_games.csv"

In [7]:
from pyspark.sql.types import *
games_schema = StructType() \
    .add("appid", IntegerType(), True) \
    .add("name", StringType(), True) \
    .add("price", DoubleType(), True) \
    .add("release_date", StringType(), True) \
    .add("required_age", IntegerType(), True) \
    .add("publishers", StringType(), True) \
    .add("developers", StringType(), True) \
    .add("categories", StringType(), True) \
    .add("genres", StringType(), True) \
    .add("ratings", DoubleType(), True) \
    .add("totalRatings", IntegerType(), True) \
    .add("average_playtime", DoubleType(), True) \
    .add("median_playtime", IntegerType(), True) \
    .add("num_owners", StringType(), True)
games = spark.read.schema(games_schema).csv(filename, header=True).dropna()


In [8]:
games.take(5)

[Row(appid=10, name='Counter-Strike', price=7.19, release_date='2000-11-01', required_age=0, publishers='Valve', developers='Valve', categories='Multi-player;Online Multi-Player;Local Multi-Player;Valve Anti-Cheat enabled', genres='Action', ratings=97.39, totalRatings=127873, average_playtime=17612.0, median_playtime=317, num_owners='10000000-20000000'),
 Row(appid=20, name='Team Fortress Classic', price=3.99, release_date='1999-04-01', required_age=0, publishers='Valve', developers='Valve', categories='Multi-player;Online Multi-Player;Local Multi-Player;Valve Anti-Cheat enabled', genres='Action', ratings=83.98, totalRatings=3951, average_playtime=277.0, median_playtime=62, num_owners='5000000-10000000'),
 Row(appid=30, name='Day of Defeat', price=3.99, release_date='2003-05-01', required_age=0, publishers='Valve', developers='Valve', categories='Multi-player;Valve Anti-Cheat enabled', genres='Action', ratings=89.56, totalRatings=3814, average_playtime=187.0, median_playtime=34, num_ow

## Normalization

In [9]:
# normalizer = Normalizer(inputCol="price", outputCol="price")
# myGames = normalizer.transform(myGames)
#hasher = FeatureHasher(inputCols=["price", "release_date", "required_age", "ratings","totalRatings","average_playtime","median_playtime","Action"],
#                       outputCol="features")

#featurized = hasher.transform(myGames)
#listOfGames=featurized.take(100)

#featurized.head()

#myGames3 = myGames.take(10000)
#normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
#l1NormData = normalizer.transform(featurized)
#l1NormData.head()

# lInfNormData = normalizer.transform(featurized, {normalizer.p: float("inf")})
#listOfGames=l1NormData.take(10)
# listOfGames.reshape(-1, 1)
# X_normalized = normalizer(myGames, copy=True)
#type(listOfGames)
#listOfGames=np.array([listOfGames], dtype=object)

## Data Preprocessing

In [10]:
# get the yaer of release_date
games = games.withColumn('release_date', col('release_date')[0:4])
games = games.withColumn("release_date", games["release_date"].cast(IntegerType()))

In [11]:
#create columns for genres
games = games.withColumn('genres', split(col('genres'), ';'))
uniq_genres = games.select('genres').distinct().collect()
unic_genres_set = set()
for u in uniq_genres:
    for i in u.genres:
        unic_genres_set.add(i)
unic_genres_set

for i in unic_genres_set:
    games = games.withColumn(i,when(array_contains(games.genres,i), 1).otherwise(0))

                                                                                

In [12]:
# create columns for the categories
games = games.withColumn('categories', split(col('categories'), ';'))
uniq_cat = games.select('categories').distinct().collect()
unic_cat_set = set()
for u in uniq_cat:
    for i in u.categories:
        # if not i in games2.columns:
        #     games2 = games2.withColumn(i, 0)
        unic_cat_set.add(i)
unic_cat_set

for i in unic_cat_set:
    games = games.withColumn(i,when(array_contains(games.categories,i), 1).otherwise(0))

22/04/11 18:20:08 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


In [13]:
# add the column to store the index of the row
games = games.select("*").withColumn("RowNumber", monotonically_increasing_id())
games.take(1)

[Row(appid=10, name='Counter-Strike', price=7.19, release_date=2000, required_age=0, publishers='Valve', developers='Valve', categories=['Multi-player', 'Online Multi-Player', 'Local Multi-Player', 'Valve Anti-Cheat enabled'], genres=['Action'], ratings=97.39, totalRatings=127873, average_playtime=17612.0, median_playtime=317, num_owners='10000000-20000000', Simulation=0, Design & Illustration=0, Video Production=0, Adventure=0, Audio Production=0, Casual=0, Sports=0, Early Access=0, Software Training=0, Education=0, Accounting=0, Violent=0, Massively Multiplayer=0, Documentary=0, Animation & Modeling=0, Utilities=0, Action=1, Indie=0, Racing=0, Free to Play=0, Sexual Content=0, Strategy=0, Tutorial=0, Photo Editing=0, Web Publishing=0, RPG=0, Game Development=0, Gore=0, Nudity=0, Co-op=0, Cross-Platform Multiplayer=0, Steam Achievements=0, Commentary available=0, SteamVR Collectibles=0, Steam Workshop=0, Steam Turn Notifications=0, Multi-player=1, Full controller support=0, Shared/Spl

In [14]:
# create a dataset with features that we need for the model
myGames=games.drop('appid',"developers","genres","name","publishers","categories","num_owners",)

In [15]:
myGames.head(1)

[Row(price=7.19, release_date=2000, required_age=0, ratings=97.39, totalRatings=127873, average_playtime=17612.0, median_playtime=317, Simulation=0, Design & Illustration=0, Video Production=0, Adventure=0, Audio Production=0, Casual=0, Sports=0, Early Access=0, Software Training=0, Education=0, Accounting=0, Violent=0, Massively Multiplayer=0, Documentary=0, Animation & Modeling=0, Utilities=0, Action=1, Indie=0, Racing=0, Free to Play=0, Sexual Content=0, Strategy=0, Tutorial=0, Photo Editing=0, Web Publishing=0, RPG=0, Game Development=0, Gore=0, Nudity=0, Co-op=0, Cross-Platform Multiplayer=0, Steam Achievements=0, Commentary available=0, SteamVR Collectibles=0, Steam Workshop=0, Steam Turn Notifications=0, Multi-player=1, Full controller support=0, Shared/Split Screen=0, Includes level editor=0, Local Multi-Player=1, Mods (require HL2)=0, Mods=0, VR Support=0, In-App Purchases=0, Steam Trading Cards=0, Online Multi-Player=1, Valve Anti-Cheat enabled=1, Captions available=0, Stats=

## First Model : Cosine Similarity

In [16]:
from sklearn.metrics.pairwise import cosine_similarity
myGames2 = myGames.take(27000)

cos_sim_data = cosine_similarity(myGames2)
cos_sim_data

                                                                                

array([[1.00000000e+00, 8.97414146e-01, 8.89050930e-01, ...,
        3.76547723e-09, 3.99609567e-09, 6.07002769e-09],
       [8.97414146e-01, 1.00000000e+00, 9.99682562e-01, ...,
        2.25443188e-04, 2.25443395e-04, 2.25445218e-04],
       [8.89050930e-01, 9.99682562e-01, 1.00000000e+00, ...,
        4.63813819e-04, 4.63814024e-04, 4.63815831e-04],
       ...,
       [3.76547723e-09, 2.25443188e-04, 4.63813819e-04, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [3.99609567e-09, 2.25443395e-04, 4.63814024e-04, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [6.07002769e-09, 2.25445218e-04, 4.63815831e-04, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00]])

In [17]:
type(myGames2)

list

In [81]:
# temp = pd.DataFrame(cos_sim_data)
# cos_sim_df = spark.createDataFrame(temp)
# cos_sim_df.take(5)

In [82]:
# The users dataframe is not currently used.
#users_df = spark.read.csv(users_csv, header=True, inferSchema=True)
# Init the users/games relation dataframe.
users_games_df = spark.read.csv(filename2, header=True, inferSchema=True).drop("_c0")

# Feature assembler: make a vector of all features.
inputColsList = ["ratings", "price", "release_date", "totalRatings"]
inputColsList.extend(unic_cat_set)
inputColsList.extend(unic_genres_set)
vector_assembler = VectorAssembler().setInputCols(inputColsList).setOutputCol("features")
vectorized_games = vector_assembler.transform(games)

# Join the vectorized games with the user/games relation table.
users_games_joined_df = users_games_df.join(vectorized_games, "appid", "inner")

In [179]:
# // I need to create a function that get the appId and find the N top similar apps!!!
def generate_top_N_recommendationsX(appId, N=10):
    # appId = 0
    emp_RDD = spark.sparkContext.emptyRDD()
    row = games.select("RowNumber", "appid").rdd.filter(lambda x: x['appid'] == appId).collect()
    if len(row) == 0:
        columns = StructType([StructField('name',
                                  StringType(), True),
                    StructField('RowNumber',
                                IntegerType(), True),
                    StructField('index',
                                IntegerType(), True),
                    StructField('score',
                                DoubleType(), True)])
        df = spark.createDataFrame(data = emp_RDD,
                           schema = columns)
        return df
    row = row[0]['RowNumber']
    sim_scores = list(enumerate(cos_sim_data[row]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:N]
    game_indices = [i[0] for i in sim_scores]
    tempArray = []
    tempArrayScore =[]
    for val in range(0,len(game_indices)):
        tempArray.append((game_indices[val], val))
        tempArrayScore.append((game_indices[val],sim_scores[val][1]))
    tempArray = dict(tempArray)
    tempArrayScore= dict(tempArrayScore)
    names = games.where(col("RowNumber").isin(game_indices)).select("name","RowNumber").rdd.map(lambda row: (row[0], row[1],tempArray[row[1]],float(tempArrayScore[row[1]]) )).toDF(['name','RowNumber','index','score']).sort('index')
    return names

def GetValueFromDataframe(_df,columnName):
    for row in _df.rdd.collect():
        return row[columnName]

In [114]:
equalnames = generate_top_N_recommendationsX(str(10))
equalnames.show()

0
[(1127, 0.9993242069743952), (895, 0.9990176266601736), (1491, 0.9989362257873207), (1421, 0.9987252665424053), (10, 0.9986169812280062), (1155, 0.998073755200959), (1173, 0.9975466116780457), (1772, 0.9972856364670377), (4712, 0.9972711169757251)]
Temp Array
**************************************************
{1127: 0, 895: 1, 1491: 2, 1421: 3, 10: 4, 1155: 5, 1173: 6, 1772: 7, 4712: 8}
TempArrayScore
**************************************************
{1127: 0.9993242069743952, 895: 0.9990176266601736, 1491: 0.9989362257873207, 1421: 0.9987252665424053, 10: 0.9986169812280062, 1155: 0.998073755200959, 1173: 0.9975466116780457, 1772: 0.9972856364670377, 4712: 0.9972711169757251}
+--------------------+---------+-----+------------------+
|                name|RowNumber|index|             score|
+--------------------+---------+-----+------------------+
|              Arma 3|     1127|    0|0.9993242069743952|
|Mount & Blade: Wa...|      895|    1|0.9990176266601736|
|Kerbal Space Program

In [190]:
def getRecommendationForUserCosineSim(user_id:int,num_recommendations:int =10):
    owned_games_df = users_games_df.filter(users_games_df.steam_id == user_id)
    emp_RDD = spark.sparkContext.emptyRDD()
    # Create an expected schema
    columns = StructType([StructField('name',
                                  StringType(), True),
                    StructField('RowNumber',
                                IntegerType(), True),
                    StructField('index',
                                IntegerType(), True),
                    StructField('score',
                                DoubleType(), True)])
    df = spark.createDataFrame(data = emp_RDD,
                           schema = columns)
    for row in owned_games_df.collect():
        df_temp = generate_top_N_recommendationsX(row.appid)
        df = df.union(df_temp)
    df.sort(col('score').desc()).show(num_recommendations)



In [192]:
getRecommendationForUserCosineSim(user_id=76561197960360459)

22/04/12 18:34:24 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB
22/04/12 18:37:40 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB

+--------------------+---------+-----+------------------+
|                name|RowNumber|index|             score|
+--------------------+---------+-----+------------------+
| Dimension Hunter VR|     9981|    0|0.9999997790062494|
|  Lode Runner Legacy|    14449|    0|0.9999997723123002|
|   DUNGEONS OF CHAOS|    14000|    0|0.9999996607369963|
|     Fausts Alptraum|    12673|    0|0.9999996328669667|
| Necromancer Returns|    14542|    1|0.9999995668308166|
|             Blockle|    14558|    2|0.9999995110937684|
|Alien Arena: Warr...|    14482|    3|0.9999995074103457|
|         Beyond Eden|    14561|    4|0.9999994309347743|
|Game Character Hu...|    11068|    0|0.9999993911961074|
|      Mount Wingsuit|     9971|    1|0.9999993357544238|
+--------------------+---------+-----+------------------+
only showing top 10 rows





## Model2 :  : Random Forest

In [84]:
def DoRandomForest(user_id, num_recommendations = 10, seed = 2022):
    # Join the vectorized games with the user/games relation table.
    users_games_joined_df = users_games_df.join(vectorized_games, "appid", "inner")

    # Get the data for the games the player has played.
    owned_games_df = users_games_joined_df.filter(users_games_joined_df.steam_id == user_id) #.filter(users_games_joined_df.time_played_in_minutes > 0)
    user_games_count = owned_games_df.count()
    print("Owned Games:", user_games_count)
    if user_games_count > 0:
        training_df, testing_df = owned_games_df.randomSplit([0.8, 0.2], seed)

        # Print the games this user has played the most (for comparison to final predictions).
        print("User's top games (from training set):")
        training_df.sort(training_df.time_played_in_minutes.desc()).select("name", "genres", "time_played_in_minutes").show()
        #owned_games_df.sort(owned_games_df.time_played_in_minutes.desc()).select("name", "genres", "time_played_in_minutes").show()

        # Extract the list of unowned games by removing the already owned ones.
        unowned_games_df = vectorized_games.join(owned_games_df, "appid", "leftanti")

        # Build Random Forest Regression based on time the user played each game in their library.
        randForest = RandomForestRegressor()
        model = randForest.fit(training_df.withColumnRenamed("time_played_in_minutes", "label"))

        # Run the model on unowned game data.
        predictions = model.transform(unowned_games_df).sort(col("prediction").desc())
        print("The user's top " + str(num_recommendations) + " predicted games (with predicted minutes played if they owned it):")
        predictions.select('appid', 'features', 'genres', 'name', 'prediction').show(num_recommendations)

        # Model evaluation via RMSE
        evaluator = RegressionEvaluator(labelCol="label", predictionCol="prediction")
        testing_df = model.transform(testing_df.withColumnRenamed("time_played_in_minutes", "label"))
        rmse = evaluator.evaluate(testing_df, {evaluator.metricName: "rmse"})
        print("RMSE:", rmse)
        testing_df.select('name', 'label', 'prediction').show()

In [85]:
DoRandomForest(76561197960360459)

Owned Games: 216
User's top games (from training set):


                                                                                

+--------------------+--------------------+----------------------+
|                name|              genres|time_played_in_minutes|
+--------------------+--------------------+----------------------+
|                DOOM|            [Action]|                  6566|
|          Anno 2070™|          [Strategy]|                  3850|
|           Tropico 5|[RPG, Simulation,...|                  3836|
|LEGO® Batman™ 3: ...| [Action, Adventure]|                  3699|
|Call of Duty®: Bl...|            [Action]|                  2774|
|     Game Dev Tycoon|[Casual, Indie, S...|                  2627|
|  Duke Nukem Forever|            [Action]|                  2432|
|    Castle Crashers®|[Action, Adventur...|                  2198|
|Jurassic World Ev...|[Simulation, Stra...|                  2025|
|Call of Duty®: In...| [Action, Adventure]|                  1922|
|Counter-Strike: G...|[Action, Free to ...|                  1884|
|      Rocket League®|[Action, Indie, R...|                  1

                                                                                

The user's top 10 predicted games (with predicted minutes played if they owned it):


                                                                                

+------+--------------------+--------------------+--------------------+------------------+
| appid|            features|              genres|                name|        prediction|
+------+--------------------+--------------------+--------------------+------------------+
|242760|(62,[0,1,2,3,4,11...|[Action, Adventur...|          The Forest| 4263.479916815211|
|582010|(62,[0,1,2,3,4,6,...|            [Action]|MONSTER HUNTER: W...| 3592.357189542484|
|413150|(62,[0,1,2,3,4,6,...|[Indie, RPG, Simu...|      Stardew Valley| 3165.954411764706|
|359550|(62,[0,1,2,3,4,11...|            [Action]|Tom Clancy's Rain...|3105.8143323996264|
|211820|(62,[0,1,2,3,4,5,...|[Action, Adventur...|           Starbound|2927.6708502502124|
|244850|(62,[0,1,2,3,4,6,...|[Action, Indie, S...|     Space Engineers| 2920.612727272727|
|676500|(62,[0,1,2,3,4,5,...|[Action, Adventur...|        Time Warpers|2696.1833333333334|
|365590|(62,[0,1,2,3,4,11...|[Action, Adventur...|Tom Clancy’s The ...|2653.0535480859007|

                                                                                

RMSE: 941.2422048558764


[Stage 170:>                                                        (0 + 1) / 1]

+--------------------+-----+------------------+
|                name|label|        prediction|
+--------------------+-----+------------------+
|       Day of Defeat|    0| 188.0674683539425|
|Half-Life: Blue S...|    0|14.659514788525689|
|Half-Life 2: Deat...|    0|196.30208373855788|
|Half-Life Deathma...|    0|197.05702975745126|
|       Left 4 Dead 2| 2002| 987.3943842921784|
|              Dota 2| 2324|1711.9699495822529|
|         Garry's Mod|    0|1788.6139355742296|
|Trackmania United...| 1099| 313.6481259167307|
|Runaway, A Road A...|   19|15.892058648174814|
|                RAGE|  103| 453.1433032855942|
|Grand Theft Auto:...|  105| 105.4676904716566|
|           Max Payne|    0|14.659514788525689|
|            Far Cry®|    0|14.659514788525689|
|Tom Clancy's Rain...|    0| 16.38642484535791|
|Tom Clancy's Spli...|    0|14.690742858701128|
|Tom Clancy's Spli...|   18|27.873875624084576|
|Tom Clancy's Rain...|    0| 62.61955761074137|
|Tom Clancy's Rain...|    6|118.23983321

                                                                                

In [23]:
# Kyle's Steam ID
DoRandomForest(76561197982716241)

Owned Games: 136
User's top games (from training set):


                                                                                

+--------------------+--------------------+----------------------+
|                name|              genres|time_played_in_minutes|
+--------------------+--------------------+----------------------+
|Counter-Strike: G...|[Action, Free to ...|                 30157|
|Sid Meier's Civil...|          [Strategy]|                 17603|
|       Killing Floor|            [Action]|                 16366|
|Kerbal Space Program| [Indie, Simulation]|                 11131|
|MechWarrior Onlin...|[Action, Free to ...|                  9815|
|Sid Meier’s Civil...|          [Strategy]|                  9192|
|Sins of a Solar E...|   [Indie, Strategy]|                  8750|
|Red Orchestra 2: ...|[Action, Massivel...|                  6684|
|             XCOM® 2|          [Strategy]|                  5412|
|Supreme Commander...|          [Strategy]|                  5283|
|Defense Grid: The...|   [Indie, Strategy]|                  5262|
|          BATTLETECH|[Action, Adventur...|                  4

                                                                                

The user's top 10 predicted games (with predicted minutes played if they owned it):


                                                                                

+------+--------------------+--------------------+--------------------+------------------+
| appid|            features|              genres|                name|        prediction|
+------+--------------------+--------------------+--------------------+------------------+
|   570|(62,[0,2,3,4,8,9,...|[Action, Free to ...|              Dota 2|15590.033333333331|
|218620|(62,[0,1,2,3,4,6,...|       [Action, RPG]|            PAYDAY 2|10985.930473530698|
|252490|(62,[0,1,2,3,4,5,...|[Action, Adventur...|                Rust|  9980.47803030303|
| 48700|(62,[0,1,2,3,6,9,...|       [Action, RPG]|Mount & Blade: Wa...| 9398.527976190477|
|301520|(62,[0,2,3,6,11,1...|[Action, Free to ...|           Robocraft| 9160.465106335107|
|  4000|(62,[0,1,2,3,4,5,...| [Indie, Simulation]|         Garry's Mod| 8977.478310502283|
|444090|(62,[0,2,3,6,11,1...|[Action, Free to ...|           Paladins®| 8890.836813380498|
|227300|(62,[0,1,2,3,6,9,...| [Indie, Simulation]|Euro Truck Simula...| 8152.217142857144|

                                                                                

RMSE: 2218.9550666779037


[Stage 69:>                                                         (0 + 1) / 1]

+--------------------+-----+------------------+
|                name|label|        prediction|
+--------------------+-----+------------------+
|Half-Life: Opposi...| 1836| 1545.660263009284|
|Counter-Strike: S...| 1959| 4518.873876232849|
|Half-Life 2: Deat...|   10| 567.7622074537285|
|Half-Life Deathma...|    0|438.09626923036285|
|       Left 4 Dead 2|    0| 8531.583333333334|
|            Portal 2| 1135|          4628.985|
|          Multiwinia|    0| 666.7139541264853|
|      Wolfenstein 3D|  224| 435.4386306092694|
|       Ultimate Doom|    3|267.04755765444645|
|            QUAKE II|  811| 278.9238397057285|
|Star Wars: Battle...| 1895| 1250.876784691462|
|Civilization IV: ...|    0| 2565.504111946295|
|Call of Duty: Wor...|   33| 644.8975185731572|
|Unreal 2: The Awa...|  491| 484.7080032202219|
|Unreal Tournament...| 6126|1954.6074917103601|
|Unreal Tournament...| 5266|1108.9301016004692|
|            Far Cry®|  377|  516.762392465236|
|Sid Meier's Civil...|    0| 656.8400742

                                                                                