In [2]:
import os
import pickle
import warnings
from collections import Counter

# Silence warnings before the heavy third-party imports below are loaded.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, average_precision_score
from sklearn.mixture import GaussianMixture
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.python.framework import ops

In [3]:
# Directory that holds the project data files. The default is the original
# author's local checkout; set the LHL_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
# os.path.join(<dir>, '') guarantees a trailing separator, so `path + <name>`
# concatenation keeps working whether or not the override ends with one.
path = os.path.join(
    os.environ.get('LHL_DATA_DIR', '/home/jewelle/data_bootcamp/LHL-final-project/'),
    '',
)

# Load the raw data set into `df`.
df = pd.read_csv(path + 'final.csv')

In [4]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,userID,game,action,hours_played,all_reviews,developer,publisher,popular_tags,game_details,genre
0,151603712,Fallout 4,purchase,1.0,"Mostly Positive,(90,387),- 70% of the 90,387 u...",Bethesda Game Studios,"Bethesda Softworks,Bethesda Softworks","Open World,Post-apocalyptic,Exploration,Single...","Single-player,Steam Achievements,Full controll...",RPG
1,151603712,Fallout 4,play,87.0,"Mostly Positive,(90,387),- 70% of the 90,387 u...",Bethesda Game Studios,"Bethesda Softworks,Bethesda Softworks","Open World,Post-apocalyptic,Exploration,Single...","Single-player,Steam Achievements,Full controll...",RPG
2,87445402,Fallout 4,purchase,1.0,"Mostly Positive,(90,387),- 70% of the 90,387 u...",Bethesda Game Studios,"Bethesda Softworks,Bethesda Softworks","Open World,Post-apocalyptic,Exploration,Single...","Single-player,Steam Achievements,Full controll...",RPG
3,87445402,Fallout 4,play,83.0,"Mostly Positive,(90,387),- 70% of the 90,387 u...",Bethesda Game Studios,"Bethesda Softworks,Bethesda Softworks","Open World,Post-apocalyptic,Exploration,Single...","Single-player,Steam Achievements,Full controll...",RPG
4,25096601,Fallout 4,purchase,1.0,"Mostly Positive,(90,387),- 70% of the 90,387 u...",Bethesda Game Studios,"Bethesda Softworks,Bethesda Softworks","Open World,Post-apocalyptic,Exploration,Single...","Single-player,Steam Achievements,Full controll...",RPG


In [5]:
# Rows with action == 'purchase' and hours_played == 1.0 get their hours zeroed
# (the 1.0 looks like a placeholder on purchase records -- TODO confirm), so
# that a matching 'play' row with real hours sorts after them.
purchase_rows = df['action'].eq('purchase') & df['hours_played'].eq(1.0)
df.loc[purchase_rows, 'hours_played'] = 0

# Order by user, game and hours, then keep the last row of each (userID, game)
# pair, i.e. the one with the most hours.
df = df.sort_values(by=['userID', 'game', 'hours_played'])
clean_df = df.drop_duplicates(subset=['userID', 'game'], keep='last')

In [6]:
# Remove the `action` column from the de-duplicated frame.
# Rebinding (instead of inplace=True) avoids mutating a frame derived from `df`,
# and errors='ignore' keeps the cell re-runnable: a second execution finds no
# `action` column and is a no-op rather than raising KeyError.
clean_df = clean_df.drop(columns=['action'], errors='ignore')

In [8]:
# (rows, columns) of the de-duplicated frame.
clean_df.shape

(56137, 9)

In [7]:
# Preview the first rows of the de-duplicated frame.
clean_df.head()

Unnamed: 0,userID,game,hours_played,all_reviews,developer,publisher,popular_tags,game_details,genre
51896,5250,Alien Swarm,4.9,"Very Positive,(18,439),- 94% of the 18,439 use...",Valve,"Valve,Valve","Free to Play,Co-op,Action,Multiplayer,Aliens,O...","Single-player,Multi-player,Co-op,Steam Achieve...",Action
58359,5250,Counter-Strike,0.0,"Overwhelmingly Positive,(66,438),- 96% of the ...",Valve,"Valve,Valve","Action,FPS,Multiplayer,Shooter,Classic,Team-Ba...","Multi-player,Online Multi-Player,Local Multi-P...",Action
59534,5250,Day of Defeat,0.0,"Very Positive,(2,022),- 86% of the 2,022 user ...",Valve,"Valve,Valve","FPS,World War II,Multiplayer,Shooter,Action,Wa...","Multi-player,Valve Anti-Cheat enabled",Action
60143,5250,Deathmatch Classic,0.0,"Very Positive,(953),- 80% of the 953 user revi...",Valve,"Valve,Valve","Action,FPS,Classic,Multiplayer,Shooter,First-P...","Multi-player,Online Multi-Player,Local Multi-P...",Action
12808,5250,Dota 2,0.2,"Very Positive,(1,015,621),- 85% of the 1,015,6...",Valve,"Valve,Valve","Free to Play,MOBA,Multiplayer,Strategy,e-sport...","Multi-player,Co-op,Steam Trading Cards,Steam W...","Action,Free to Play,Strategy"


In [10]:
# Missing-value audit of clean_df: per-column null count ('Total') and the
# fraction of rows that are null ('Percent', a 0-1 ratio), largest first.
null_counts = clean_df.isnull().sum()
row_counts = clean_df.isnull().count()

total = null_counts.sort_values(ascending=False)
percent = (null_counts / row_counts).sort_values(ascending=False)

missing_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_data.head(6)

Unnamed: 0,Total,Percent
publisher,890,0.015854
genre,352,0.00627
developer,196,0.003491
all_reviews,178,0.003171
popular_tags,102,0.001817
game_details,80,0.001425


In [11]:
# Discard every row that is missing a value in any of these columns.
required_cols = [
    'publisher',
    'genre',
    'developer',
    'all_reviews',
    'popular_tags',
    'game_details',
]
steam_clean = clean_df.dropna(subset=required_cols, how='any')

In [12]:
# Repeat the missing-value audit on steam_clean to confirm the dropna result:
# 'Total' is the per-column null count, 'Percent' the null fraction (0-1).
steam_nulls = steam_clean.isnull()

total = steam_nulls.sum().sort_values(ascending=False)
percent = (steam_nulls.sum() / steam_nulls.count()).sort_values(ascending=False)

missing_data = pd.concat([total, percent], keys=['Total', 'Percent'], axis=1)
missing_data.head()

Unnamed: 0,Total,Percent
genre,0,0.0
game_details,0,0.0
popular_tags,0,0.0
publisher,0,0.0
developer,0,0.0


In [13]:
# Steam refunds games played for less than 2 hours, so keep only user/game rows
# with more than 2.0 hours played.
# NOTE(review): the comparison is strict, so rows at exactly 2.0 hours are
# dropped as well -- confirm whether `>=` was intended.
played_long_enough = steam_clean['hours_played'].gt(2.0)
steam_df = steam_clean[played_long_enough]

In [14]:
# Keep only games that still have at least MIN_USERS_PER_GAME user rows after
# the hours filter (50 or more -- the same set the former `> 49` test selected).
MIN_USERS_PER_GAME = 50

# transform('count') broadcasts each game's row count back onto its rows, which
# gives a boolean mask aligned with steam_df.
users_per_game = steam_df.groupby('game')['userID'].transform('count')

# .copy() makes steam_train an independent frame, so later edits do not touch
# steam_df.
steam_train = steam_df[users_per_game >= MIN_USERS_PER_GAME].copy()

In [15]:
# Preview the first rows of the filtered training frame.
steam_train.head()

Unnamed: 0,userID,game,hours_played,all_reviews,developer,publisher,popular_tags,game_details,genre
51896,5250,Alien Swarm,4.9,"Very Positive,(18,439),- 94% of the 18,439 use...",Valve,"Valve,Valve","Free to Play,Co-op,Action,Multiplayer,Aliens,O...","Single-player,Multi-player,Co-op,Steam Achieve...",Action
27686,5250,Portal 2,13.6,"Overwhelmingly Positive,(104,354),- 98% of the...",Valve,"Valve,Valve","Puzzle,Co-op,First-Person,Sci-fi,Comedy,Single...","Single-player,Co-op,Steam Achievements,Full co...","Action,Adventure"
34318,76767,Banished,24.0,"Very Positive,(23,931),- 89% of the 23,931 use...",Shining Rock Software LLC,"Shining Rock Software LLC,Shining Rock Softwar...","City Builder,Strategy,Simulation,Survival,Indi...","Single-player,Steam Achievements","Indie,Simulation,Strategy"
58286,76767,Counter-Strike,365.0,"Overwhelmingly Positive,(66,438),- 96% of the ...",Valve,"Valve,Valve","Action,FPS,Multiplayer,Shooter,Classic,Team-Ba...","Multi-player,Online Multi-Player,Local Multi-P...",Action
27625,76767,Portal 2,15.0,"Overwhelmingly Positive,(104,354),- 98% of the...",Valve,"Valve,Valve","Puzzle,Co-op,First-Person,Sci-fi,Comedy,Single...","Single-player,Co-op,Steam Achievements,Full co...","Action,Adventure"
