# STEAM VIDEO GAMES RECOMMENDATION SYSTEM

# BUSINESS PROBLEM

For this project i will be creating a content and collaborative based recommendation model that provides video game recommendations to steam users. This project will utlize Steam's API, SteamSpy's API along with data sets sourced from kaggle in order to create a recommendation system. 
<br><br>
The aim of this project is to improve steam's current recommendation system to provide users with much more accurate recommendations.

### Importing necessary packages

In [215]:
# importing necessary packages
import pandas as pd
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 100)
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import json
import requests
from bs4 import BeautifulSoup

from sklearn.preprocessing import StandardScaler 
from sklearn import preprocessing

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Obtaining the API key

In [216]:
# function that obtains api key from folder
def get_keys(path):
    ''' this function obtains the api key'''
    with open(path) as f:
        return json.load(f)

In [217]:
# getting the api key
keys = get_keys("/Users/Visitor/Documents/Flatiron/capstone_project/.secret/steam_api.json")

In [218]:
# getting and assigning the api key to a variable
api_key = keys['api_key']

# Generating requests from Steam's API

From Steam's api the following data will be used:
<li>GetPlayerSummaries
<li>GetUserStatsForGame
<li>GetRecentlyPlayedGames
<li>GetOwnedGames
<li>GetFriendList
<li>GetAppList

### Player summaries (GetPlayerSummaries)

In [219]:
# getting requests from the api for the player summaries
resp = requests.get("http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=B8DD767E6A0BF4F61B05D6E5483C29A9&steamids=76561197960435530")

In [220]:
# checking if the request was sucessful
resp.status_code == requests.codes.ok

True

In [221]:
#filtering the data
resp.json()['response']['players']

[{'steamid': '76561197960435530',
  'communityvisibilitystate': 3,
  'profilestate': 1,
  'personaname': 'Robin',
  'profileurl': 'https://steamcommunity.com/id/robinwalker/',
  'avatar': 'https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f1/f1dd60a188883caf82d0cbfccfe6aba0af1732d4.jpg',
  'avatarmedium': 'https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f1/f1dd60a188883caf82d0cbfccfe6aba0af1732d4_medium.jpg',
  'avatarfull': 'https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f1/f1dd60a188883caf82d0cbfccfe6aba0af1732d4_full.jpg',
  'avatarhash': 'f1dd60a188883caf82d0cbfccfe6aba0af1732d4',
  'personastate': 0,
  'realname': 'Robin Walker',
  'primaryclanid': '103582791429521412',
  'timecreated': 1063407589,
  'personastateflags': 0,
  'loccountrycode': 'US',
  'locstatecode': 'WA',
  'loccityid': 3961}]

In [222]:
# creating a data frame using the obtained information
df = pd.DataFrame.from_dict(resp.json()['response']['players'])

In [223]:
# displaying the data frame
df.head()

Unnamed: 0,steamid,communityvisibilitystate,profilestate,personaname,profileurl,avatar,avatarmedium,avatarfull,avatarhash,personastate,realname,primaryclanid,timecreated,personastateflags,loccountrycode,locstatecode,loccityid
0,76561197960435530,3,1,Robin,https://steamcommunity.com/id/robinwalker/,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,f1dd60a188883caf82d0cbfccfe6aba0af1732d4,0,Robin Walker,103582791429521412,1063407589,0,US,WA,3961


### Friend's list (GetFriendList)

In [224]:
# getting requests from the api for users friend lists
resp = requests.get("http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key=B8DD767E6A0BF4F61B05D6E5483C29A9&steamid=76561197960435530&relationship=friend")

In [225]:
# checking if the request was sucessful
resp.status_code == requests.codes.ok

True

In [226]:
# filtering the data
# resp.json()['friendslist']['friends']

In [227]:
# creating a data frame from the obtained data
df = pd.DataFrame.from_dict(resp.json()['friendslist']['friends'])

In [228]:
# displaying the newly created data frame
df.head()

Unnamed: 0,steamid,relationship,friend_since
0,76561197960265731,friend,0
1,76561197960265738,friend,0
2,76561197960265740,friend,0
3,76561197960265744,friend,1585508613
4,76561197960265747,friend,0


### List of all games (GetAppList)

In [229]:
# obtaining requests from the api to get a list of steam games
resp = requests.get("https://api.steampowered.com/ISteamApps/GetAppList/v2/")

In [230]:
# checking if the request was sucessful
resp.status_code == requests.codes.ok

True

In [231]:
#filtering the data
#resp.json()['applist']['apps']

In [232]:
# creating a data frame with the obtained data
df_games = pd.DataFrame.from_dict(resp.json()['applist']['apps'])

In [233]:
# displaying the data frame
df_games.head()

# Generating requests from SteamSpy

### Using steamspy to request steam's game information

In [234]:
# requesting all game data from steamspy
resp = requests.get("http://steamspy.com/api.php?request=all")

In [235]:
# converting the request to json format
dic_app_user = resp.json()

In [236]:
# obtaining the keys of the dictionary
lst_app_id = dic_app_user.keys()

In [237]:
# checking length
len(dic_app_user)

35855

In [238]:
# checking if the request was sucessful
resp.status_code == requests.codes.ok

True

In [239]:
# filtering the data
# resp.json()

In [240]:
# converting the dictionary to a data frame
df = pd.DataFrame.from_dict(resp.json())

In [241]:
# displaying the data frame
df

Unnamed: 0,570,730,578080,440,304930,230410,271590,359550,291550,550,105600,4000,10,340,272060,444090,240,236390,238960,218620,49520,227940,275390,1085660,301520,252950,291480,220,252490,360,304050,620,433850,550650,386360,96000,44350,8930,72850,417910,80,381210,431960,582010,218230,292030,70,320,755790,370910,...,723210,439400,702620,709390,72530,794240,809640,804910,804540,865810,599490,743920,785790,611820,652730,789710,729290,705000,541310,736240,750200,751270,761720,723360,593150,201930,675330,778140,591020,646240,858730,651490,654200,706550,587030,46470,852,701360,18310,17760,497960,721460,859120,512720,848400,643930,553140,781150,874630,1031290
appid,570,730,578080,440,304930,230410,271590,359550,291550,550,105600,4000,10,340,272060,444090,240,236390,238960,218620,49520,227940,275390,1085660,301520,252950,291480,220,252490,360,304050,620,433850,550650,386360,96000,44350,8930,72850,417910,80,381210,431960,582010,218230,292030,70,320,755790,370910,...,723210,439400,702620,709390,72530,794240,809640,804910,804540,865810,599490,743920,785790,611820,652730,789710,729290,705000,541310,736240,750200,751270,761720,723360,593150,201930,675330,778140,591020,646240,858730,651490,654200,706550,587030,46470,852,701360,18310,17760,497960,721460,859120,512720,848400,643930,553140,781150,874630,1031290
name,Dota 2,Counter-Strike: Global Offensive,PLAYERUNKNOWN'S BATTLEGROUNDS,Team Fortress 2,Unturned,Warframe,Grand Theft Auto V,Tom Clancy's Rainbow Six Siege,Brawlhalla,Left 4 Dead 2,Terraria,Garry's Mod,Counter-Strike,Half-Life 2: Lost Coast,Serena,Paladins,Counter-Strike: Source,War Thunder,Path of Exile,PAYDAY 2,Borderlands 2,Heroes & Generals,Guacamelee! Super Turbo Championship Edition,Destiny 2,Robocraft,Rocket League,Warface,Half-Life 2,Rust,Half-Life Deathmatch: Source,Trove,Portal 2,Z1 Battle Royale,Black Squad,SMITE,The Tiny Bang Story,GRID 2,Sid Meier's Civilization V,The Elder Scrolls V: Skyrim,Street Warriors Online,Counter-Strike: Condition Zero,Dead by Daylight,Wallpaper Engine,MONSTER HUNTER: WORLD,PlanetSide 2,The Witcher 3: Wild Hunt,Half-Life,Half-Life 2: Deathmatch,Ring of Elysium,Kathy Rain,...,Mind Sweeper VR,Legends of Callasia Demo,Wrecking Towers,Chewbrick,Arcadia Beta,Spinch,尺子和橡皮,Mine Seeker,Greedy Developer's Cash Grab,Waifu Fight Dango Style,Bounty Killer,Choo-Choo! The Train Rides!,WHAT THE GOLF?,Xenosis: Alien Infection,Z-Aftershock,BoyAndLabyrinth,Hidden Cubes,OctorSpace,Tiny Thor,The Revolt: Awakening,AWAY: The Survival Series,BioEntity,Can you eat by yourself,Asunder,Ooblets,Jamestown IGF,Space Crawl,BRIKS 2,山贼,Bacon May Die,Yoke Light,No Longer Home,Seven: Reboot,Kaya,Bunker Busters Steamworks Test,Grotesque Tactics: Evil Heroes - Dev,ValveTestApp852,Fate Crawler,Spectraball - Demo,,Legends of Callasia Demo,Xeno Time Inception,That Tiny Spaceship,Velocidevorium,Simian Rising,拯救大魔王3 Falsemen3,Green Mirror,Gene Rain,M.A.D. Cliff - All Quiet On The Bridge,Grove flowers
developer,Valve,"Valve, Hidden Path Entertainment",PUBG Corporation,Valve,Smartly Dressed Games,Digital Extremes,Rockstar North,Ubisoft Montreal,Blue Mammoth Games,Valve,Re-Logic,Facepunch Studios,Valve,Valve,Senscape,Evil Mojo Games,Valve,Gaijin Entertainment,Grinding Gear Games,OVERKILL - a Starbreeze Studio.,"Gearbox Software, Aspyr (Mac), Aspyr (Linux)",RETO MOTO,DrinkBox Studios,Bungie,Freejam,Psyonix LLC,MY.GAMES,Valve,Facepunch Studios,Valve,Trion Worlds,Valve,Daybreak Game Company,NS STUDIO,Titan Forge Games,Colibri Games,Codemasters Racing,"Firaxis Games, Aspyr (Mac), Aspyr (Linux)",Bethesda Game Studios,Crazy Rocks Studios,Valve,Behaviour Interactive Inc.,Wallpaper Engine Team,"CAPCOM Co., Ltd.",Rogue Planet Games,CD PROJEKT RED,Valve,Valve,Aurora Studio,Clifftop Games,...,Chad R. Banks,,Mindtrick,Decabry,,Queen Bee Games,XIAOMU,Jason Crosby,Greedy Developer,Enso Entertainment,Galaxy Game Studio,SE Games,Triband,NerdRage Studios,Mango Tree Game,WBTgame,NixieCraft,"DrinkingNails, LLC",Asylum Square,Siberius Studio,Breaking Walls,Sneaky Party LLC,Independent team,Dawson Frakes,Glumberland,,Hyperfine Studio,Smobile,Tushuo,SnoutUp,Kotoshiro,"Humble Grove, Hana Lee, Cel Davison, Adrienne ...",CakeEaterGames,KirUn,,,,MixerGames,,,,Xeno Gaming LLC,We Make Small Games,Really Slick,All Caps Industries,吃了就睡工作室,"Bad2theBone, Glumpy Fish",Deeli network,Flying Whale,Artepi Corporation
publisher,Valve,Valve,PUBG Corporation,Valve,Smartly Dressed Games,Digital Extremes,Rockstar Games,Ubisoft,Ubisoft,Valve,Re-Logic,Valve,Valve,Valve,Senscape,Hi-Rez Studios,Valve,Gaijin Distribution KFT,Grinding Gear Games,Starbreeze Publishing AB,"2K, Aspyr (Mac), Aspyr (Linux)",RETO MOTO,DrinkBox Studios,Bungie,Freejam,Psyonix LLC,MY.GAMES,Valve,Facepunch Studios,Valve,Trion Worlds,Valve,Daybreak Game Company,NS STUDIO,Hi-Rez Studios,Colibri Games,Codemasters,"2K, Aspyr (Mac), Aspyr (Linux)",Bethesda Softworks,Crazy Rocks Studios,Valve,Behaviour Interactive Inc.,Wallpaper Engine Team,"CAPCOM Co., Ltd.",Daybreak Game Company,CD PROJEKT RED,Valve,Valve,TCH Scarlet Limited,Raw Fury,...,Chad R. Banks,,Mindtrick,Decabry,,Akupara Games,XIAOMU,Jason Crosby,Greedy Publisher,Enso Entertainment,Galaxy Game Studio,SeStudio,Triband,NerdRage Studios,Mango Tree Game,WBTgame,NixieCraft,"DrinkingNails, LLC",Asylum Square,Siberius Studio,Breaking Walls,Sneaky Party LLC,Independent team,Dawson Frakes,Glumberland,,Hyperfine Studio,Smobile,Tushuo,SnoutUp,Kotoshiro,Humble Grove,CakeEaterGames,KirUn,,,,MixerGames,,,,Xeno Gaming LLC,We Make Small Games,Really Slick,All Caps Industries,吃了就睡工作室,Glumpy Fish,Deeli network,Flying Whale,Artepi Corporation
score_rank,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
positive,1096061,3828150,780582,626993,348990,327447,686046,597298,128060,344542,488434,535259,154127,6874,4735,227458,100800,126394,122832,376070,211378,79706,4420,203023,84103,380129,39579,97750,366994,1786,58338,190881,114919,52308,52253,4561,20079,154764,266483,989,15082,246984,170926,199396,44690,363438,44922,7442,66975,2428,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6
negative,194043,532417,663880,39149,34793,32630,175493,84303,22734,10420,10776,20365,4015,1109,1415,38779,4209,30458,8873,58191,15118,38877,368,29922,32044,39454,17322,2896,72638,566,14976,2426,93843,15126,12448,650,4229,6267,15306,728,1592,61060,3107,43651,9112,7010,1682,871,21062,192,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
userscore,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
owners,"100,000,000 .. 200,000,000","100,000,000 .. 200,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","20,000,000 .. 50,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","10,000,000 .. 20,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000","5,000,000 .. 10,000,000",...,"0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000","0 .. 20,000"
average_forever,31602,27524,23253,8201,3158,7605,11314,12352,1441,2375,6118,8400,7037,163,462,2502,7229,3353,6631,4425,3470,846,268,4180,1391,11281,776,674,16775,388,1698,992,3764,1245,2134,300,556,6648,7515,88,1633,8032,2175,9286,1610,4710,786,473,654,1502,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [242]:
# checking the shape 
df.shape

(16, 35855)

In [243]:
# transposing the data frame to convert the columns into rows
df = df.T

In [244]:
# checking the updated data frame
df.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount
570,570,Dota 2,Valve,Valve,,1096061,194043,0,"100,000,000 .. 200,000,000",31602,1707,1211,876,0,0,0
730,730,Counter-Strike: Global Offensive,"Valve, Hidden Path Entertainment",Valve,,3828150,532417,0,"100,000,000 .. 200,000,000",27524,1201,8399,470,0,0,0
578080,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,PUBG Corporation,,780582,663880,0,"20,000,000 .. 50,000,000",23253,663,10926,210,2999,2999,0
440,440,Team Fortress 2,Valve,Valve,,626993,39149,0,"20,000,000 .. 50,000,000",8201,1206,424,261,0,0,0
304930,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,348990,34793,0,"20,000,000 .. 50,000,000",3158,969,326,612,0,0,0


In [245]:
# checking shape
df.shape

(35855, 16)

In [246]:
# checking for any null values in this data set
df.isnull().any()

appid              False
name               False
developer          False
publisher          False
score_rank         False
positive           False
negative           False
userscore          False
owners             False
average_forever    False
average_2weeks     False
median_forever     False
median_2weeks      False
price               True
initialprice        True
discount            True
dtype: bool

In [247]:
# reseting index so that it doesnt show the appid twice
df.reset_index(drop=True, inplace=True)

In [248]:
# displaying the data frame to check the changes
df.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount
0,570,Dota 2,Valve,Valve,,1096061,194043,0,"100,000,000 .. 200,000,000",31602,1707,1211,876,0,0,0
1,730,Counter-Strike: Global Offensive,"Valve, Hidden Path Entertainment",Valve,,3828150,532417,0,"100,000,000 .. 200,000,000",27524,1201,8399,470,0,0,0
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,PUBG Corporation,,780582,663880,0,"20,000,000 .. 50,000,000",23253,663,10926,210,2999,2999,0
3,440,Team Fortress 2,Valve,Valve,,626993,39149,0,"20,000,000 .. 50,000,000",8201,1206,424,261,0,0,0
4,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,348990,34793,0,"20,000,000 .. 50,000,000",3158,969,326,612,0,0,0


# Importing data frames

### Data frame with games information

In [249]:
# inital data frame with the scraped data from steamspy
df.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount
0,570,Dota 2,Valve,Valve,,1096061,194043,0,"100,000,000 .. 200,000,000",31602,1707,1211,876,0,0,0
1,730,Counter-Strike: Global Offensive,"Valve, Hidden Path Entertainment",Valve,,3828150,532417,0,"100,000,000 .. 200,000,000",27524,1201,8399,470,0,0,0
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,PUBG Corporation,,780582,663880,0,"20,000,000 .. 50,000,000",23253,663,10926,210,2999,2999,0
3,440,Team Fortress 2,Valve,Valve,,626993,39149,0,"20,000,000 .. 50,000,000",8201,1206,424,261,0,0,0
4,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,348990,34793,0,"20,000,000 .. 50,000,000",3158,969,326,612,0,0,0


### Data frame with player purchase/play information

this data set was sourced from kaggle: https://www.kaggle.com/tamber/steam-video-games 

In [250]:
# player purchase/play details
df_2 = pd.read_csv('steam-200k.csv')
df_2.head()

Unnamed: 0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0
0,151603712,The Elder Scrolls V Skyrim,play,273.0,0
1,151603712,Fallout 4,purchase,1.0,0
2,151603712,Fallout 4,play,87.0,0
3,151603712,Spore,purchase,1.0,0
4,151603712,Spore,play,14.9,0


### Data frame with games descriptions

this data set was sourced from kaggle: https://www.kaggle.com/nikdavis/steam-store-games

In [251]:
# game descriptions
df_3 = pd.read_csv('steam_description_data.csv')
df_3.head()

Unnamed: 0,steam_appid,detailed_description,about_the_game,short_description
0,10,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...
1,20,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...
2,30,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...
3,40,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...
4,50,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...


### Data frame with more games information

this data set was sourced from kaggle: https://www.kaggle.com/nikdavis/steam-store-games

In [252]:
# information
df_4 = pd.read_csv('steam.csv')
df_4.head()

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99


# Merging the data frames

In [253]:
# displaying data frame with steamspy data
df.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount
0,570,Dota 2,Valve,Valve,,1096061,194043,0,"100,000,000 .. 200,000,000",31602,1707,1211,876,0,0,0
1,730,Counter-Strike: Global Offensive,"Valve, Hidden Path Entertainment",Valve,,3828150,532417,0,"100,000,000 .. 200,000,000",27524,1201,8399,470,0,0,0
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,PUBG Corporation,,780582,663880,0,"20,000,000 .. 50,000,000",23253,663,10926,210,2999,2999,0
3,440,Team Fortress 2,Valve,Valve,,626993,39149,0,"20,000,000 .. 50,000,000",8201,1206,424,261,0,0,0
4,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,348990,34793,0,"20,000,000 .. 50,000,000",3158,969,326,612,0,0,0


In [254]:
# checking shape
df.shape

(35855, 16)

In [255]:
# checking data of the data frame containg game information
df_4.head()

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99


In [256]:
# checking shape
df_4.shape

(27075, 18)

In [257]:
# checking columns
df_4.columns

Index(['appid', 'name', 'release_date', 'english', 'developer', 'publisher', 'platforms',
       'required_age', 'categories', 'genres', 'steamspy_tags', 'achievements', 'positive_ratings',
       'negative_ratings', 'average_playtime', 'median_playtime', 'owners', 'price'],
      dtype='object')

In [258]:
# checking columns
df.columns

Index(['appid', 'name', 'developer', 'publisher', 'score_rank', 'positive', 'negative',
       'userscore', 'owners', 'average_forever', 'average_2weeks', 'median_forever',
       'median_2weeks', 'price', 'initialprice', 'discount'],
      dtype='object')

In [259]:
df[df['name'] == 'Dota 2']

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount
0,570,Dota 2,Valve,Valve,,1096061,194043,0,"100,000,000 .. 200,000,000",31602,1707,1211,876,0,0,0


In [260]:
df_4[df_4.name == 'Dota 2']

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
22,570,Dota 2,2013-07-09,1,Valve,Valve,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0


In [261]:
# dropping unessasary columns
df.drop(['price', 'initialprice', 'discount', 'average_2weeks', 'median_2weeks', 'owners'], axis=1, inplace=True)

In [262]:
# dropping unessasary columns
df_4.drop(['name', 'developer', 'publisher'], axis=1, inplace=True)

In [263]:
df_gameinfo = pd.DataFrame.merge(df,df_4,on='appid')

In [264]:
df_gameinfo.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,average_forever,median_forever,release_date,english,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0
1,730,Counter-Strike: Global Offensive,"Valve, Hidden Path Entertainment",Valve,,3828150,532417,0,27524,8399,2012-08-21,1,windows;mac;linux,0,Multi-player;Steam Achievements;Full controlle...,Action;Free to Play,FPS;Multiplayer;Shooter,167,2644404,402313,22494,6502,50000000-100000000,0.0
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,PUBG Corporation,,780582,663880,0,23253,10926,2017-12-21,1,windows,0,Multi-player;Online Multi-Player;Stats,Action;Adventure;Massively Multiplayer,Survival;Shooter;Multiplayer,37,496184,487076,22938,12434,50000000-100000000,26.99
3,440,Team Fortress 2,Valve,Valve,,626993,39149,0,8201,424,2007-10-10,1,windows;mac;linux,0,Multi-player;Cross-Platform Multiplayer;Steam ...,Action;Free to Play,Free to Play;Multiplayer;FPS,520,515879,34036,8495,623,20000000-50000000,0.0
4,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,348990,34793,0,3158,326,2017-07-07,1,windows;mac;linux,0,Single-player;Online Multi-Player;Online Co-op...,Action;Adventure;Casual;Free to Play;Indie,Free to Play;Survival;Zombies,46,292574,31482,3248,413,20000000-50000000,0.0


In [265]:
df_gameinfo.shape

(25974, 24)

In [266]:
df_gameinfo.columns

Index(['appid', 'name', 'developer', 'publisher', 'score_rank', 'positive', 'negative',
       'userscore', 'average_forever', 'median_forever', 'release_date', 'english', 'platforms',
       'required_age', 'categories', 'genres', 'steamspy_tags', 'achievements', 'positive_ratings',
       'negative_ratings', 'average_playtime', 'median_playtime', 'owners', 'price'],
      dtype='object')

In [267]:
df_3

Unnamed: 0,steam_appid,detailed_description,about_the_game,short_description
0,10,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...
1,20,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...
2,30,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...
3,40,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...
4,50,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...
...,...,...,...,...
27329,1065230,"<img src=""https://steamcdn-a.akamaihd.net/stea...","<img src=""https://steamcdn-a.akamaihd.net/stea...",The Room of Pandora is a third-person interact...
27330,1065570,Have you ever been so lonely that no one but y...,Have you ever been so lonely that no one but y...,Cyber Gun is a hardcore first-person shooter w...
27331,1065650,<strong>Super Star Blast </strong>is a space b...,<strong>Super Star Blast </strong>is a space b...,Super Star Blast is a space based game with ch...
27332,1066700,Pursue a snow-white deer through an enchanted ...,Pursue a snow-white deer through an enchanted ...,Pursue a snow-white deer through an enchanted ...


In [268]:
df_3.rename(columns={'steam_appid': 'appid'}, inplace=True)

In [269]:
df_gameinfo = pd.DataFrame.merge(df_gameinfo,df_3,on='appid')

In [270]:
df_gameinfo.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,average_forever,median_forever,release_date,english,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,detailed_description,about_the_game,short_description
0,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter..."
1,730,Counter-Strike: Global Offensive,"Valve, Hidden Path Entertainment",Valve,,3828150,532417,0,27524,8399,2012-08-21,1,windows;mac;linux,0,Multi-player;Steam Achievements;Full controlle...,Action;Free to Play,FPS;Multiplayer;Shooter,167,2644404,402313,22494,6502,50000000-100000000,0.0,Counter-Strike: Global Offensive (CS: GO) expa...,Counter-Strike: Global Offensive (CS: GO) expa...,Counter-Strike: Global Offensive (CS: GO) expa...
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,PUBG Corporation,,780582,663880,0,23253,10926,2017-12-21,1,windows,0,Multi-player;Online Multi-Player;Stats,Action;Adventure;Massively Multiplayer,Survival;Shooter;Multiplayer,37,496184,487076,22938,12434,50000000-100000000,26.99,<strong>PLAYERUNKNOWN'S BATTLEGROUNDS</strong>...,<strong>PLAYERUNKNOWN'S BATTLEGROUNDS</strong>...,PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya...
3,440,Team Fortress 2,Valve,Valve,,626993,39149,0,8201,424,2007-10-10,1,windows;mac;linux,0,Multi-player;Cross-Platform Multiplayer;Steam ...,Action;Free to Play,Free to Play;Multiplayer;FPS,520,515879,34036,8495,623,20000000-50000000,0.0,"<h1>The Jungle Inferno Update</h1><p><a href=""...","<p><strong>""The most fun you can have online""<...",Nine distinct classes provide a broad range of...
4,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,348990,34793,0,3158,326,2017-07-07,1,windows;mac;linux,0,Single-player;Online Multi-Player;Online Co-op...,Action;Adventure;Casual;Free to Play;Indie,Free to Play;Survival;Zombies,46,292574,31482,3248,413,20000000-50000000,0.0,"<img src=""https://steamcdn-a.akamaihd.net/stea...","<img src=""https://steamcdn-a.akamaihd.net/stea...",You're a survivor in the zombie infested ruins...


In [271]:
df_2

Unnamed: 0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0
0,151603712,The Elder Scrolls V Skyrim,play,273.0,0
1,151603712,Fallout 4,purchase,1.0,0
2,151603712,Fallout 4,play,87.0,0
3,151603712,Spore,purchase,1.0,0
4,151603712,Spore,play,14.9,0
...,...,...,...,...,...
199994,128470551,Titan Souls,play,1.5,0
199995,128470551,Grand Theft Auto Vice City,purchase,1.0,0
199996,128470551,Grand Theft Auto Vice City,play,1.5,0
199997,128470551,RUSH,purchase,1.0,0


In [272]:
df_2.rename(columns={'151603712': 'id', 'The Elder Scrolls V Skyrim': 'name', 
                     '1.0': 'hours_of_play'}, inplace=True)

In [273]:
df_2.head()

Unnamed: 0,id,name,purchase,hours_of_play,0
0,151603712,The Elder Scrolls V Skyrim,play,273.0,0
1,151603712,Fallout 4,purchase,1.0,0
2,151603712,Fallout 4,play,87.0,0
3,151603712,Spore,purchase,1.0,0
4,151603712,Spore,play,14.9,0


In [274]:
df_2.columns

Index(['id', 'name', 'purchase', 'hours_of_play', '0'], dtype='object')

In [275]:
df_2['0'].unique()

array([0], dtype=int64)

In [276]:
del df_2['0']

In [277]:
df_2.head()

Unnamed: 0,id,name,purchase,hours_of_play
0,151603712,The Elder Scrolls V Skyrim,play,273.0
1,151603712,Fallout 4,purchase,1.0
2,151603712,Fallout 4,play,87.0
3,151603712,Spore,purchase,1.0
4,151603712,Spore,play,14.9


In [278]:
df_2[df_2.name == 'Dota 2']

Unnamed: 0,id,name,purchase,hours_of_play
41,151603712,Dota 2,purchase,1.0
42,151603712,Dota 2,play,0.5
65,187131847,Dota 2,purchase,1.0
66,187131847,Dota 2,play,2.3
854,176410694,Dota 2,purchase,1.0
...,...,...,...,...
199896,99096740,Dota 2,play,1704.0
199947,176449171,Dota 2,purchase,1.0
199948,176449171,Dota 2,play,1310.0
199959,221315846,Dota 2,purchase,1.0


In [279]:
df_final = pd.DataFrame.merge(df_gameinfo,df_2,on='name')

In [280]:
df_final.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,average_forever,median_forever,release_date,english,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,detailed_description,about_the_game,short_description,id,purchase,hours_of_play
0,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",151603712,purchase,1.0
1,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",151603712,play,0.5
2,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",187131847,purchase,1.0
3,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",187131847,play,2.3
4,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",176410694,purchase,1.0


In [281]:
df_final.shape

(99898, 30)

# DATA CLEANING

In [282]:
df = df_final

In [283]:
# detecting nan values in dataframe
df.isnull().values.any()

False

In [284]:
# locating where the nan values exist and the amount of nan values for each columns
df.isnull().sum()

appid                   0
name                    0
developer               0
publisher               0
score_rank              0
positive                0
negative                0
userscore               0
average_forever         0
median_forever          0
release_date            0
english                 0
platforms               0
required_age            0
categories              0
genres                  0
steamspy_tags           0
achievements            0
positive_ratings        0
negative_ratings        0
average_playtime        0
median_playtime         0
owners                  0
price                   0
detailed_description    0
about_the_game          0
short_description       0
id                      0
purchase                0
hours_of_play           0
dtype: int64

In [285]:
df.shape

(99898, 30)

In [286]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 99898 entries, 0 to 99897
Data columns (total 30 columns):
appid                   99898 non-null object
name                    99898 non-null object
developer               99898 non-null object
publisher               99898 non-null object
score_rank              99898 non-null object
positive                99898 non-null object
negative                99898 non-null object
userscore               99898 non-null object
average_forever         99898 non-null object
median_forever          99898 non-null object
release_date            99898 non-null object
english                 99898 non-null int64
platforms               99898 non-null object
required_age            99898 non-null int64
categories              99898 non-null object
genres                  99898 non-null object
steamspy_tags           99898 non-null object
achievements            99898 non-null int64
positive_ratings        99898 non-null int64
negative_ratings     

In [287]:
df.describe()

Unnamed: 0,english,required_age,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,price,id,hours_of_play
count,99898.0,99898.0,99898.0,99898.0,99898.0,99898.0,99898.0,99898.0,99898.0,99898.0
mean,0.99969,2.231236,73.77487,157967.949969,21642.207792,4436.0883,590.91149,6.703205,113469100.0,21.733638
std,0.017613,5.813158,155.255117,263975.719136,42423.371777,7263.747083,1437.542446,7.575958,75361310.0,171.983658
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5250.0,0.1
25%,1.0,0.0,0.0,3700.0,661.0,258.0,150.0,0.0,51085780.0,1.0
50%,1.0,0.0,29.0,27755.0,2609.0,950.0,364.0,6.99,101000600.0,1.0
75%,1.0,0.0,67.0,144595.0,16433.0,4760.0,801.0,9.99,166863100.0,2.0
max,1.0,18.0,1746.0,863507.0,142079.0,95242.0,190445.0,69.99,309903100.0,11754.0


In [288]:
df['release_date'] = pd.to_datetime(df['release_date'])

In [289]:
df.head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,average_forever,median_forever,release_date,english,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,detailed_description,about_the_game,short_description,id,purchase,hours_of_play
0,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",151603712,purchase,1.0
1,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",151603712,play,0.5
2,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",187131847,purchase,1.0
3,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",187131847,play,2.3
4,570,Dota 2,Valve,Valve,,1096061,194043,0,31602,1211,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,801,100000000-200000000,0.0,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",176410694,purchase,1.0


In [290]:
df.columns

Index(['appid', 'name', 'developer', 'publisher', 'score_rank', 'positive', 'negative',
       'userscore', 'average_forever', 'median_forever', 'release_date', 'english', 'platforms',
       'required_age', 'categories', 'genres', 'steamspy_tags', 'achievements', 'positive_ratings',
       'negative_ratings', 'average_playtime', 'median_playtime', 'owners', 'price',
       'detailed_description', 'about_the_game', 'short_description', 'id', 'purchase',
       'hours_of_play'],
      dtype='object')

In [291]:
df = df[['id', 'appid', 'name', 'purchase', 'hours_of_play', 'developer',
       'publisher', 'score_rank', 'positive', 'negative', 'userscore', 'release_date', 'english',
       'platforms', 'required_age', 'categories', 'genres', 'steamspy_tags', 'achievements',
       'positive_ratings', 'negative_ratings', 'average_playtime', 'average_forever',
       'median_playtime', 'median_forever', 'owners', 'detailed_description', 'about_the_game',
       'short_description', 'price']]

In [292]:
df.columns

Index(['id', 'appid', 'name', 'purchase', 'hours_of_play', 'developer', 'publisher', 'score_rank',
       'positive', 'negative', 'userscore', 'release_date', 'english', 'platforms', 'required_age',
       'categories', 'genres', 'steamspy_tags', 'achievements', 'positive_ratings',
       'negative_ratings', 'average_playtime', 'average_forever', 'median_playtime',
       'median_forever', 'owners', 'detailed_description', 'about_the_game', 'short_description',
       'price'],
      dtype='object')

In [293]:
df.head()

Unnamed: 0,id,appid,name,purchase,hours_of_play,developer,publisher,score_rank,positive,negative,userscore,release_date,english,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,average_forever,median_playtime,median_forever,owners,detailed_description,about_the_game,short_description,price
0,151603712,570,Dota 2,purchase,1.0,Valve,Valve,,1096061,194043,0,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,31602,801,1211,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0
1,151603712,570,Dota 2,play,0.5,Valve,Valve,,1096061,194043,0,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,31602,801,1211,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0
2,187131847,570,Dota 2,purchase,1.0,Valve,Valve,,1096061,194043,0,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,31602,801,1211,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0
3,187131847,570,Dota 2,play,2.3,Valve,Valve,,1096061,194043,0,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,31602,801,1211,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0
4,176410694,570,Dota 2,purchase,1.0,Valve,Valve,,1096061,194043,0,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,863507,142079,23944,31602,801,1211,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0


In [294]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 99898 entries, 0 to 99897
Data columns (total 30 columns):
id                      99898 non-null int64
appid                   99898 non-null object
name                    99898 non-null object
purchase                99898 non-null object
hours_of_play           99898 non-null float64
developer               99898 non-null object
publisher               99898 non-null object
score_rank              99898 non-null object
positive                99898 non-null object
negative                99898 non-null object
userscore               99898 non-null object
release_date            99898 non-null datetime64[ns]
english                 99898 non-null int64
platforms               99898 non-null object
required_age            99898 non-null int64
categories              99898 non-null object
genres                  99898 non-null object
steamspy_tags           99898 non-null object
achievements            99898 non-null int64
positive_rat

In [295]:
df['id'].astype(str).astype(int)

0        151603712
1        151603712
2        187131847
3        187131847
4        176410694
           ...    
99893     11373749
99894     55906572
99895     17530772
99896    192170147
99897    154230723
Name: id, Length: 99898, dtype: int32

In [296]:
df['appid'] = df['appid'].astype(str).astype('int64')

In [297]:
df.dtypes

id                               int64
appid                            int64
name                            object
purchase                        object
hours_of_play                  float64
developer                       object
publisher                       object
score_rank                      object
positive                        object
negative                        object
userscore                       object
release_date            datetime64[ns]
english                          int64
platforms                       object
required_age                     int64
categories                      object
genres                          object
steamspy_tags                   object
achievements                     int64
positive_ratings                 int64
negative_ratings                 int64
average_playtime                 int64
average_forever                 object
median_playtime                  int64
median_forever                  object
owners                   

In [298]:
df.name.unique()

array(['Dota 2', 'Team Fortress 2', 'Unturned', ..., 'Project Aftermath',
       "YOU DON'T KNOW JACK Vol. 2", "YOU DON'T KNOW JACK TELEVISION"],
      dtype=object)

In [299]:
df.score_rank.unique()

array([''], dtype=object)

In [300]:
df.userscore.unique()

array([0], dtype=object)

In [301]:
# deleting the unessesary columns
del df['userscore']
del df['score_rank']
del df['average_forever']
del df['median_forever']

In [302]:
# creating a new column to show the percentage of positive ratings of games
# adding the positive and negative columns
df['rank'] = df['positive'] + df['negative']

In [303]:
# dividing positive column by the rank column
df['rank'] = df['positive']/df['rank']

In [304]:
# multiplying rank by 100 to get percentage
df['rank'] = df['rank'] * 100

# Saving the final cleaned data frame to a csv file

In [305]:
df.to_csv('steam_rs.csv', index=False)