In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Notebook-wide display setting: with "all", IPython shows the value of every
# top-level expression statement in a cell instead of only the last one
# (the IPython default is "last_expr"). Expressions that evaluate to None
# still print nothing.
InteractiveShell.ast_node_interactivity = "all"

# Final Project - Exploring Steam Game Trends and Predicting Optimal Game Prices

## 1. Data Cleaning

1.1 Cleaning the data in applicationInformation.csv. This is really the only file outside of the PriceHistory, PlayerCountHistoryPart1 and PlayerCountHistoryPart2 folders that we will be using. It contains the game titles and appids, which are the keys we use to look up each game's price and player-count history.

In [115]:
import pandas as pd
import os.path
import numpy as np

# Load the raw application catalogue. The raw frame is kept under its own name
# so this cell can be re-run without depending on a previously filtered `df`.
applications_raw = pd.read_csv(
    "steam_dataset/applicationInformation.csv", encoding="unicode_escape"
)

# Every retained row must have a value in each of these columns.
required_columns = ["appid", "type", "name", "releasedate", "freetoplay"]

# Keep only rows whose type is exactly 'game'. A single vectorized comparison
# replaces re-filtering the whole frame once per non-game row; rows with a
# missing type compare False here, so they are excluded as well.
# The original row index is intentionally kept (not reset).
# .copy() makes `df` an independent frame, so later cells that add columns are
# not writing into a slice of the raw data.
df = (
    applications_raw.loc[applications_raw["type"] == "game"]
    .dropna(subset=required_columns)
    .copy()
)
df

Unnamed: 0,appid,type,name,releasedate,freetoplay
0,578080,game,PLAYERUNKNOWN'S BATTLEGROUNDS,21-Dec-17,0.0
1,570,game,Dota 2,9-Jul-13,1.0
2,730,game,Counter-Strike: Global Offensive,21-Aug-12,1.0
4,359550,game,Tom Clancy's Rainbow Six Siege,1-Dec-15,0.0
5,271590,game,Grand Theft Auto V,13-Apr-15,0.0
...,...,...,...,...,...
1993,64000,game,Men of War: Assault Squad,24-Feb-11,0.0
1994,227860,game,Castle Story,17-Aug-17,0.0
1997,429050,game,Feed and Grow: Fish,8-Jan-16,0.0
1998,209650,game,Call of Duty: Advanced Warfare,3-Nov-14,0.0


## 2. Data Integration

2.1 Integrating the data from PriceHistory into our data frame. For now we just compute each game's average price and add it to the data frame as a new column; games with no price-history file get an average price of 0.0.

In [116]:
def _average_final_price(app_id):
    """Return one app's mean `Finalprice`, rounded to 2 decimal places.

    Reads `steam_dataset/PriceHistory/<app_id>.csv`.

    Returns 0.0 when the file does not exist, or when it contains no usable
    price values (no rows, or only missing values). Missing values inside an
    otherwise populated file are skipped rather than turning the whole
    average into NaN.

    NOTE(review): 0.0 is also the average of a genuinely free game, so
    "no price data" and "free" are indistinguishable in the result; confirm
    that downstream analysis is fine with that.
    """
    price_path = f'steam_dataset/PriceHistory/{app_id}.csv'
    if not os.path.exists(price_path):
        return 0.0

    price_history = pd.read_csv(price_path, encoding='unicode_escape')
    # Series.mean() skips NaN by default and yields NaN for an empty column,
    # which avoids the division by zero a manual sum/count would hit.
    mean_price = price_history['Finalprice'].mean()
    if pd.isna(mean_price):
        return 0.0
    return round(mean_price, 2)


# One average per row of `df`, in row order, so the list lines up with the frame.
averagePrices = [_average_final_price(app_id) for app_id in df['appid']]

df['averageprice'] = averagePrices

df

Unnamed: 0,appid,type,name,releasedate,freetoplay,averageprice
0,578080,game,PLAYERUNKNOWN'S BATTLEGROUNDS,21-Dec-17,0.0,27.56
1,570,game,Dota 2,9-Jul-13,1.0,0.00
2,730,game,Counter-Strike: Global Offensive,21-Aug-12,1.0,0.00
4,359550,game,Tom Clancy's Rainbow Six Siege,1-Dec-15,0.0,17.08
5,271590,game,Grand Theft Auto V,13-Apr-15,0.0,25.88
...,...,...,...,...,...,...
1993,64000,game,Men of War: Assault Squad,24-Feb-11,0.0,8.82
1994,227860,game,Castle Story,17-Aug-17,0.0,22.63
1997,429050,game,Feed and Grow: Fish,8-Jan-16,0.0,14.88
1998,209650,game,Call of Duty: Advanced Warfare,3-Nov-14,0.0,53.64


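2.2 Integrating the data from PlayerCountHistoryPart1 and PlayerCountHistoryPart2 into our data frame. For now we just compute each game's average player count and add it to the data frame as a new column. Missing player-count readings are treated as 0, and games with no player-count file get an average of 0.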

In [120]:
# Directories searched, in order, for an app's player-count history file.
_PLAYER_COUNT_DIRS = (
    'steam_dataset/PlayerCountHistoryPart1',
    'steam_dataset/PlayerCountHistoryPart2',
)


def _average_player_count(app_id):
    """Return one app's mean `Playercount`, rounded to 2 decimal places.

    Looks for `<app_id>.csv` in each directory of `_PLAYER_COUNT_DIRS` and
    uses the first one that exists.

    Missing readings are counted as 0 players before averaging (this matches
    the notebook's existing choice and pulls the average down for files with
    gaps). Returns 0.0 when no file exists in either directory or when the
    file has no rows.
    """
    for history_dir in _PLAYER_COUNT_DIRS:
        history_path = f'{history_dir}/{app_id}.csv'
        if not os.path.exists(history_path):
            continue
        player_counts = pd.read_csv(history_path)['Playercount'].fillna(0)
        # Guard the empty file explicitly: the mean of zero rows is NaN.
        if player_counts.empty:
            return 0.0
        return round(player_counts.mean(), 2)
    return 0.0


# One average per row of `df`, in row order, so the list lines up with the frame.
averageplayercount = [_average_player_count(app_id) for app_id in df['appid']]

df['averageplayercount'] = averageplayercount

df.head(30)

Unnamed: 0,appid,type,name,releasedate,freetoplay,averageprice,averageplayercount
0,578080,game,PLAYERUNKNOWN'S BATTLEGROUNDS,21-Dec-17,0.0,27.56,534928.79
1,570,game,Dota 2,9-Jul-13,1.0,0.0,459574.81
2,730,game,Counter-Strike: Global Offensive,21-Aug-12,1.0,0.0,427094.35
4,359550,game,Tom Clancy's Rainbow Six Siege,1-Dec-15,0.0,17.08,74232.19
5,271590,game,Grand Theft Auto V,13-Apr-15,0.0,25.88,74215.78
6,238960,game,Path of Exile,23-Oct-13,1.0,0.0,26815.41
7,230410,game,Warframe,25-Mar-13,1.0,0.0,50398.59
8,440,game,Team Fortress 2,10-Oct-07,1.0,0.0,50453.49
9,252950,game,Rocket League,7-Jul-15,0.0,18.14,35646.51
10,252490,game,Rust,8-Feb-18,0.0,34.55,42847.18


## 3. Data Visualization

0