In [4]:
import pandas as pd

## Steam popularity

In [5]:
steam_charts = pd.read_csv("../data/SteamCharts.csv", encoding='unicode_escape')

In [6]:
steam_charts.shape

(83789, 7)

In [7]:
steam_charts.sample(5)

Unnamed: 0,gamename,year,month,avg,gain,peak,avg_peak_perc
8381,Fishing Planet,2015,August,2058.52,,6651,30.9505%
60734,Supreme Commander 2,2014,October,381.83,-58.76,833,45.8379%
61505,Tower Unite,2020,July,331.8,69.96,585,56.7179%
79379,The Final Station,2016,June,0.81,-2.08,10,8.1%
29580,Half-Life,2018,June,499.52,169.86,1420,35.1775%


In [8]:
df_charts = steam_charts.copy(deep=True)

In [9]:
df_charts['month'].unique()

array(['February ', 'January ', 'December ', 'November ', 'October ',
       'September ', 'August ', 'July ', 'June ', 'May ', 'April ',
       'March '], dtype=object)

In [10]:
df_charts['month'] = df_charts['month'].str.strip()

In [11]:
# create new date column
df_charts['month_number'] = pd.to_datetime(df_charts['month'], format='%B').dt.month

In [12]:
df_charts.sort_values(by=['year', 'month'], inplace=True)

In [13]:
df_charts

Unnamed: 0,gamename,year,month,avg,gain,peak,avg_peak_perc,month_number
102,Counter-Strike: Global Offensive,2012,August,15475.39,14542.81,52261,29.6117%,8
206,Dota 2,2012,August,55768.61,3047.56,108689,51.3103%,8
449,Team Fortress 2,2012,August,61561.37,15994.47,117917,52.2074%,8
989,PAYDAY 2,2012,August,0.00,,0,NaN%,8
1405,Garry's Mod,2012,August,6867.33,-1098.61,10711,64.1147%,8
...,...,...,...,...,...,...,...,...
83428,Teeworlds,2021,January,37.25,-2.37,101,36.8812%,1
83495,Far Cry,2021,January,86.17,11.31,224,38.4688%,1
83599,Axiom Verge,2021,January,27.00,5.74,67,40.2985%,1
83670,Plug & Play,2021,January,1.49,0.31,6,24.8333%,1


In [14]:
print(f"Start year and month: {df_charts.iloc[0]['year']} {df_charts.iloc[0]['month']}")
print(f"End year and month: {df_charts.iloc[-1]['year']} {df_charts.iloc[-1]['month']}")

Start year and month: 2012 August
End year and month: 2021 January


## Steam game releases

In [15]:
steam_releases = pd.read_csv("../data/SteamReleases.csv")

In [16]:
steam_releases.sample(5)

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
12790,740390,Terrorist Elimination,"Nov 7, 2017",0 - 20000,0,0,5.99,0,"GamePlay: Purchase guns and ammo in the store,...",['English'],...,0,0,0,ReverseGames,ReverseGames,Single-player,"Violent,Action,Casual","Action,Violent,Casual",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
20340,434890,Eight Mini Racers,"Jan 12, 2016",50000 - 100000,0,0,0.49,0,1 - 8 player local multiplayer 2D top-down rac...,['English'],...,0,212,0,David Mulder,SA Industry,"Single-player,Multi-player,Shared/Split Screen...","Casual,Racing","Racing,Casual",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
28122,1177650,Paper Shakespeare: To Date or Not To Date? 2,"Sep 23, 2020",0 - 20000,0,0,9.99,2,"King Alexander XI is dead, and without heir. Y...",['English'],...,0,0,0,Stegalosaurus Game Development,Stegalosaurus Game Development,"Single-player,Steam Achievements","Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
53521,1094460,Tiger Tank 59 Ⅰ Battleship,"Jun 10, 2019",0 - 20000,0,0,0.0,100,'Tiger Tank 59' is a top-down shooting game In...,['English'],...,0,0,0,TigerQiuQiu,TigerQiuQiu,"Single-player,Steam Achievements,Full controll...","Action,Casual,Indie","Action,Indie,Casual",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
44744,1939940,Roll Player - The Board Game,"May 20, 2022",0 - 20000,1,0,14.99,0,Roll Player is a digital adaptation of the pop...,['English'],...,0,0,0,Mipmap,Mipmap,"Single-player,Multi-player,PvP,Shared/Split Sc...","Casual,Strategy,Early Access","Early Access,Strategy,Board Game,Character Cus...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [17]:
for col in steam_releases.columns:
    print(f"{col}")

AppID
Name
Release date
Estimated owners
Peak CCU
Required age
Price
DLC count
About the game
Supported languages
Full audio languages
Reviews
Header image
Website
Support url
Support email
Windows
Mac
Linux
Metacritic score
Metacritic url
User score
Positive
Negative
Score rank
Achievements
Recommendations
Notes
Average playtime forever
Average playtime two weeks
Median playtime forever
Median playtime two weeks
Developers
Publishers
Categories
Genres
Tags
Screenshots
Movies


In [18]:
steam_releases.shape

(71716, 39)

In [19]:
df_releases = steam_releases.copy(deep=True)

Transform Release Date into uniform date because the column has two different formats: '%b %Y' (May 2020) and '%b %d, %Y' (May 21, 2020). New format: '%Y-%m-%d'

In [20]:
# convert different date formats
df_releases['temp_date'] = pd.to_datetime(df_releases['Release date'], format='%b %Y', errors='coerce')
df_releases['temp_date'] = df_releases['temp_date'].dt.strftime('%Y-%m-%d')

df_releases['temp_date_2'] = pd.to_datetime(df_releases['Release date'], format='%b %d, %Y', errors='coerce')
df_releases['temp_date_2'] = df_releases['temp_date_2'].dt.strftime('%Y-%m-%d')

In [22]:
# change dtype in df
df_releases['Release date'] = df_releases['temp_date'].combine_first(df_releases['temp_date_2'])
df_releases['Release date'] = pd.to_datetime(df_releases['Release date'])

In [23]:
# create and sort index
df_releases.sort_values(by='Release date', inplace=True)

In [24]:
print(f"Start date: {df_releases['Release date'].min()}\nEnd date: {df_releases['Release date'].max()}")

Start date: 1997-06-30 00:00:00
End date: 2025-04-14 00:00:00
