### Essential Libraries
Import the essential Python libraries.

NumPy : Library for Numeric Computations in Python

Pandas : Library for Data Acquisition and Preparation

Matplotlib : Low-level library for Data Visualization

Seaborn : Higher-level library for Data Visualization

In [157]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics


import re
import plotly.figure_factory as ff
import plotly.express as px

In [158]:
# Load the cleaned Steam dataset from the working directory and display it.
steam_csv_path = 'steam_cleaned.csv'
df = pd.read_csv(steam_csv_path)
df

Unnamed: 0.1,Unnamed: 0,name,genres,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,...,SteamVR Collectibles,Online Co-op,Shared/Split Screen,Local Co-op,MMO,VR Support,Mods,Mods (require HL2),Steam Turn Notifications,age
0,0,Counter-Strike,Action,0,124534,3339,17612,317,15000000,7.19,...,0,0,0,0,0,0,0,0,0,8461
1,1,Team Fortress Classic,Action,0,3318,633,277,62,7500000,3.99,...,0,0,0,0,0,0,0,0,0,9041
2,2,Day of Defeat,Action,0,3416,398,187,34,7500000,3.99,...,0,0,0,0,0,0,0,0,0,7550
3,3,Deathmatch Classic,Action,0,1273,267,258,184,7500000,3.99,...,0,0,0,0,0,0,0,0,0,8249
4,4,Half-Life: Opposing Force,Action,0,5250,288,624,415,7500000,3.99,...,0,0,0,0,0,0,0,0,0,8827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26559,27070,Room of Pandora,Adventure;Casual;Indie,7,3,0,0,0,10000,2.09,...,0,0,0,0,0,0,0,0,0,1713
26560,27071,Cyber Gun,Action;Adventure;Indie,0,8,1,0,0,10000,1.69,...,0,0,0,0,0,0,0,0,0,1714
26561,27072,Super Star Blast,Action;Casual;Indie,24,0,1,0,0,10000,3.99,...,0,0,1,0,0,0,0,0,0,1713
26562,27073,New Yankee 7: Deer Hunters,Adventure;Casual;Indie,0,2,0,0,0,10000,5.19,...,0,0,0,0,0,0,0,0,0,1720


Our question: are single-player or multiplayer games more popular?

For the response variable, we will use average_playtime.

In [159]:
# Inspect column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26564 entries, 0 to 26563
Data columns (total 40 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0                  26564 non-null  int64  
 1   name                        26564 non-null  object 
 2   genres                      26564 non-null  object 
 3   achievements                26564 non-null  int64  
 4   positive_ratings            26564 non-null  int64  
 5   negative_ratings            26564 non-null  int64  
 6   average_playtime            26564 non-null  int64  
 7   median_playtime             26564 non-null  int64  
 8   owners                      26564 non-null  int64  
 9   price                       26564 non-null  float64
 10  Multi-player                26564 non-null  int64  
 11  Online Multi-Player         26564 non-null  int64  
 12  Local Multi-Player          26564 non-null  int64  
 13  Valve Anti-Cheat enabled    265

Drop the unnamed first column (`Unnamed: 0`), which was added automatically when the cleaned data file was exported.

In [160]:
# Drop the leftover export index column by label rather than by position.
# A positional drop (df.columns[0]) removes a *different* column every time the
# cell is re-run (first 'Unnamed: 0', then 'name', ...). Dropping by label with
# errors='ignore' makes the cell idempotent: a second run is a no-op.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [161]:
# Re-inspect the frame to confirm the leading 'Unnamed: 0' column is gone.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26564 entries, 0 to 26563
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   name                        26564 non-null  object 
 1   genres                      26564 non-null  object 
 2   achievements                26564 non-null  int64  
 3   positive_ratings            26564 non-null  int64  
 4   negative_ratings            26564 non-null  int64  
 5   average_playtime            26564 non-null  int64  
 6   median_playtime             26564 non-null  int64  
 7   owners                      26564 non-null  int64  
 8   price                       26564 non-null  float64
 9   Multi-player                26564 non-null  int64  
 10  Online Multi-Player         26564 non-null  int64  
 11  Local Multi-Player          26564 non-null  int64  
 12  Valve Anti-Cheat enabled    26564 non-null  int64  
 13  Single-player               265

In [162]:
# Count the distinct values in every column, then report them one per line.
unique_counts = {column: df[column].nunique() for column in df.columns}
for column, n_unique in unique_counts.items():
    print(column, ":", n_unique)

name : 26523
genres : 1528
achievements : 409
positive_ratings : 2789
negative_ratings : 1489
average_playtime : 1337
median_playtime : 1302
owners : 13
price : 278
Multi-player : 2
Online Multi-Player : 2
Local Multi-Player : 2
Valve Anti-Cheat enabled : 2
Single-player : 2
Steam Cloud : 2
Steam Achievements : 2
Steam Trading Cards : 2
Captions available : 2
Partial Controller Support : 2
Includes Source SDK : 2
Cross-Platform Multiplayer : 2
Stats : 2
Commentary available : 2
Includes level editor : 2
Steam Workshop : 2
In-App Purchases : 2
Co-op : 2
Full controller support : 2
Steam Leaderboards : 2
SteamVR Collectibles : 2
Online Co-op : 2
Shared/Split Screen : 2
Local Co-op : 2
MMO : 2
VR Support : 2
Mods : 2
Mods (require HL2) : 2
Steam Turn Notifications : 2
age : 2619


### Categories

In [163]:
# Category flag names carrying the 'categories_' prefix.
category_columns = ['categories_Multi-player', 'categories_Online Multi-Player', 'categories_Local Multi-Player', 'categories_Valve Anti-Cheat enabled',
                        'categories_Single-player', 'categories_Steam Cloud', 'categories_Steam Achievements', 'categories_Steam Trading Cards',
                        'categories_Captions available', 'categories_Partial Controller Support', 'categories_Includes Source SDK',
                        'categories_Cross-Platform Multiplayer', 'categories_Stats', 'categories_Commentary available',
                        'categories_Includes level editor', 'categories_Steam Workshop', 'categories_In-App Purchases',
                        'categories_Co-op', 'categories_Full controller support', 'categories_Steam Leaderboards',
                        'categories_SteamVR Collectibles', 'categories_Online Co-op', 'categories_Shared/Split Screen',
                        'categories_Local Co-op', 'categories_MMO', 'categories_VR Support',
                        'categories_Mods', 'categories_Mods (require HL2)', 'categories_Steam Turn Notifications']

category_prefix = 'categories_'

# Derive the column names as they appear in `df`, whose category columns are
# listed without the prefix (e.g. 'Multi-player').
# The previous loop appended `col_name`, which was only assigned in
# commented-out lines, so the cell raised NameError on a fresh kernel.
# The resulting list can be used to select the flag columns, e.g.
# df[cat_col_names].sum().
cat_col_names = [
    col[len(category_prefix):] if col.startswith(category_prefix) else col
    for col in sorted(category_columns)
]

In [164]:
# Binary category-flag columns of `df` to explore (each has two distinct values).
# The list is written once and used for selection, so the data and the column
# labels cannot drift out of sync as they could with two hand-typed lists.
eda_columns = ['Multi-player', 'Online Multi-Player', 'Local Multi-Player', 'Valve Anti-Cheat enabled',
               'Single-player', 'Steam Cloud', 'Steam Achievements', 'Steam Trading Cards',
               'Captions available', 'Partial Controller Support', 'Includes Source SDK',
               'Cross-Platform Multiplayer', 'Stats', 'Commentary available',
               'Includes level editor', 'Steam Workshop', 'In-App Purchases',
               'Co-op', 'Full controller support', 'Steam Leaderboards',
               'SteamVR Collectibles', 'Online Co-op', 'Shared/Split Screen',
               'Local Co-op', 'MMO', 'VR Support',
               'Mods', 'Mods (require HL2)', 'Steam Turn Notifications']

# Select the columns directly instead of zipping 29 Series row by row in Python.
# Selecting with a list returns a new frame; reset_index(drop=True) gives it a
# fresh 0..n-1 index, matching what building the frame from zipped rows produced.
eda_df = df[eda_columns].reset_index(drop=True)

In [165]:
# Preview the first rows of the category-flag frame.
eda_df.head()

Unnamed: 0,Multi-player,Online Multi-Player,Local Multi-Player,Valve Anti-Cheat enabled,Single-player,Steam Cloud,Steam Achievements,Steam Trading Cards,Captions available,Partial Controller Support,...,Steam Leaderboards,SteamVR Collectibles,Online Co-op,Shared/Split Screen,Local Co-op,MMO,VR Support,Mods,Mods (require HL2),Steam Turn Notifications
0,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [166]:
# Pairwise correlation between the category flags, drawn as an annotated heatmap.
dfCorr = eda_df.corr()

x_labels = dfCorr.columns.tolist()
y_labels = dfCorr.index.tolist()
cell_text = dfCorr.round(2).values  # two-decimal labels printed in each cell

fig = ff.create_annotated_heatmap(
    z=dfCorr.values,
    x=x_labels,
    y=y_labels,
    annotation_text=cell_text,
    colorscale=px.colors.diverging.RdBu,
    reversescale=True,
    showscale=True,
)

# Shrink the per-cell labels so the full matrix stays legible.
for annotation in fig.layout.annotations:
    annotation.font.size = 8

fig.show()

In [167]:
# Preview the first rows of the full dataset.
df.head()

Unnamed: 0,name,genres,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,Multi-player,...,SteamVR Collectibles,Online Co-op,Shared/Split Screen,Local Co-op,MMO,VR Support,Mods,Mods (require HL2),Steam Turn Notifications,age
0,Counter-Strike,Action,0,124534,3339,17612,317,15000000,7.19,1,...,0,0,0,0,0,0,0,0,0,8461
1,Team Fortress Classic,Action,0,3318,633,277,62,7500000,3.99,1,...,0,0,0,0,0,0,0,0,0,9041
2,Day of Defeat,Action,0,3416,398,187,34,7500000,3.99,1,...,0,0,0,0,0,0,0,0,0,7550
3,Deathmatch Classic,Action,0,1273,267,258,184,7500000,3.99,1,...,0,0,0,0,0,0,0,0,0,8249
4,Half-Life: Opposing Force,Action,0,5250,288,624,415,7500000,3.99,1,...,0,0,0,0,0,0,0,0,0,8827
