# 1.Analysis of Steam database for historical trends:

We will start by examining historical data from the Steam platform, focusing on the 10 most popular games labelled "Automobile Sim" from 2013 to 2023. The data comes from .csv files provided by the SteamDB website. Through this analysis, we aim to discover trends, patterns and fluctuations in the popularity of racing simulation games over the last decade.

we got .csv files fromn the SteamDB platforms importated locally.

## Import libraries


In [230]:
import os
import numpy as np
import pandas as pd

## Load & concatenate all data sets
all .csv files have the same structure, same variables.

In [231]:
folder_path = '/Users/macbook/Dropbox/Mac/Documents/Pro/Data_Analyst/simracing-players/data/raw'
dfs = {}

for file in os.listdir(folder_path):
    if file.endswith('.csv'):
        game_name = os.path.splitext(file)[0]  # Extract game name from filename
        df = pd.read_csv(os.path.join(folder_path, file))
        df['game'] = game_name
        dfs[game_name] = df


In [232]:
merged_df = pd.concat(dfs.values(), ignore_index=True)

### Checking the load & merge

In [233]:
merged_df.head(20)

Unnamed: 0,DateTime,Players,Average Players,Twitch Viewers,game
0,2013-10-01 00:00:00,8.0,,,Assetto_corsa
1,2013-10-02 00:00:00,,,,Assetto_corsa
2,2013-10-03 00:00:00,,,,Assetto_corsa
3,2013-10-04 00:00:00,,,,Assetto_corsa
4,2013-10-05 00:00:00,,,,Assetto_corsa
5,2013-10-06 00:00:00,,,,Assetto_corsa
6,2013-10-07 00:00:00,,,,Assetto_corsa
7,2013-10-08 00:00:00,,,,Assetto_corsa
8,2013-10-09 00:00:00,,,,Assetto_corsa
9,2013-10-10 00:00:00,,,,Assetto_corsa


In [234]:
merged_df.tail(20)

Unnamed: 0,DateTime,Players,Average Players,Twitch Viewers,game
40934,2024-01-09 10:40:00,699.0,,22.0,DiRT_rally_2.0_
40935,2024-01-09 10:50:00,692.0,,22.0,DiRT_rally_2.0_
40936,2024-01-09 11:00:00,712.0,966.0,11.0,DiRT_rally_2.0_
40937,2024-01-09 11:10:00,729.0,,11.0,DiRT_rally_2.0_
40938,2024-01-09 11:20:00,753.0,,11.0,DiRT_rally_2.0_
40939,2024-01-09 11:30:00,776.0,,11.0,DiRT_rally_2.0_
40940,2024-01-09 11:40:00,800.0,,11.0,DiRT_rally_2.0_
40941,2024-01-09 11:50:00,828.0,,11.0,DiRT_rally_2.0_
40942,2024-01-09 12:00:00,843.0,966.0,14.0,DiRT_rally_2.0_
40943,2024-01-09 12:10:00,852.0,,14.0,DiRT_rally_2.0_


we see that in 2013, we had one records of player per day. In our most recent records, it was done every 10 mins.

In [235]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40954 entries, 0 to 40953
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   DateTime         40954 non-null  object 
 1   Players          38509 non-null  float64
 2   Average Players  11811 non-null  float64
 3   Twitch Viewers   36883 non-null  float64
 4   game             40954 non-null  object 
dtypes: float64(3), object(2)
memory usage: 1.6+ MB


We need to change the data type of datetime.

In [236]:
round(merged_df.describe())

Unnamed: 0,Players,Average Players,Twitch Viewers
count,38509.0,11811.0,36883.0
mean,8678.0,8268.0,1177.0
std,10489.0,8194.0,3608.0
min,1.0,265.0,0.0
25%,1805.0,2209.0,57.0
50%,4518.0,6389.0,290.0
75%,11379.0,10886.0,930.0
max,81096.0,44392.0,127965.0


## Data preparation & cleaning

Let's rename the column headers

In [237]:
print(merged_df.columns)

Index(['DateTime', 'Players', 'Average Players', 'Twitch Viewers', 'game'], dtype='object')


In [238]:
# rename column headers
merged_df.columns = ['datetime', 'players', 'average_players', 'twitch_viewers', 'game']

In [239]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40954 entries, 0 to 40953
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   datetime         40954 non-null  object 
 1   players          38509 non-null  float64
 2   average_players  11811 non-null  float64
 3   twitch_viewers   36883 non-null  float64
 4   game             40954 non-null  object 
dtypes: float64(3), object(2)
memory usage: 1.6+ MB


In [240]:
merged_df['datetime'] = merged_df['datetime'].astype('datetime64[s]')
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40954 entries, 0 to 40953
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   datetime         40954 non-null  datetime64[s]
 1   players          38509 non-null  float64      
 2   average_players  11811 non-null  float64      
 3   twitch_viewers   36883 non-null  float64      
 4   game             40954 non-null  object       
dtypes: datetime64[s](1), float64(3), object(1)
memory usage: 1.6+ MB


In [241]:
#check for mixed types
for col in merged_df.columns.tolist():
  weird = (merged_df[[col]].map(type) != merged_df[[col]].iloc[0].apply(type)).any(axis = 1)
  if len (merged_df[weird]) > 0:
    print (col)

In [242]:
merged_df.tail()

Unnamed: 0,datetime,players,average_players,twitch_viewers,game
40949,2024-01-09 13:10:00,913.0,,18.0,DiRT_rally_2.0_
40950,2024-01-09 13:20:00,923.0,,18.0,DiRT_rally_2.0_
40951,2024-01-09 13:30:00,,,18.0,DiRT_rally_2.0_
40952,2024-01-09 13:40:00,,,18.0,DiRT_rally_2.0_
40953,2024-01-09 13:50:00,,,18.0,DiRT_rally_2.0_


In [243]:
# Checking the null values
print(merged_df.isnull().sum())


datetime               0
players             2445
average_players    29143
twitch_viewers      4071
game                   0
dtype: int64


In [244]:
# Check for duplicates
print(merged_df.duplicated().sum())


0


In [245]:
# have a look on Null values 
null = merged_df[merged_df[['players', 'average_players', 'twitch_viewers']].isnull().all(axis=1)]
null

Unnamed: 0,datetime,players,average_players,twitch_viewers,game
1,2013-10-02,,,,Assetto_corsa
2,2013-10-03,,,,Assetto_corsa
3,2013-10-04,,,,Assetto_corsa
4,2013-10-05,,,,Assetto_corsa
5,2013-10-06,,,,Assetto_corsa
...,...,...,...,...,...
37494,2018-10-15,,,,DiRT_rally_2.0_
37495,2018-10-16,,,,DiRT_rally_2.0_
37496,2018-10-17,,,,DiRT_rally_2.0_
37497,2018-10-18,,,,DiRT_rally_2.0_


Let's delete all the records with null values in ALL columns except game

In [246]:
merged_df = merged_df.dropna(subset=['players', 'average_players', 'twitch_viewers'], how='all')
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 39823 entries, 0 to 40953
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   datetime         39823 non-null  datetime64[s]
 1   players          38509 non-null  float64      
 2   average_players  11811 non-null  float64      
 3   twitch_viewers   36883 non-null  float64      
 4   game             39823 non-null  object       
dtypes: datetime64[s](1), float64(3), object(1)
memory usage: 1.8+ MB


players, average players & twitch viewers are individuals so the data type must be integer.

In [247]:
print(merged_df.isnull().sum())

datetime               0
players             1314
average_players    28012
twitch_viewers      2940
game                   0
dtype: int64


we need to replace NA values by 0 before changing the data type.

In [248]:
# replace NA values by 0
merged_df[['players', 'average_players', 'twitch_viewers']] = merged_df[['players', 'average_players', 'twitch_viewers']].fillna(0)
print(merged_df.isnull().sum())

datetime           0
players            0
average_players    0
twitch_viewers     0
game               0
dtype: int64


In [249]:
# Change data type from float to integer
merged_df[['players', 'average_players', 'twitch_viewers']] = merged_df[['players', 'average_players', 'twitch_viewers']].astype(int)

# Display changes
print(merged_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 39823 entries, 0 to 40953
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype        
---  ------           --------------  -----        
 0   datetime         39823 non-null  datetime64[s]
 1   players          39823 non-null  int64        
 2   average_players  39823 non-null  int64        
 3   twitch_viewers   39823 non-null  int64        
 4   game             39823 non-null  object       
dtypes: datetime64[s](1), int64(3), object(1)
memory usage: 1.8+ MB
None


In [250]:
merged_df['game'].unique()

array(['Assetto_corsa', 'Forza Horizon 5 Steam Charts',
       'Forza Horizon 4 Steam Charts', 'Assetto_corsa_competizione',
       'ATS Price', 'BeamNG', 'Euro_truck_2',
       'Automobilista 2 Steam Charts', 'CarX_drift_racing_online',
       'DiRT_rally_2.0_'], dtype=object)

Let's modify the name of game records to keep consistency

In [251]:
# Normalize records from 'game' field
merged_df['game'] = merged_df['game'].replace({
    'Forza Horizon 5 Steam Charts': 'Forza_horizon_5',
    'Forza Horizon 4 Steam Charts':'Forza_horizon_4',
    'ATS Price': 'American_truck_simulator',
    'Automobilista 2 Steam Charts': 'Automobilista_2',
    'DiRT_rally_2.0_': 'Dirt_rally_2.0' 
})
# checking changes
merged_df['game'].unique()

array(['Assetto_corsa', 'Forza_horizon_5', 'Forza_horizon_4',
       'Assetto_corsa_competizione', 'American_truck_simulator', 'BeamNG',
       'Euro_truck_2', 'Automobilista_2', 'CarX_drift_racing_online',
       'Dirt_rally_2.0'], dtype=object)

## Exploratary analysis & visualisation

In [252]:
round(merged_df.describe())


Unnamed: 0,datetime,players,average_players,twitch_viewers
count,39823,39823.0,39823.0,39823.0
mean,2021-09-15 18:51:56,8392.0,2452.0,1090.0
min,2013-01-01 00:00:00,0.0,0.0,0.0
25%,2019-11-27 00:00:00,1604.0,0.0,35.0
50%,2022-10-26 00:00:00,4271.0,0.0,240.0
75%,2023-12-31 19:30:00,10992.0,1266.0,824.0
max,2024-02-22 16:30:00,81096.0,44392.0,127965.0
std,,10431.0,5846.0,3485.0


we have records from 1st january of 2013 until february 2024.
We will keep only records until 31st december of 2023. Let's add a year column.

In [253]:
# remove records from 2024
df_2013_2023 = merged_df[merged_df['datetime'] <= '2023-12-31']
round(df_2013_2023.describe())


Unnamed: 0,datetime,players,average_players,twitch_viewers
count,29501,29501.0,29501.0,29501.0
mean,2020-11-23 04:06:41,8569.0,2617.0,1322.0
min,2013-01-01 00:00:00,0.0,0.0,0.0
25%,2019-01-08 00:00:00,1454.0,0.0,33.0
50%,2021-05-26 00:00:00,3995.0,0.0,275.0
75%,2023-06-05 00:00:00,11325.0,1554.0,1137.0
max,2023-12-31 00:00:00,81096.0,44392.0,127965.0
std,,10923.0,6126.0,3980.0


### Overview of records between 2013-2023:

In [254]:
import plotly.express as px

# plot representing records in the 2013-2023 time period
fig = px.histogram(df_2013_2023, x='datetime', template='plotly_dark')
fig.show()

We have a lot of records the last month of 2023. Let's create new 'month' & 'year' columns

In [255]:
# Extract month from the 'datetime' column
df_2013_2023['month'] = df_2013_2023['datetime'].dt.month

# Extract year from the 'datetime' column
df_2013_2023['year'] = df_2013_2023['datetime'].dt.year

# Filter the DataFrame to keep records for the year 2023
df_2023 = df_2013_2023[df_2013_2023['year'] == 2023]




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Visualize the count of records of year 2023: 

In [256]:
import plotly.express as px

# Create the histogram with month of 2023
hist_2023 = px.histogram(df_2023, x='month', template='plotly_dark')

# Update layout to remove gridlines
hist_2023.update_layout(
    xaxis=dict(showgrid=False), 
    yaxis=dict(showgrid=False)
)

hist_2023.show()


In [257]:
dec = df_2023[df_2023['month'].isin([12])]


In [258]:
dec.head(30)

Unnamed: 0,datetime,players,average_players,twitch_viewers,game,month,year
3713,2023-12-01 00:00:00,14820,8643,2414,Assetto_corsa,12,2023
3714,2023-12-02 00:00:00,16840,10592,889,Assetto_corsa,12,2023
3715,2023-12-03 00:00:00,15706,10297,898,Assetto_corsa,12,2023
3716,2023-12-04 00:00:00,13087,7331,1628,Assetto_corsa,12,2023
3717,2023-12-05 00:00:00,12754,7335,5271,Assetto_corsa,12,2023
3718,2023-12-06 00:00:00,13015,7565,1405,Assetto_corsa,12,2023
3719,2023-12-07 00:00:00,13951,7870,617,Assetto_corsa,12,2023
3720,2023-12-08 00:00:00,15604,9030,606,Assetto_corsa,12,2023
3721,2023-12-09 00:00:00,16517,10962,2677,Assetto_corsa,12,2023
3722,2023-12-10 00:00:00,16495,10848,2761,Assetto_corsa,12,2023


From 12-12-2023, the records get hourly. to keep consistency, we will resample the data to daily with the records at 00:00:00


In [272]:
# convert the 'datetime' column to a DateTimeIndex
df_2013_2023['datetime'] = pd.to_datetime(df_2013_2023['datetime'])  # Convert 'datetime' column to datetime type
df_2013_2023.set_index('datetime', inplace=True)  # Set 'datetime' column as index
df_2013_2023

KeyError: 'datetime'

In [259]:
# Split the DataFrame into two parts
df_before = df_2013_2023[df_2013_2023.index < '2023-12-12']
df_after = df_2013_2023[df_2013_2023.index >= '2023-12-12']

# Resample the second part at the daily frequency, selecting only records at 00:00:00
df_after_resampled = df_after.resample('D').first()

# Concatenate the two parts back together
df_resampled = pd.concat([df_before, df_after_resampled])

# Now df_resampled contains the original data before December 12, 2023, and resampled data from December 12, 2023, onwards


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4017 entries, 2013-01-01 to 2023-12-31
Freq: D
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   players          3907 non-null   float64
 1   average_players  3907 non-null   float64
 2   twitch_viewers   3907 non-null   float64
 3   game             3907 non-null   object 
 4   month            3907 non-null   float64
 5   year             3907 non-null   float64
dtypes: float64(5), object(1)
memory usage: 219.7+ KB




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Check now our record counts

In [260]:
df_2013_2023.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 29501 entries, 2013-10-01 00:00:00 to 2023-12-31 00:00:00
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   players          29501 non-null  int64 
 1   average_players  29501 non-null  int64 
 2   twitch_viewers   29501 non-null  int64 
 3   game             29501 non-null  object
 4   month            29501 non-null  int32 
 5   year             29501 non-null  int32 
dtypes: int32(2), int64(3), object(1)
memory usage: 1.4+ MB


In [261]:
dec_new = df_daily[df_daily['month'].isin([12])]
dec_new.head(30)

Unnamed: 0_level_0,players,average_players,twitch_viewers,game,month,year
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-12-01,1667.0,0.0,0.0,Assetto_corsa,12.0,2013.0
2013-12-02,4688.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-03,4144.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-04,3846.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-05,3747.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-06,3972.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-07,3939.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-08,3517.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-09,3199.0,0.0,0.0,Euro_truck_2,12.0,2013.0
2013-12-10,2963.0,0.0,0.0,Euro_truck_2,12.0,2013.0


In [262]:
# Create the histogram with month of 2023
fig = px.histogram(df_resampled, x='month', template='plotly_dark')

# Update layout to remove gridlines
fig.update_layout(
    xaxis=dict(showgrid=False), 
    yaxis=dict(showgrid=False)
)

fig.show()



In [263]:
# filter data records from last 4 years
df_2020_2023 = df_2013_2023[df_2013_2023['year']>2019]
# Create a pivot table to summarize player counts by game and year
pivot_table = round(df_2013_2023.pivot_table(index='game', columns='year', values='average_players'))
pivot_table

year,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
American_truck_simulator,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1663.0,6351.0
Assetto_corsa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1961.0,9498.0
Assetto_corsa_competizione,,,,,,0.0,0.0,0.0,0.0,548.0,2513.0
Automobilista_2,,,,,,,,0.0,0.0,103.0,559.0
BeamNG,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2408.0,9161.0
CarX_drift_racing_online,,,,,0.0,0.0,0.0,0.0,0.0,602.0,2590.0
Dirt_rally_2.0,,,,,0.0,0.0,0.0,0.0,0.0,273.0,1100.0
Euro_truck_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5843.0,20468.0
Forza_horizon_4,,,,,,0.0,0.0,0.0,0.0,2032.0,6100.0
Forza_horizon_5,,,,,,,,,0.0,2220.0,12380.0


In [264]:
df_2013_2023.melt(id_vars='game')

Unnamed: 0,game,variable,value
0,Assetto_corsa,players,8
1,Assetto_corsa,players,1464
2,Assetto_corsa,players,1464
3,Assetto_corsa,players,1363
4,Assetto_corsa,players,1667
...,...,...,...
147500,Dirt_rally_2.0,year,2023
147501,Dirt_rally_2.0,year,2023
147502,Dirt_rally_2.0,year,2023
147503,Dirt_rally_2.0,year,2023


In [265]:
import plotly.graph_objects as go

# Create a heatmap using Plotly
fig = go.Figure(data=go.Heatmap(
    z=pivot_table.values,
    x=pivot_table.columns,
    y=pivot_table.index,
    colorbar=dict(title='Player Counts'
    )
))

# Update layout
fig.update_layout(
    title='Player counts by game',
    template='plotly_dark',
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False)
    )


# Show the heatmap
fig.show()

### Remove all data records to keep only one daily


In [266]:
# remove 

## Viz of players


In [267]:
fig = px.line(dec, 
    x='datetime',
    y='players',
    color='game',
    title='Number of Players per Game',
    template='plotly_dark')

# show line chart with 
fig.show()

We see that the number of records from december 12th 2023 is hourly. let's see for all the games 

In [268]:
game_counts = df_2013_2023.groupby('game').size()
print(game_counts)


game
American_truck_simulator      3119
Assetto_corsa                 3720
Assetto_corsa_competizione    2418
Automobilista_2               1854
BeamNG                        4173
CarX_drift_racing_online      2858
Dirt_rally_2.0                2568
Euro_truck_2                  4863
Forza_horizon_4               2513
Forza_horizon_5               1415
dtype: int64


In [269]:
df_2013_2023.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 29501 entries, 2013-10-01 00:00:00 to 2023-12-31 00:00:00
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   players          29501 non-null  int64 
 1   average_players  29501 non-null  int64 
 2   twitch_viewers   29501 non-null  int64 
 3   game             29501 non-null  object
 4   month            29501 non-null  int32 
 5   year             29501 non-null  int32 
dtypes: int32(2), int64(3), object(1)
memory usage: 1.4+ MB


In [270]:
beamng = df_2013_2023[df_2013_2023['game'] == 'BeamNG']
print(beamng[['datetime']])


KeyError: "None of [Index(['datetime'], dtype='object')] are in the [columns]"

In [None]:
game_stats = df_2013_2023.groupby('game').agg({'players':['mean','min','max']})
game_stats

Unnamed: 0_level_0,players,players,players
Unnamed: 0_level_1,mean,min,max
game,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
American_truck_simulator,5475.430266,1,19138
Assetto_corsa,6155.001613,0,19317
Assetto_corsa_competizione,3175.897436,143,9416
Automobilista_2,551.025351,31,1696
BeamNG,7640.69111,186,25792
CarX_drift_racing_online,1889.6655,1,5369
Dirt_rally_2.0,1322.518302,0,3533
Euro_truck_2,26381.746453,653,69754
Forza_horizon_4,6528.85953,0,40399
Forza_horizon_5,13250.641696,0,81096


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Relationship between players & twitch viewers
fig = px.scatter(df_2023, x="average_players", y="twitch_viewers", color="game", template='plotly_dark')

    # Update layout to remove gridlines
    fig.update_layout(
        xaxis=dict(showgrid=False), 
        yaxis=dict(showgrid=False)
)

# Show the plot
fig.show()



In [None]:
import plotly.graph_objects as go

# Aggregate data to calculate the average players across all games
global_trend = df_2013_2023.groupby('datetime')['players'].mean().reset_index()

# Create line chart for global trend
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=global_trend['datetime'],
        y=global_trend['players'],
        mode='lines',
        name='Global Trend',
        line=dict(color='purple', width=2)
    )
)

fig.update_layout(
    title='Global Trend of Average Players',
    yaxis_title='Average Players',
    template='plotly_dark',
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False)
)

fig.show()


In [None]:
import plotly.graph_objects as go
import plotly.express as px

# Calculate the average player count across all games
average_players = df_2013_2023['players'].mean()

# Create line chart for the current game
line_chart = px.line(df_2013_2023, x='datetime', y='players', color='game', title='Number of players per game', template='plotly_dark')

# Add date picker widget with customized button colors using CSS
line_chart.update_layout(
    xaxis=dict(
      showgrid=False,
        rangeselector=dict(
            buttons=list([
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(count=5, label="5y", step="year", stepmode="backward"),
                dict(step="all")
            ]),
            bgcolor="rgba(255, 255, 255, 0.5)",  # Background color of the rangeselector
            activecolor="rgba(0, 0, 0, 0.8)",  # Active button color
            bordercolor="rgba(0, 0, 0, 0.2)",  # Border color
            borderwidth=1,  # Border width
            font=dict(color="rgba(0, 0, 0, 0.8)")  # Font color
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    ),
    yaxis=dict(showgrid=False),
)

# Show line chart with all games separately
line_chart.show()


In [None]:
import plotly.graph_objects as go
# Calculate the average player count across all games
average_players = df_2013_2023['players'].mean()

# Create line chart for the current game
line_chart = px.line(df_2013_2023, x='datetime', y='players', color='game', title='Number of players per game', template='plotly_dark')
# Update layout to remove gridlines
fig.update_layout(
    xaxis=dict(showgrid=False), 
    yaxis=dict(showgrid=False)
)
# show line chart with all game separatly
line_chart.show()

In [None]:
matrix = px.scatter_matrix(
    df_2013_2023,
    dimensions=['players','twitch_viewers'],
    color='game'
)
matrix.show()

In [None]:
boxplot = px.box(df_2013_2023,x='players', template='plotly_dark')
boxplot.show()

## Ask & Answer questions

## Summary & conclusion

### *Limitations*
- Records come only for steam marketplace
- During our period of analysis we only had 1 record per day for players count.
