In [37]:
import pandas as pd
import numpy as np
import altair as alt
import matplotlib.pyplot as plt
import json
import ast

# Altair configuration for this notebook.
# Use the "default" data transformer with max_rows=None, i.e. without a row
# cap on the datasets handed to charts.
alt.data_transformers.enable("default", max_rows=None)
# Select the "mimetype" renderer for chart output.
alt.renderers.enable("mimetype")
# Apply the "fivethirtyeight" theme. This is the last expression of the cell,
# so its return value is what the cell displays.
alt.theme.enable("fivethirtyeight")

ThemeRegistry.enable('fivethirtyeight')

## Datasets & Cleaning

### Rainbow Six Siege

In [38]:
# Load the raw Rainbow Six Siege monthly player statistics from the assets folder.
raw_csv_path = "../assets/GG Rainbow Six Siege.csv"
rss_data = pd.read_csv(raw_csv_path)

In [39]:
# Display the raw table exactly as loaded, before any cleaning.
rss_data

Unnamed: 0,Month,Unique Players,Gain,% Gain,Peak Players
0,Last 30 Days,53440,-8606,-13.9%,94285
1,August 2025,62046,-12098,-16.3%,100219
2,July 2025,74144,-6452,-8.0%,111052
3,June 2025,80596,+29486,+57.7%,141870
4,May 2025,51110,-10911,-17.6%,89179
...,...,...,...,...,...
114,March 2016,8094,-4088,-33.6%,11331
115,February 2016,12182,-1885,-13.4%,16052
116,January 2016,14067,+3507,+33.2%,20093
117,December 2015,10560,+10559,"+1,055,900.0%",17974


In [40]:
# Clean the raw monthly table: normalise the column names, convert the text
# statistics to floats, drop the rolling "Last 30 Days" summary row, and
# order the remaining months chronologically.
fields_to_keep = ['month', 'peak', 'gain', '% gain']
numeric_fields = ['peak', 'gain', '% gain']

rss_data = rss_data.rename(columns={"Peak Players": "peak"})
rss_data.columns = rss_data.columns.str.lower()
rss_data = rss_data.replace("-", "0").replace("—", "0")  # Replacing both the short - and long —

# Strip thousands separators and percent signs so the values parse as floats.
rss_data[numeric_fields] = (
    rss_data[numeric_fields]
    .replace({",": "", "%": ""}, regex=True)
    .astype(float)
)

# Drop the "Last 30 Days" row by label rather than by position, so a real
# month is never removed if that row is absent or not first. The explicit
# copy makes the column assignment below write to an independent frame
# instead of a slice (avoids SettingWithCopyWarning).
is_rolling_summary = rss_data["month"] == "Last 30 Days"
rss_data = rss_data.loc[~is_rolling_summary, fields_to_keep].copy()

rss_data["month"] = pd.to_datetime(rss_data["month"], format="%B %Y")
rss_data = rss_data.sort_values("month")
rss_data


Unnamed: 0,month,peak,gain,% gain
118,2015-11-01,1.0,0.0,0.0
117,2015-12-01,17974.0,10559.0,1055900.0
116,2016-01-01,20093.0,3507.0,33.2
115,2016-02-01,16052.0,-1885.0,-13.4
114,2016-03-01,11331.0,-4088.0,-33.6
...,...,...,...,...
5,2025-04-01,107312.0,-18569.0,-23.0
4,2025-05-01,89179.0,-10911.0,-17.6
3,2025-06-01,141870.0,29486.0,57.7
2,2025-07-01,111052.0,-6452.0,-8.0


In [41]:
# Recompute gain and % gain from peak players rather than unique players,
# then keep only the months from June 2020 onward.
# Assumes the rows are already in chronological order, so shift(1) yields the
# previous month's peak.
previous_peak = rss_data['peak'].shift(1)
rss_data['gain'] = rss_data['peak'] - previous_peak
rss_data['% gain'] = (rss_data['gain'] * 100 / previous_peak).round(1)

# Dropping the first row that has only 1 player. The explicit copy keeps the
# assignment below from writing into a slice of the previous frame
# (avoids SettingWithCopyWarning).
rss_data = rss_data.iloc[1:].copy()

# Setting gain and % gain of the new first row to 0. The columns are resolved
# by name so the assignment does not depend on column order.
gain_columns = ['gain', '% gain']
rss_data.iloc[0, rss_data.columns.get_indexer(gain_columns)] = 0

rss_data = rss_data[rss_data["month"] >= "2020-06-01"]  # Keeping only rows from June 01 2020 onward
rss_data

Unnamed: 0,month,peak,gain,% gain
63,2020-06-01,126511.0,-20062.0,-13.7
62,2020-07-01,122461.0,-4050.0,-3.2
61,2020-08-01,131847.0,9386.0,7.7
60,2020-09-01,94285.0,-37562.0,-28.5
59,2020-10-01,115584.0,21299.0,22.6
...,...,...,...,...
5,2025-04-01,107312.0,-38178.0,-26.2
4,2025-05-01,89179.0,-18133.0,-16.9
3,2025-06-01,141870.0,52691.0,59.1
2,2025-07-01,111052.0,-30818.0,-21.7


In [42]:
# Write the cleaned table to disk. The index is not suppressed, so the row
# labels are written as the first (unnamed) column of the CSV.
clean_csv_path = "../assets/GG_Rainbow_Six_Siege_Clean.csv"
rss_data.to_csv(clean_csv_path, encoding="utf-8")