In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
import copy
import gzip
import matplotlib.colors as mcolors
import country_converter as coco

custom_params = {"axes.spines.right": False, "axes.spines.top": False}
sns.set_theme(style="ticks", rc=custom_params)
#sns.set(font='Adobe Devanagari')
sns.set_context("paper", font_scale=1, rc={"lines.linewidth": 0.5, "grid.linewidth": 0.3})


matplotlib.rcParams['mathtext.fontset'] = 'cm'
matplotlib.rcParams['font.family'] = 'STIXGeneral'
# Not italized latex
matplotlib.rcParams['mathtext.default'] = 'regular'
matplotlib.rcParams["font.weight"] = "light"

plt.style.use('themes/rose-pine-moon.mplstyle')


%config InlineBackend.figure_format='svg'

In [2]:
# Load the data
data = pd.read_feather('data/temp/data.feather')

In [3]:
print(len(data))
data.head()


1821316


Unnamed: 0,date,primaryTime,emulated,name,releaseDate,name_category,name_user,location,platform_name
8,2021-12-22,449.533,False,RE:RUN,2020-08-08,Any%,quebecpower,ca/qc,PC
9,2019-02-09,194.0,False,Time Warpers,2018-11-02,Zones 1-100,_zuR,br,PC
11,2014-11-17,857.0,True,Kirby's Dream Land,1992-04-27,Normal Mode,SapphireYoshi,us,Game Boy
12,2014-11-17,3062.0,False,Kirby: Nightmare in Dream Land,2002-10-25,Any%,SapphireYoshi,us,Game Boy Advance
13,2014-11-17,2454.0,False,Kirby & The Amazing Mirror,2004-04-15,Any%,SapphireYoshi,us,Game Boy Advance


In [4]:
# Find rows with user named korea_origin
korea_origin = data[data['name_user'] == 'korea_origin']

In [5]:
korea_origin

Unnamed: 0,date,primaryTime,emulated,name,releaseDate,name_category,name_user,location,platform_name
1514300,2021-05-27,5669.0,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC
1514301,2021-04-06,5848.029,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC
1514303,2021-03-20,6064.0,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC
1514304,2021-06-23,5091.0,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC
1514305,2021-03-17,6152.0,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC
1514307,2021-06-10,5524.0,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC
1514308,2021-05-29,5619.0,False,Sonic Mania,2017-08-15,Sonic & Tails,korea_origin,kr,PC


In [6]:
# Find the games ("name") with the most submissions, and keep the top 100, along with their release date and platform
top_games = data[["name", "name_category"]].value_counts()


# Convert the index to columns
top_games = top_games.reset_index()

# For each game, find the first release date and platform
top_games_data = data[data["name"].isin(top_games["name"])].groupby(["name", "name_category"]).agg(
    {"releaseDate": "first", "platform_name": "first"}
)


# Add count of submissions for each game and category in the top games
top_games_data = top_games_data.join(data[["name", "name_category"]].value_counts(), on=["name", "name_category"])


# Add sum of all submissions for each game in the top games
top_games_data["sum_count"] = top_games_data["count"].groupby("name").transform("sum")

# Sort by count of submissions (descending)
top_games_data = top_games_data.sort_values(["sum_count", "count"], ascending=False)

# Remove any rown that have NaN or None
top_games_data = top_games_data.dropna()

# Reset the index
top_games_data = top_games_data.reset_index()

# Keep the 100 unique games (name) with the most submissions, disregarding the category
top_games_data_names = top_games_data.drop_duplicates(subset=["name"]).head(1000)

# Only keep the games that are in the top 100 games with the most submissions
top_games_data = top_games_data[top_games_data["name"].isin(top_games_data_names["name"])]

# Fill NaN values with 0
top_games_data = top_games_data.fillna(0)

top_games_data

Unnamed: 0,name,name_category,releaseDate,platform_name,count,sum_count
0,Super Mario 64,16 Star,1996-06-23,Nintendo 64,17485,34823
1,Super Mario 64,70 Star,1996-06-23,Nintendo 64,9610,34823
2,Super Mario 64,120 Star,1996-06-23,Nintendo 64,4767,34823
3,Super Mario 64,1 Star,1996-06-23,Nintendo 64,2248,34823
4,Super Mario 64,0 Star,1996-06-23,Nintendo 64,713,34823
...,...,...,...,...,...,...
8041,The Witness Randomizer,Sigma Expert Double,2018-10-27,PC,4,288
8042,Kirby's Return to Dream Land Deluxe,Any% Extra Mode,2023-02-24,Switch,3,288
8043,Kirby's Return to Dream Land Deluxe,Kirby Master,2023-02-24,Switch,3,288
8044,S3AIR - Category Extensions,All Achievements,2019-06-01,PC,3,288


In [7]:
# Only keep the data for the top 100 games
data_top_1000 = data[data["name"].isin(top_games_data["name"])]

# Drop platform_name for this analysis
data_top_1000 = data_top_1000.drop(columns=["platform_name"])

data_top_1000

Unnamed: 0,date,primaryTime,emulated,name,releaseDate,name_category,name_user,location
8,2021-12-22,449.533,False,RE:RUN,2020-08-08,Any%,quebecpower,ca/qc
11,2014-11-17,857.000,True,Kirby's Dream Land,1992-04-27,Normal Mode,SapphireYoshi,us
12,2014-11-17,3062.000,False,Kirby: Nightmare in Dream Land,2002-10-25,Any%,SapphireYoshi,us
13,2014-11-17,2454.000,False,Kirby & The Amazing Mirror,2004-04-15,Any%,SapphireYoshi,us
14,2015-04-28,2274.000,True,Goof Troop,1993-07-11,Goofy,SapphireYoshi,us
...,...,...,...,...,...,...,...,...
3995704,2022-09-08,1628.340,False,Superliminal,2019-11-12,Glitchless,Valarius,de
3995705,2022-09-17,1537.460,False,Superliminal,2019-11-12,Glitchless,Valarius,de
3995706,2022-10-23,1511.720,False,Superliminal,2019-11-12,Glitchless,Valarius,de
3995722,2020-10-06,306.250,False,"Give Up, Robot",2010-05-06,New Game,TripleHaven,us


In [8]:
# Convert column date to datetime
data_top_1000["date"] = pd.to_datetime(data_top_1000["date"])

# Only keep the data from 2012 onwards
data_top_1000 = data_top_1000[data_top_1000["date"].dt.year >= 2012]

data_top_1000

Unnamed: 0,date,primaryTime,emulated,name,releaseDate,name_category,name_user,location
8,2021-12-22,449.533,False,RE:RUN,2020-08-08,Any%,quebecpower,ca/qc
11,2014-11-17,857.000,True,Kirby's Dream Land,1992-04-27,Normal Mode,SapphireYoshi,us
12,2014-11-17,3062.000,False,Kirby: Nightmare in Dream Land,2002-10-25,Any%,SapphireYoshi,us
13,2014-11-17,2454.000,False,Kirby & The Amazing Mirror,2004-04-15,Any%,SapphireYoshi,us
14,2015-04-28,2274.000,True,Goof Troop,1993-07-11,Goofy,SapphireYoshi,us
...,...,...,...,...,...,...,...,...
3995704,2022-09-08,1628.340,False,Superliminal,2019-11-12,Glitchless,Valarius,de
3995705,2022-09-17,1537.460,False,Superliminal,2019-11-12,Glitchless,Valarius,de
3995706,2022-10-23,1511.720,False,Superliminal,2019-11-12,Glitchless,Valarius,de
3995722,2020-10-06,306.250,False,"Give Up, Robot",2010-05-06,New Game,TripleHaven,us


In [9]:
dic_score = {1: 10, 2: 5, 3:3, 4:2, 5:1} 

In [10]:
# Assuming data_top_1000 is your DataFrame
# Ensure 'date' is in datetime format and extract the year and month
data_top_1000['date'] = pd.to_datetime(data_top_1000['date'])
data_top_1000['year_month'] = data_top_1000['date'].dt.to_period('M')

# Define the score dictionary
dic_score = {1: 10, 2: 5, 3: 3, 4: 2, 5: 1}

# Sort the dataframe by 'name' (game), 'name_user' (runner), and 'date' (chronologically)
data_top_1000 = data_top_1000.sort_values(by=['name', 'name_category', 'name_user', 'date'])

# Initialize a list to store the results for each month
results = []
l_data_top_100_month = []
# Iterate over each unique month in the dataset
# Sort the months before looping
sorted_months = sorted(data_top_1000['year_month'].unique())

for month in sorted_months[::3] + sorted_months[-1:]:
    # Filter data up to and including the current month
    data_top_1000_month = data_top_1000[data_top_1000['year_month'] <= month].copy()

    # Compute the best time for each runner (lowest primaryTime)
    data_top_1000_month['best_time'] = data_top_1000_month.groupby(['name', 'name_category', 'name_user'])['primaryTime'].transform('min')

    # Remove all the runs that are not the best time for each runner
    data_top_1000_month = data_top_1000_month[data_top_1000_month['primaryTime'] == data_top_1000_month['best_time']]
    
    # Remove any potential duplicates
    data_top_1000_month = data_top_1000_month.drop_duplicates(subset=['name', 'name_category', 'name_user', 'best_time'])

    # Rank runners based on their best time up to this month across all previous submissions
    data_top_1000_month['rank'] = data_top_1000_month.groupby(['name', 'name_category'])['best_time'].rank(method='min', ascending=True)

    # Assign scores based on the rank
    data_top_1000_month['score'] = data_top_1000_month['rank'].map(dic_score).fillna(0)  # fill NaN with 0 if rank > 5

    # Extract country from location
    data_top_1000_month['country'] = data_top_1000_month['location'].str.split('/').str[0]

    l_data_top_100_month.append(data_top_1000_month)

    # Aggregate scores by country for the current month
    country_scores = data_top_1000_month.groupby('country')['score'].sum().reset_index()

    # Add the current month to the result
    country_scores['year_month'] = month

    # Append the result for the current month
    results.append(country_scores)

# Concatenate the results for all months
data_top_1000_final = pd.concat(results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_top_1000['date'] = pd.to_datetime(data_top_1000['date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_top_1000['year_month'] = data_top_1000['date'].dt.to_period('M')


In [11]:
sorted_months[::2]

[Period('2012-01', 'M'),
 Period('2012-03', 'M'),
 Period('2012-05', 'M'),
 Period('2012-07', 'M'),
 Period('2012-09', 'M'),
 Period('2012-11', 'M'),
 Period('2013-01', 'M'),
 Period('2013-03', 'M'),
 Period('2013-05', 'M'),
 Period('2013-07', 'M'),
 Period('2013-09', 'M'),
 Period('2013-11', 'M'),
 Period('2014-01', 'M'),
 Period('2014-03', 'M'),
 Period('2014-05', 'M'),
 Period('2014-07', 'M'),
 Period('2014-09', 'M'),
 Period('2014-11', 'M'),
 Period('2015-01', 'M'),
 Period('2015-03', 'M'),
 Period('2015-05', 'M'),
 Period('2015-07', 'M'),
 Period('2015-09', 'M'),
 Period('2015-11', 'M'),
 Period('2016-01', 'M'),
 Period('2016-03', 'M'),
 Period('2016-05', 'M'),
 Period('2016-07', 'M'),
 Period('2016-09', 'M'),
 Period('2016-11', 'M'),
 Period('2017-01', 'M'),
 Period('2017-03', 'M'),
 Period('2017-05', 'M'),
 Period('2017-07', 'M'),
 Period('2017-09', 'M'),
 Period('2017-11', 'M'),
 Period('2018-01', 'M'),
 Period('2018-03', 'M'),
 Period('2018-05', 'M'),
 Period('2018-07', 'M'),


In [12]:
last_month = l_data_top_100_month[-2]
before_last_month = l_data_top_100_month[-18]

In [13]:
data_top_1000_final

Unnamed: 0,country,score,year_month
0,au,30.0,2012-01
1,ca,15.0,2012-01
2,de,30.0,2012-01
3,fi,10.0,2012-01
4,jp,30.0,2012-01
...,...,...,...
240,ye,33.0,2023-11
241,yt,4.0,2023-11
242,za,86.0,2023-11
243,zm,0.0,2023-11


In [14]:
'kr' in data_top_1000_final['country'].unique()

True

In [15]:
# Remove the country with score 0
data_top_1000_final = data_top_1000_final[data_top_1000_final["score"] > 0]

data_top_1000_final

Unnamed: 0,country,score,year_month
0,au,30.0,2012-01
1,ca,15.0,2012-01
2,de,30.0,2012-01
3,fi,10.0,2012-01
4,jp,30.0,2012-01
...,...,...,...
239,xk,20.0,2023-11
240,ye,33.0,2023-11
241,yt,4.0,2023-11
242,za,86.0,2023-11


In [16]:
# Show the top 10 countries with the highest score in the last month
top_countries = data_top_1000_final[data_top_1000_final["year_month"] == sorted_months[-30]].sort_values("score", ascending=False).head(10)
top_countries

Unnamed: 0,country,score,year_month


In [17]:
# Convert column year_month to string
#data_top_1000_final["year_month"] = data_top_1000_final["year_month"].astype(str)

# Set location to upper case
data_top_1000_final["country"] = data_top_1000_final["country"].str.upper()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_top_1000_final["country"] = data_top_1000_final["country"].str.upper()


In [18]:
# Load flag data
df_flags = pd.read_json("/Users/cdroin/Library/CloudStorage/GoogleDrive-colasdroin@gmail.com/Mon Drive/Data_viz/speedruns/app/public/data/flag_data.json")
df_flags

Unnamed: 0,code,emoji,unicode,name,title,dialCode
0,AD,🇦🇩,U+1F1E6 U+1F1E9,Andorra,flag for Andorra,+376
1,AE,🇦🇪,U+1F1E6 U+1F1EA,United Arab Emirates,flag for United Arab Emirates,+971
2,AF,🇦🇫,U+1F1E6 U+1F1EB,Afghanistan,flag for Afghanistan,+93
3,AG,🇦🇬,U+1F1E6 U+1F1EC,Antigua and Barbuda,flag for Antigua and Barbuda,+1268
4,AI,🇦🇮,U+1F1E6 U+1F1EE,Anguilla,flag for Anguilla,+1 264
...,...,...,...,...,...,...
246,YE,🇾🇪,U+1F1FE U+1F1EA,Yemen,flag for Yemen,+967
247,YT,🇾🇹,U+1F1FE U+1F1F9,Mayotte,flag for Mayotte,+262
248,ZA,🇿🇦,U+1F1FF U+1F1E6,South Africa,flag for South Africa,+27
249,ZM,🇿🇲,U+1F1FF U+1F1F2,Zambia,flag for Zambia,+260


In [19]:
# Ensure all the locations are in the flag data
for country in data_top_1000_final["country"].unique():
    if country not in df_flags["code"].values:
        print(country)
        
# Remove the countries that are not in the flag data
data_top_1000_final = data_top_1000_final[data_top_1000_final["country"].isin(df_flags["code"].values)]

EO
VH


In [20]:
'KR' in data_top_1000_final['country'].unique()

True

In [21]:
# Always have all country for each month (with score 0, potentially)
data_top_1000_final = data_top_1000_final.set_index(["year_month", "country"]).unstack(fill_value=0).stack().reset_index()

# Sort each month by country
data_top_1000_final = data_top_1000_final.sort_values(["year_month", "country"])


In [22]:
data_top_1000_final

Unnamed: 0,year_month,country,score
0,2012-01,AD,0.0
1,2012-01,AE,0.0
2,2012-01,AF,0.0
3,2012-01,AG,0.0
4,2012-01,AL,0.0
...,...,...,...
9942,2023-11,XK,20.0
9943,2023-11,YE,33.0
9944,2023-11,YT,4.0
9945,2023-11,ZA,86.0


In [23]:
# Load the country colors from /Users/cdroin/Library/CloudStorage/GoogleDrive-colasdroin@gmail.com/Mon Drive/Data_viz/speedruns/pre_analysis/data/national_colors.json
df_colors = pd.read_json("data/national_colors.json")
df_colors

Unnamed: 0,country,alpha2,colors
0,Algeria,DZ,"{'primary': ['#008000', '#FFFFFF', '#FF0000'],..."
1,Angola,AO,"{'primary': ['#FF0000', '#000000', '#FFFF00'],..."
2,Benin,BJ,"{'primary': ['#008000', '#FFFF00', '#FF0000'],..."
3,Botswana,BW,"{'primary': ['#76ACDC', '#000000', '#FFFFFF'],..."
4,Burkina Faso,BF,"{'primary': ['#FF0000', '#008000', '#FFFF00'],..."
...,...,...,...
209,Niue,NU,"{'primary': ['#FFDF00', '#0066FF'], 'secondary..."
210,Samoa,WS,"{'primary': ['#FF0000', '#FFFFFF', '#0000FF'],..."
211,Solomon Islands,SB,"{'primary': ['#497E48', '#FFFF00', '#0048ba'],..."
212,Tonga,TO,"{'primary': ['#FF0000', '#FFFFFF'], 'secondary..."


In [24]:
# Only keep the countries that are in the score data
df_colors = df_colors[df_colors["alpha2"].isin(data_top_1000_final["country"].unique())]

# Only keep the first primary color
df_colors["color"] = df_colors["colors"].apply(lambda x: x['primary'][0].lower())

df_colors


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_colors["color"] = df_colors["colors"].apply(lambda x: x['primary'][0].lower())


Unnamed: 0,country,alpha2,colors,color
0,Algeria,DZ,"{'primary': ['#008000', '#FFFFFF', '#FF0000'],...",#008000
1,Angola,AO,"{'primary': ['#FF0000', '#000000', '#FFFF00'],...",#ff0000
5,Burundi,BI,"{'primary': ['#FF0000', '#FFFFFF', '#008000'],...",#ff0000
9,Chad,TD,"{'primary': ['#00008B', '#FFD700', '#CC0000'],...",#00008b
10,Comoros,KM,"{'primary': ['#FFFF00', '#FFFFFF', '#FF0000', ...",#ffff00
...,...,...,...,...
205,French Polynesia,PF,"{'primary': ['#FF0000', '#FFFFFF'], 'secondary...",#ff0000
206,Guam,GU,"{'primary': ['#4F69C6', '#CA3435'], 'secondary...",#4f69c6
208,New Zealand,NZ,"{'primary': ['#000000'], 'secondary': ['#FFFFF...",#000000
209,Niue,NU,"{'primary': ['#FFDF00', '#0066FF'], 'secondary...",#ffdf00


In [25]:
# Add the color to the score data
data_top_1000_final = data_top_1000_final.merge(df_colors[["alpha2", "color"]], left_on="country", right_on="alpha2")

# Remove column alpha2
data_top_1000_final = data_top_1000_final.drop(columns=["alpha2"])

data_top_1000_final

Unnamed: 0,year_month,country,score,color
0,2012-01,AD,0.0,#0000ff
1,2012-04,AD,0.0,#0000ff
2,2012-07,AD,0.0,#0000ff
3,2012-10,AD,0.0,#0000ff
4,2013-01,AD,0.0,#0000ff
...,...,...,...,...
8129,2023-01,ZW,93.0,#008000
8130,2023-04,ZW,91.0,#008000
8131,2023-07,ZW,87.0,#008000
8132,2023-10,ZW,78.0,#008000


In [26]:
# Load the world map data
world = json.load(open("output/world_map.json"))

# Ensure that all the countries in the world map are in the score data
for country in world["features"]:
    if country["properties"]['name'] not in data_top_1000_final["country"].unique():
        print(country["properties"]['name'])

# Add all the missing countries to the score data with score 0
missing_countries = []
for country in world["features"]:
    if country["properties"]['name'] not in data_top_1000_final["country"].unique():
        missing_countries.append({"year_month": sorted_months[-1], "country": country["properties"]['name'], "score": 0, "color": "#000000"})

# Concatenate the missing countries to the original DataFrame
data_top_1000_final = pd.concat([data_top_1000_final, pd.DataFrame(missing_countries)], ignore_index=True)

TZ
EH
PG
CD
SD
HT
FK
GL
TF
TL
LS
BZ
GY
BW
ML
BJ
CM
GH
GW
BF
CF
CG
GA
GQ
ZM
MW
MZ
SZ
GM
VU
TJ
MD
NC
SB
BN
MK
XK
SS


In [27]:
pd.DataFrame(missing_countries)

Unnamed: 0,year_month,country,score,color
0,2023-11,TZ,0,#000000
1,2023-11,EH,0,#000000
2,2023-11,PG,0,#000000
3,2023-11,CD,0,#000000
4,2023-11,SD,0,#000000
5,2023-11,HT,0,#000000
6,2023-11,FK,0,#000000
7,2023-11,GL,0,#000000
8,2023-11,TF,0,#000000
9,2023-11,TL,0,#000000


In [28]:
data_top_1000_final

Unnamed: 0,year_month,country,score,color
0,2012-01,AD,0.0,#0000ff
1,2012-04,AD,0.0,#0000ff
2,2012-07,AD,0.0,#0000ff
3,2012-10,AD,0.0,#0000ff
4,2013-01,AD,0.0,#0000ff
...,...,...,...,...
8167,2023-11,SB,0.0,#000000
8168,2023-11,BN,0.0,#000000
8169,2023-11,MK,0.0,#000000
8170,2023-11,XK,0.0,#000000


In [29]:
def hex_to_pastel(hex_color):
    # Convert HEX to RGB
    rgb = mcolors.hex2color(hex_color)  # Gives values in the range [0, 1]
    
    # Increase lightness and reduce saturation
    pastel_rgb = [(channel + 0.7) / 2 for channel in rgb]  # Blend with white
    
    # Convert back to HEX
    pastel_hex = mcolors.rgb2hex(pastel_rgb)
    return pastel_hex

# Apply the function to the dataframe
data_top_1000_final["color"] = data_top_1000_final["color"].apply(hex_to_pastel)

In [30]:
# Convert month to string
data_top_1000_final["year_month"] = data_top_1000_final["year_month"].astype(str)

# Add full country name to the score data
data_top_1000_final["full_name"] = coco.convert(names=data_top_1000_final["country"], to='name_short')


In [31]:
data_top_1000_final

Unnamed: 0,year_month,country,score,color,full_name
0,2012-01,AD,0.0,#5959d9,Andorra
1,2012-04,AD,0.0,#5959d9,Andorra
2,2012-07,AD,0.0,#5959d9,Andorra
3,2012-10,AD,0.0,#5959d9,Andorra
4,2013-01,AD,0.0,#5959d9,Andorra
...,...,...,...,...,...
8167,2023-11,SB,0.0,#595959,Solomon Islands
8168,2023-11,BN,0.0,#595959,Brunei Darussalam
8169,2023-11,MK,0.0,#595959,North Macedonia
8170,2023-11,XK,0.0,#595959,Kosovo


In [32]:
# Sort data_top_1000_final by increasing month and then by decreasing value
data_top_1000_final = data_top_1000_final.sort_values(["year_month", "score"], ascending=[True, False])


In [33]:
# Remove countries with a sum score of 0 across all months
# data_top_1000_final = data_top_1000_final[data_top_1000_final.groupby('country')['score'].transform('sum') > 0]

In [34]:
data_top_1000_final

Unnamed: 0,year_month,country,score,color,full_name
7644,2012-01,US,135.0,#d95959,United States
490,2012-01,AU,30.0,#599959,Australia
1813,2012-01,DE,30.0,#595959,Germany
3675,2012-01,JP,30.0,#d95959,Japan
6517,2012-01,SE,30.0,#5959d9,Sweden
...,...,...,...,...,...
8167,2023-11,SB,0.0,#595959,Solomon Islands
8168,2023-11,BN,0.0,#595959,Brunei Darussalam
8169,2023-11,MK,0.0,#595959,North Macedonia
8170,2023-11,XK,0.0,#595959,Kosovo


In [35]:

data = {'header': data_top_1000_final.columns.to_list(), 'data': data_top_1000_final.values.tolist()}

# Save the graph data as a json file
# with open("/Users/cdroin/Library/CloudStorage/GoogleDrive-colasdroin@gmail.com/Mon Drive/Data_viz/speedruns/app/public/data/race_data.json", "w") as f:
#     json.dump(data, f)

# Save the graph data as a json file
with gzip.open("/Users/cdroin/Library/CloudStorage/GoogleDrive-colasdroin@gmail.com/Mon Drive/Data_viz/speedruns/app/public/data/race_data.json.gz", "wt") as f:
    json.dump(data, f)