In [1]:
import json
import os

import pandas as pd

In [2]:
# Read every source table from disk.
# The six v1 tables share the 'data/v1/<table>-data.csv' naming scheme, so they
# are read in one pass and unpacked in the same order as the table names below.
(
    data_runs,
    data_games,
    data_categories,
    data_leaderboards,
    data_levels,
    data_users,
) = (
    pd.read_csv(f'data/v1/{table}-data.csv')
    for table in ('runs', 'games', 'categories', 'leaderboards', 'levels', 'users')
)
# The platform table comes from a separate dataset with its own file name.
data_platforms = pd.read_csv('data/other_dataset_with_platforms.csv')

In [3]:
# Reduce the platform table to its unique (platform_id, platform_name) pairs
platform_columns = ["platform_id", "platform_name"]
data_platforms = data_platforms.loc[:, platform_columns].drop_duplicates()

In [16]:
# Left-join every other table onto the runs, one merge per table.
# At each step the columns already in the frame keep their names and clashing
# columns from the table being joined receive that table's suffix.
data = (
    data_runs
    .merge(data_games, how='left', left_on='gameID', right_on='ID', suffixes=('', '_game'))
    .merge(data_categories, how='left', left_on='categoryID', right_on='ID', suffixes=('', '_category'))
    .merge(data_levels, how='left', left_on='levelID', right_on='ID', suffixes=('', '_level'))
    .merge(data_users, how='left', left_on='players', right_on='ID', suffixes=('', '_user'))
    # 'ID' is still the run's own ID here because earlier merges left it unsuffixed
    .merge(data_leaderboards, how='left', left_on='ID', right_on='runID', suffixes=('', '_leaderboard'))
    .merge(data_platforms, how='left', left_on='platform', right_on='platform_id', suffixes=('', '_plateform'))
)

In [17]:
# Drop the references to the source tables to free up memory;
# from here on only the merged `data` frame is used.
del (
    data_runs,
    data_games,
    data_categories,
    data_leaderboards,
    data_levels,
    data_users,
    data_platforms,
)


In [18]:
# Restrict the dataset to runs whose status is exactly 'verified'
is_verified = data['status'].eq('verified')
data = data.loc[is_verified]

In [19]:
# Discard every identifier column, i.e. any column whose name contains '_id' or 'ID'
non_id_columns = [
    column
    for column in data.columns
    if not ('_id' in column or 'ID' in column)
]
data = data[non_id_columns]

# Disabled step: drop the rows that have no 'location'.
# NOTE(review): the earlier note on this line spoke of the 'type' column and of
# excluding per-level data, which does not match the column used below —
# confirm the intent before re-enabling.
# data = data.dropna(subset=['location'])

In [20]:
# Remove, in a single call, every remaining column that is not needed afterwards
unused_columns = [
    # columns brought in with the '_leaderboard' suffix, plus leaderboard extras
    "platform_leaderboard",
    "emulated_leaderboard",
    "players_leaderboard",
    "examiner_leaderboard",
    "verifiedDate_leaderboard",
    "variablesAndValues",
    "date_leaderboard",
    "primaryTime_leaderboard",
    "place",
    "numRuns",
    # unsuffixed run-level columns
    "platform",
    "players",
    "examiner",
    "values",
    "status",
    "statusReason",
    "verifiedDate",
    # dates, times and user details
    "date",
    "primaryTime",
    "releaseDate",
    "createdDate",
    "name_user",
    "signupDate",
    "location",
]
data = data.drop(columns=unused_columns)

In [21]:
# Turn missing level names into the placeholder string 'None', so that a later
# group-by on this column does not discard those rows (pandas excludes NaN
# group keys by default).
data = data.assign(name_level=data['name_level'].fillna('None'))

In [22]:
# Collapse the runs to one row per (game name, category, level).
# GroupBy.first() keeps, for each column, the first non-null entry of the group,
# so a resulting row is not necessarily a single original run.
group_keys = ['name', 'name_category', 'name_level']
first_per_group = data.groupby(group_keys).first()
data = first_per_group.reset_index()

In [23]:
# Select the rows whose game name is exactly 'Super Mario 64', respectively 'Portal'
game_name = data['name']
data_mario_64 = data.loc[game_name.eq('Super Mario 64')]
data_portal = data.loc[game_name.eq('Portal')]

In [24]:
# Display the Super Mario 64 rows
data_mario_64

Unnamed: 0,name,name_category,name_level,emulated,URL,numCategories,numLevels,rules,type,numPlayers,rules_level,platform_name
284973,Super Mario 64,0 Star,,False,sm64,6.0,15.0,## Objectives:\r\n* Beat the game\r\n\r\n## Ga...,per-game,1.0,,Nintendo 64
284974,Super Mario 64,1 Star,,True,sm64,6.0,15.0,## Objectives: \r\n* Beat the game\r\n\r\n## G...,per-game,1.0,,Nintendo 64
284975,Super Mario 64,120 Star,,True,sm64,6.0,15.0,## Objectives:\r\n* Collect all 120 stars\r\n*...,per-game,1.0,,Nintendo 64
284976,Super Mario 64,16 Star,,True,sm64,6.0,15.0,## Objectives:\r\n* Beat the game\r\n\r\n## Ga...,per-game,1.0,,Nintendo 64
284977,Super Mario 64,70 Star,,True,sm64,6.0,15.0,## Objectives:\r\n* Beat the game\r\n\r\n## Ga...,per-game,1.0,,Nintendo 64
284978,Super Mario 64,Stage RTA,Big Boo's Haunt,False,sm64,6.0,15.0,# General Rules (updated 10/16/2023)\r\n\nStag...,per-level,1.0,,Nintendo 64
284979,Super Mario 64,Stage RTA,Bob-omb Battlefield,False,sm64,6.0,15.0,# General Rules (updated 10/16/2023)\r\n\nStag...,per-level,1.0,,Nintendo 64
284980,Super Mario 64,Stage RTA,"Cool, Cool Mountain",False,sm64,6.0,15.0,# General Rules (updated 10/16/2023)\r\n\nStag...,per-level,1.0,,Nintendo 64
284981,Super Mario 64,Stage RTA,"Dire, Dire Docks",False,sm64,6.0,15.0,# General Rules (updated 10/16/2023)\r\n\nStag...,per-level,1.0,,Nintendo 64
284982,Super Mario 64,Stage RTA,Hazy Maze Cave,False,sm64,6.0,15.0,# General Rules (updated 10/16/2023)\r\n\nStag...,per-level,1.0,,Nintendo 64


In [28]:
# Rows whose game name is exactly 'Portal'
data_portal = data.loc[data['name'].eq('Portal')]

In [29]:
# Display the Portal rows
data_portal

Unnamed: 0,name,name_category,name_level,emulated,URL,numCategories,numLevels,rules,type,numPlayers,rules_level,platform_name
209832,Portal,Glitchless,00-01,False,portal,7.0,24.0,[Glitchless category rules](https://www.speedr...,per-level,1.0,,PC
209833,Portal,Glitchless,02-03,False,portal,7.0,24.0,[Glitchless category rules](https://www.speedr...,per-level,1.0,,PC
209834,Portal,Glitchless,04-05,False,portal,7.0,24.0,[Glitchless category rules](https://www.speedr...,per-level,1.0,,PC
209835,Portal,Glitchless,06-07,False,portal,7.0,24.0,[Glitchless category rules](https://www.speedr...,per-level,1.0,,PC
209836,Portal,Glitchless,08,False,portal,7.0,24.0,[Glitchless category rules](https://www.speedr...,per-level,1.0,,PC
...,...,...,...,...,...,...,...,...,...,...,...,...
209903,Portal,Out of Bounds,Adv 18,False,portal,7.0,24.0,[OoB category rules](https://www.speedrun.com/...,per-level,1.0,"Use \""changelevel testchmb_a_14_advanced\"" com...",PC
209904,Portal,Out of Bounds,,False,portal,7.0,24.0,# **Out of Bounds Rules**\r\n\r\nBeat the game...,per-game,1.0,,PC
209905,Portal,Out of Bounds,e00,False,portal,7.0,24.0,[OoB category rules](https://www.speedrun.com/...,per-level,1.0,,PC
209906,Portal,Out of Bounds,e01,False,portal,7.0,24.0,[OoB category rules](https://www.speedrun.com/...,per-level,1.0,,PC


In [30]:
# Rows whose game name is exactly 'Minecraft: Java Edition'
data_minecraft = data.loc[data['name'].eq('Minecraft: Java Edition')]

In [32]:
# Display the Minecraft: Java Edition rows
data_minecraft

Unnamed: 0,name,name_category,name_level,emulated,URL,numCategories,numLevels,rules,type,numPlayers,rules_level,platform_name
165924,Minecraft: Java Edition,All Achievements,,False,mc,15.0,0.0,- Complete all of the achievements\r\n- End ti...,per-game,1.0,,PC
165925,Minecraft: Java Edition,All Advancements,,False,mc,15.0,0.0,- Complete all of the advancements\r\n- End ti...,per-game,1.0,,PC
165926,Minecraft: Java Edition,All Advancements Co-op,,False,mc,15.0,0.0,- Complete all of the advancements\r\n- End ti...,per-game,16.0,,PC
165927,Minecraft: Java Edition,Any%,,False,mc,15.0,0.0,- Complete the game and reach the credits\r\n-...,per-game,1.0,,PC
165928,Minecraft: Java Edition,Any% (Peaceful),,False,mc,15.0,0.0,- Run on Peaceful difficulty\r\n- Do not chang...,per-game,1.0,,PC
165929,Minecraft: Java Edition,Any% (Time Travel),,False,mc,15.0,0.0,- Complete the game and reach the credits\r\n-...,per-game,1.0,,PC
165930,Minecraft: Java Edition,Any% Glitchless,,False,mc,15.0,0.0,- Complete the game and reach the credits\r\n-...,per-game,1.0,,PC
165931,Minecraft: Java Edition,Any% Glitchless (Demo),,False,mc,15.0,0.0,The run must either be performed on the Minecr...,per-game,1.0,,PC
165932,Minecraft: Java Edition,Any% Glitchless (Peaceful),,False,mc,15.0,0.0,- Run on Peaceful difficulty\r\n- Do not chang...,per-game,1.0,,PC
165933,Minecraft: Java Edition,Any% Glitchless Co-op,,False,mc,15.0,0.0,- Complete the game and reach the credits\r\n-...,per-game,16.0,,PC


In [None]:
# Put all the data as a tree
def build_game_tree(game_name, game_rows):
    """Build the nested category/level dictionary for one game.

    Parameters
    ----------
    game_name : str
        Label stored under the root node's "name" key.
    game_rows : pandas.DataFrame
        Rows of a single game. Only the "name_category", "name_level" and
        "rules" columns are read. The frame is expected to hold at most one
        row per (category, level) pair.

    Returns
    -------
    dict
        ``{"name": game_name, "children": [...]}`` with one child per category,
        in order of first appearance in ``game_rows``:

        * a category with at least one real level becomes
          ``{"name": category, "children": [{"name": level, "rules": ...}, ...]}``;
        * a category without any real level becomes
          ``{"name": category, "value": <rules of its first row>}``.

    Notes
    -----
    Rows whose level name is the placeholder string "None" are never emitted as
    levels. As a consequence, when a category has both real levels and a
    placeholder row, the rules of the placeholder row do not appear in the tree.
    """
    levels_by_category = {}  # category -> list of level nodes (insertion-ordered)
    first_rules = {}         # category -> rules of the first row seen for it

    # Single pass over the rows instead of re-filtering the frame per level.
    rows = zip(game_rows["name_category"], game_rows["name_level"], game_rows["rules"])
    for category, level_name, rules in rows:
        first_rules.setdefault(category, rules)
        level_nodes = levels_by_category.setdefault(category, [])
        if level_name != "None":
            level_nodes.append({"name": level_name, "rules": rules})

    children = []
    for category, level_nodes in levels_by_category.items():
        if level_nodes:
            children.append({"name": category, "children": level_nodes})
        else:
            # No level below this category: store its rules directly on the node
            children.append({"name": category, "value": first_rules[category]})

    return {"name": game_name, "children": children}


dict_mario = build_game_tree("Super Mario 64", data_mario_64)

# Same for portal
dict_portal = build_game_tree("Portal", data_portal)

# Same for minecraft
dict_minecraft = build_game_tree("Minecraft: Java Edition", data_minecraft)
        
        

In [49]:
# Bundle the three game trees under short keys.
# Note: this rebinds `data`, which until now referred to the DataFrame.
data = dict(mario=dict_mario, portal=dict_portal, minecraft=dict_minecraft)

{'name': 'Super Mario 64',
 'children': [{'name': '0 Star',
   'value': '## Objectives:\\r\\n* Beat the game\\r\\n\\r\\n## Gameplay Restrictions:\\r\\n* Cannot collect any stars'},
  {'name': '1 Star',
   'value': '## Objectives: \\r\\n* Beat the game\\r\\n\\r\\n## Gameplay Restrictions:\\r\\n* DDD skip is banned'},
  {'name': '120 Star',
   'value': '## Objectives:\\r\\n* Collect all 120 stars\\r\\n* Beat the game\\r\\n\\r\\n## Gameplay Restrictions:\\r\\n* None'},
  {'name': '16 Star',
   'value': '## Objectives:\\r\\n* Beat the game\\r\\n\\r\\n## Gameplay Restrictions:\\r\\n* SBLJ is banned\\r\\n* Any method to skip the 30 star door other than Mips clipping is banned'},
  {'name': '70 Star',
   'value': '## Objectives:\\r\\n* Beat the game\\r\\n\\r\\n## Gameplay Restrictions:\\r\\n* All forms of BLJ are banned\\r\\n* All forms of MIPS clipping are banned\\r\\n* Circumventing any castle door or star requirement via any other method is banned'},
  {'name': 'Stage RTA',
   'children': 

In [None]:
# Save the data as JSON.
# The destination can be overridden with the CATEGORIES_JSON_PATH environment
# variable, so the notebook can run on machines that do not have the folder
# below; when the variable is unset the original location is used.
# NOTE(review): the fallback is an absolute, user-specific path — consider
# replacing it with a path relative to the project once the layout is confirmed.
categories_json_path = os.environ.get(
    "CATEGORIES_JSON_PATH",
    "/Users/cdroin/Library/CloudStorage/GoogleDrive-colasdroin@gmail.com/Mon Drive/Data_viz/speedruns/app/public/data/categories.json",
)
with open(categories_json_path, "w") as f:
    json.dump(data, f)