In [24]:
import pandas as pd
import seaborn as sns
import numpy as np
import igraph as ig
import torch
import plotly.express as px
from src import Simulation as sim
from src import Dir_Reg
from src import Align
from src import visualize_latent_space as vls
from src import ABC_Reg

# Use the GPU when torch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
def read_game_data(path):
    """
    Read a match dump from JSON and flatten it to one row per game.

    The nested ``teams`` column is exploded twice and normalized into flat
    columns. Even-positioned entries become the ``*_0`` columns and
    odd-positioned entries the ``*_1`` columns, so this assumes every game
    has exactly two teams with one player each -- TODO confirm against the data.

    Parameters:
    path: File path (or anything else ``pd.read_json`` accepts).

    Returns:
    pd.DataFrame: One row per game, sorted by ``started_at`` with a fresh
        0..n-1 index. ``map_id`` is dropped and the per-player columns are
        renamed to ``player_0/1``, ``civ_0/1`` and ``rand_civ_0/1``.
    """
    game_data = pd.read_json(path)

    # Exploding twice unwraps both levels of nesting in "teams", leaving one
    # entry per row.
    players = game_data.explode("teams").explode("teams")
    team_data = pd.json_normalize(players["teams"].tolist())

    # Slicing keeps the old index labels, so reset them; drop=True avoids
    # adding the old index as an extra column.
    team_0 = team_data[0::2].reset_index(drop=True).add_suffix("_0")
    team_1 = team_data[1::2].reset_index(drop=True).add_suffix("_1")

    # Bind the per-game columns and the two per-player frames side by side.
    dataset = pd.concat([game_data.drop(columns="teams"), team_0, team_1], axis=1)
    dataset["started_at"] = pd.to_datetime(dataset["started_at"])
    dataset["finished_at"] = pd.to_datetime(dataset["finished_at"])

    # sort_values returns a new frame; the result has to be assigned or the
    # sort is silently lost. A stable sort keeps ties in their original order.
    dataset = dataset.sort_values(by="started_at", kind="stable", ignore_index=True)

    dataset = dataset.drop(columns="map_id")
    dataset = dataset.rename(columns={
        "profile_id_0": "player_0",
        "profile_id_1": "player_1",
        "civilization_0": "civ_0",
        "civilization_1": "civ_1",
        "civilization_randomized_0": "rand_civ_0",
        "civilization_randomized_1": "rand_civ_1",
    })
    return dataset

def map_player_ids(df, player_col_1, player_col_2):
    """
    Maps player IDs in a DataFrame to unique consecutive integers.

    Indices are assigned in order of first appearance when the two player
    columns are read row by row (first column, then second column).

    Parameters:
    df (pd.DataFrame): DataFrame containing player match data.
    player_col_1 (str): Name of the first player column.
    player_col_2 (str): Name of the second player column.

    Returns:
    dict: A dictionary mapping player IDs to unique consecutive integers.
    list: A list of tuples representing the mapped edge list, one tuple per
        row of ``df`` in row order.
    """
    # One two-column array serves both the unique-ID scan and the edge
    # lookups, so the lookup values are the same values the keys came from.
    pairs = df[[player_col_1, player_col_2]].values

    # Get unique players in the DataFrame (pd.unique keeps first-seen order)
    unique_players = pd.unique(pairs.ravel())

    # Create a mapping from player IDs to consecutive integers
    player_id_to_index = {player_id: index for index, player_id in enumerate(unique_players)}

    # Map the player IDs in the edge list to consecutive integers. Iterating
    # the array rows avoids the per-row Series that df.iterrows() builds.
    mapped_edges = [
        (player_id_to_index[first], player_id_to_index[second])
        for first, second in pairs
    ]

    return player_id_to_index, mapped_edges



In [3]:
# Load the match dump into `dataset` via read_game_data.
# NOTE(review): `path` is an absolute path on one user's machine, so this cell
# fails anywhere else; consider a configurable data directory.
# Alternative input file, kept for reference:
# path = r"C:\Users\yangs\Downloads\games_qm_1v1_2023_q2.json\games_qm_1v1_2023_q2.json"
path = r"C:\Users\yangs\Downloads\games_rm_1v1_s5.json\games_rm_1v1_s5.json"
dataset = read_game_data(path)

In [37]:
# Build A0: adjacency matrix of the "who played whom" graph among frequent
# players, for games started between 2023-06-16 and 2023-07-16.
temp = dataset.loc[:, ["player_0", "player_1", "game_id", "started_at", "server"]]

# Series.between includes both endpoints by default.
temp = temp[temp["started_at"].between("2023-06-16", "2023-07-16")]

# Matches per player = appearances as player_0 plus appearances as player_1.
# fill_value=0 keeps players who occur in only one of the two columns.
player_0_counts = temp["player_0"].value_counts()
player_1_counts = temp["player_1"].value_counts()
player_counts = player_0_counts.add(player_1_counts, fill_value=0).astype(int)

player_counts = player_counts.reset_index()
player_counts.columns = ["Player_ID", "Match_Count"]

# Players with more than 10 matches inside the window.
freq_player_wk_1 = player_counts[player_counts["Match_Count"] > 10]["Player_ID"].to_list()

# Keep games where both participants are frequent players, one row per
# ordered (player_0, player_1) pair.
freq_subset = temp[temp["player_0"].isin(freq_player_wk_1) & temp["player_1"].isin(freq_player_wk_1)]
freq_subset = freq_subset.loc[:, ["player_0", "player_1"]].drop_duplicates()

player_id_to_index, mapped_edges = map_player_ids(freq_subset, 'player_0', 'player_1')
g = ig.Graph(edges=mapped_edges, directed=False)
# drop_duplicates() only removes identical ordered pairs, so (a, b) and (b, a)
# both survive and become parallel edges in the undirected graph. Collapse
# them so each pair contributes a single edge to the adjacency matrix.
g.simplify()

A0 = torch.Tensor(g.get_adjacency().data)

In [36]:
# Build A1: adjacency matrix of the "who played whom" graph among frequent
# players, for games started between 2023-07-16 and 2023-08-16.
temp = dataset.loc[:, ["player_0", "player_1", "game_id", "started_at", "server"]]

# Series.between includes both endpoints by default.
temp = temp[temp["started_at"].between("2023-07-16", "2023-08-16")]

# Matches per player = appearances as player_0 plus appearances as player_1.
# fill_value=0 keeps players who occur in only one of the two columns.
player_0_counts = temp["player_0"].value_counts()
player_1_counts = temp["player_1"].value_counts()
player_counts = player_0_counts.add(player_1_counts, fill_value=0).astype(int)

player_counts = player_counts.reset_index()
player_counts.columns = ["Player_ID", "Match_Count"]

# Players with more than 10 matches inside the window.
freq_player_wk_1 = player_counts[player_counts["Match_Count"] > 10]["Player_ID"].to_list()

# Keep games where both participants are frequent players, one row per
# ordered (player_0, player_1) pair.
freq_subset = temp[temp["player_0"].isin(freq_player_wk_1) & temp["player_1"].isin(freq_player_wk_1)]
freq_subset = freq_subset.loc[:, ["player_0", "player_1"]].drop_duplicates()

player_id_to_index, mapped_edges = map_player_ids(freq_subset, 'player_0', 'player_1')
g = ig.Graph(edges=mapped_edges, directed=False)
# drop_duplicates() only removes identical ordered pairs, so (a, b) and (b, a)
# both survive and become parallel edges in the undirected graph. Collapse
# them so each pair contributes a single edge to the adjacency matrix.
g.simplify()

A1 = torch.Tensor(g.get_adjacency().data)
