# Conversion of Replays to Combined Data Sampled at 100-Frame Intervals

In [1]:
import pandas as pd
import os

# Creating the combined CSV file

In [3]:
def collect_unique_values(directory, suffix, column):
    """Return the sorted distinct values of `column` across matching CSV files.

    Parameters
    ----------
    directory : str
        Folder whose files are scanned.
    suffix : str
        Only files whose name ends with this suffix are read.
    column : str
        Name of the column whose values are collected.

    Returns
    -------
    list
        Distinct non-missing values of `column`, sorted ascending. Empty when
        no file in `directory` matches `suffix`.

    Raises
    ------
    KeyError
        If a matching file has no column named `column`.
    """
    values = set()
    for filename in os.listdir(directory):
        if filename.endswith(suffix):
            table = pd.read_csv(os.path.join(directory, filename))
            # Empty cells are read as NaN; NaN cannot be ordered against
            # strings, so it would make the final sort raise TypeError.
            values.update(table[column].dropna().unique())
    return sorted(values)


# The three vocabularies below define the per-type columns of the combined
# dataset, so they are collected over every replay in the working directory.

# Getting all unit types
unit_types = collect_unique_values(os.getcwd(), "_actions.csv", "UnitType")
print(len(unit_types))

# Getting all research types
research_types = collect_unique_values(os.getcwd(), "_researches.csv", "ResearchType")
print(len(research_types))

# Getting all upgrade types
upgrade_types = collect_unique_values(os.getcwd(), "_upgrades.csv", "UpgradeType")
print(len(upgrade_types))

33
10
14


In [4]:
# Combining All Data
#
# For every "<ReplayID>_actions.csv" file in the working directory, the replay
# is sampled every FRAME_STEP frames and one flat record per sample is appended
# to `combined_rows`. `combined_data` is the DataFrame built from those records.
# Relies on `unit_types`, `research_types` and `upgrade_types` being defined.

# Number of frames between two consecutive samples of a replay.
FRAME_STEP = 100

# Columns copied from the latest "<ReplayID>_resources.csv" row of each player.
RESOURCE_COLUMNS = ["Minerals", "Gas", "GatheredMinerals", "GatheredGas", "SupplyUsed", "SupplyTotal"]

# Specify the directory where your replay data CSV files are stored
replay_directory = os.getcwd()

# Looked up by ReplayID to obtain the metadata columns and the winner label.
allReplays_df = pd.read_csv(os.path.join(replay_directory, 'all_replays_data.csv'))


def read_replay_table(replay_id, table):
    """Read `<replay_id>_<table>.csv` from `replay_directory` into a DataFrame."""
    return pd.read_csv(os.path.join(replay_directory, f"{replay_id}_{table}.csv"))


def replay_metadata(replay_id):
    """Return `(metadata, winner)` for one replay from `allReplays_df`.

    `metadata` holds the frame-independent columns in their output order; its
    "Frame" entry is a placeholder that the caller overwrites for each sample.

    Raises IndexError when `replay_id` has no row in `allReplays_df`, and
    KeyError when an expected column is absent.
    """
    selected_row = allReplays_df[allReplays_df['ReplayID'] == replay_id]

    def first(column):
        return selected_row[column].values[0]

    metadata = {
        "ReplayID": first('ReplayID'),
        "Duration": first('Frames'),
        "Frame": None,  # placeholder: keeps the column position, set per sample
        "MapName": first('Map'),
        "MapWidth": first('MapWidth'),
        "MapHeight": first('MapHeight'),
        "Player1_EAPM": first('Player1.EAPM'),
        "Player1_ECmdCount": first('Player1.EffectiveCmdCount'),
        "Player1_Race": first('Player1.Race'),
        "Player2_EAPM": first('Player2.EAPM'),
        "Player2_ECmdCount": first('Player2.EffectiveCmdCount'),
        "Player2_Race": first('Player2.Race'),
    }
    return metadata, first('WinnerTeam')


def unit_features(player, actions_df, destroyed_unit_ids, frame):
    """Count the units `player` has created by `frame` and how many survive.

    `destroyed_unit_ids` must already be restricted to units destroyed at or
    before `frame`; a created unit is alive when its UnitID is not among them.
    Every type in `unit_types` gets a `Player<N>_Alive_<type>` entry (0 if none).
    """
    prefix = f"Player{player}"
    created = actions_df[(actions_df['PlayerID'] == player) & (actions_df['Frame'] <= frame)]
    alive = created[~created['UnitID'].isin(destroyed_unit_ids)]

    features = {
        f"{prefix}_TotalUnits": len(created),
        f"{prefix}_AliveUnits": len(alive),
        f"{prefix}_DestroyedUnits": len(created) - len(alive),
    }
    # Zero-fill first so the per-type columns always appear in `unit_types` order.
    features.update({f"{prefix}_Alive_{unit_type}": 0 for unit_type in unit_types})
    for unit_type, count in alive.groupby('UnitType').size().items():
        features[f"{prefix}_Alive_{unit_type}"] = count
    return features


def research_features(player, researches_df, frame):
    """Flag (1/0) each type in `research_types` that `player` finished by `frame`.

    Rows with EndFrame == -1 are excluded.
    NOTE(review): -1 presumably marks a research that never completed - confirm.
    """
    finished = researches_df[
        (researches_df['PlayerID'] == player)
        & (researches_df['EndFrame'] != -1)
        & (researches_df['EndFrame'] <= frame)
    ]
    features = {f"Player{player}_{research_type}_Status": 0 for research_type in research_types}
    for research_type in finished['ResearchType']:
        features[f"Player{player}_{research_type}_Status"] = 1
    return features


def upgrade_features(player, upgrades_df, frame):
    """Return the highest level of each upgrade `player` finished by `frame`.

    Every type in `upgrade_types` gets a `Player<N>_<type>_Level` entry; an
    upgrade that is not finished, or whose level is missing, is reported as 0.
    """
    finished = upgrades_df[
        (upgrades_df['PlayerID'] == player)
        & (upgrades_df['EndFrame'] != -1)
        & (upgrades_df['EndFrame'] <= frame)
    ]
    features = {f"Player{player}_{upgrade_type}_Level": 0 for upgrade_type in upgrade_types}
    for upgrade_type, level in finished.groupby('UpgradeType')['UpgradeLevel'].max().items():
        # Missing levels are NaN in pandas and never compare equal to None.
        features[f"Player{player}_{upgrade_type}_Level"] = 0 if pd.isna(level) else level
    return features


def resource_features(player, resources_df, frame):
    """Return `player`'s most recent resource row at or before `frame`.

    Returns None when the player has no resource row up to `frame`.
    Takes the last matching row in file order.
    NOTE(review): assumes the resources file is ordered by Frame - confirm.
    """
    seen = resources_df[(resources_df['PlayerID'] == player) & (resources_df['Frame'] <= frame)]
    if seen.empty:
        return None
    latest = seen.iloc[-1]
    return {f"Player{player}_{column}": latest[column] for column in RESOURCE_COLUMNS}


def attack_features(attacks_df, frame):
    """Summarise the attacks of the interval ending at `frame`.

    The interval is (frame - FRAME_STEP, frame]: half-open, so an attack that
    falls exactly on a sampling boundary is counted in one interval only.
    The `*_TotalLost*` entries accumulate over all attacks up to `frame`.
    """
    window = attacks_df[(attacks_df['Frame'] > frame - FRAME_STEP) & (attacks_df['Frame'] <= frame)]
    till_frame = attacks_df[attacks_df['Frame'] <= frame]

    player1_lost_units = window["Player1_LostInAttackUnits"].sum()
    player2_lost_units = window["Player2_LostInAttackUnits"].sum()

    # The side that lost fewer units wins the interval; 0 means no losses.
    if player1_lost_units < player2_lost_units:
        attack_winner = 1
    elif player1_lost_units > player2_lost_units:
        attack_winner = 2
    elif player1_lost_units == player2_lost_units == 0:
        attack_winner = 0
    else:
        # Equal, non-zero losses: fall back to the recorded per-attack winner.
        attack_winner = window["AttackWinner"].max()

    return {
        # Number of attacks in the interval. The previous version also added the
        # size of a mis-parenthesised DefenderID filter tied to a hard-coded
        # frame (15700). TODO confirm a plain per-interval count is intended.
        "AttackOccurred": len(window),
        "Player1_BeforeAttackUnits": window["Player1_BeforeAttackUnits"].sum(),
        "Player2_BeforeAttackUnits": window["Player2_BeforeAttackUnits"].sum(),
        "Player1_AfterAttackUnits": window["Player1_AfterAttackUnits"].sum(),
        "Player2_AfterAttackUnits": window["Player2_AfterAttackUnits"].sum(),
        "Player1_LostInAttackUnits": player1_lost_units,
        "Player2_LostInAttackUnits": player2_lost_units,
        "Player1_LostInAttackMinerals": window["Player1_LostInAttackMinerals"].sum(),
        "Player1_LostInAttackGas": window["Player1_LostInAttackGas"].sum(),
        "Player2_LostInAttackMinerals": window["Player2_LostInAttackMinerals"].sum(),
        "Player2_LostInAttackGas": window["Player2_LostInAttackGas"].sum(),
        "Player1_TotalLostMinerals": till_frame["Player1_LostInAttackMinerals"].sum(),
        "Player2_TotalLostMinerals": till_frame["Player2_LostInAttackMinerals"].sum(),
        "Player1_TotalLostGas": till_frame["Player1_LostInAttackGas"].sum(),
        "Player2_TotalLostGas": till_frame["Player2_LostInAttackGas"].sum(),
        "AttackWinner": attack_winner,
    }


# One dict per (replay, sampled frame); turned into a DataFrame once at the end.
combined_rows = []

# Iterate through all CSV files in the specified directory (sorted so that the
# row order of the result does not depend on the file system).
for filename in sorted(os.listdir(replay_directory)):
    if not filename.endswith("_actions.csv"):
        continue
    replay_id = filename.replace("_actions.csv", "")

    # The metadata does not change between samples, so resolve it once and skip
    # replays that are missing from all_replays_data.csv.
    try:
        metadata, winner = replay_metadata(replay_id)
    except (IndexError, KeyError) as e:
        print("Error with replay", replay_id, e)
        continue

    actions_df = read_replay_table(replay_id, "actions")
    destroyed_df = read_replay_table(replay_id, "destroyed")
    resources_df = read_replay_table(replay_id, "resources")
    attacks_df = read_replay_table(replay_id, "attacks")
    upgrades_df = read_replay_table(replay_id, "upgrades")
    researches_df = read_replay_table(replay_id, "researches")

    if resources_df.empty:
        print("Error with replay", replay_id, "no resource samples")
        continue

    # range() needs a plain int; the last resources row bounds the sampling.
    lastFrame = int(resources_df['Frame'].iloc[-1])

    for frame_interval in range(0, lastFrame, FRAME_STEP):
        player1_res_data = resource_features(1, resources_df, frame_interval)
        player2_res_data = resource_features(2, resources_df, frame_interval)
        if player1_res_data is None or player2_res_data is None:
            # No resource snapshot yet for one of the players at this frame.
            continue

        # Only units destroyed up to this frame count as destroyed; using the
        # whole file would leak later events into earlier samples.
        destroyed_unit_ids = destroyed_df.loc[destroyed_df['Frame'] <= frame_interval, 'UnitID']

        player1_units = unit_features(1, actions_df, destroyed_unit_ids, frame_interval)
        player2_units = unit_features(2, actions_df, destroyed_unit_ids, frame_interval)

        # all data combined (the merge order fixes the column order of the output)
        replay_combined_data = (
            {**metadata, "Frame": frame_interval}
            | player1_units
            | player1_res_data
            | player2_res_data
            | player2_units
            | attack_features(attacks_df, frame_interval)
            | research_features(1, researches_df, frame_interval)
            | research_features(2, researches_df, frame_interval)
            | upgrade_features(1, upgrades_df, frame_interval)
            | upgrade_features(2, upgrades_df, frame_interval)
            | {"Winner": winner}
        )
        combined_rows.append(replay_combined_data)

    print("Added to DataFrame", replay_id)

# Build the frame once; rebuilding it after every replay is quadratic.
combined_data = pd.DataFrame(combined_rows)

Added to DataFrame GG10004
Added to DataFrame GG10045
Added to DataFrame GG1007
Added to DataFrame GG10150
Added to DataFrame GG10151
Added to DataFrame GG10239
Added to DataFrame GG10241
Added to DataFrame GG10310
Added to DataFrame GG10366
Added to DataFrame GG10367
Added to DataFrame GG10655
Added to DataFrame GG10715
Added to DataFrame GG10781
Added to DataFrame GG10883
Added to DataFrame GG10996
Added to DataFrame GG11079
Added to DataFrame GG11092
Added to DataFrame GG1121
Added to DataFrame GG1161
Added to DataFrame GG11804
Added to DataFrame GG11833
Added to DataFrame GG1187
Added to DataFrame GG11951
Added to DataFrame GG11967
Added to DataFrame GG11991
Added to DataFrame GG12033
Added to DataFrame GG1203
Added to DataFrame GG12064
Added to DataFrame GG12065
Added to DataFrame GG12095
Added to DataFrame GG1217
Added to DataFrame GG12188
Added to DataFrame GG12212
Added to DataFrame GG12254
Added to DataFrame GG12382
Added to DataFrame GG12416
Added to DataFrame GG12440
Added t

In [None]:
# Show the combined per-interval dataset as this cell's output.
combined_data

In [5]:
# Save the combined dataset as CSV, without writing the DataFrame index.
output_csv = '1D_All_ReplaysData_TvT.csv'
combined_data.to_csv(output_csv, index=False)