In [42]:
import pandas as pd
import numpy as np  

## I. Get matches from patch >= 55 (7.36)
Preprocessing: empty values in the `throw`, `loss`, `comeback` and `stomp` columns are filled with 0.

In [43]:
# Columns loaded from the match metadata CSV; everything else in the file is ignored.
cols_to_read = [
    'match_id', 'duration', 'radiant_win',
    'tower_status_radiant', 'tower_status_dire', 'barracks_status_radiant',
    'barracks_status_dire', 'first_blood_time', 'radiant_score', 'dire_score',
    'radiant_team_id', 'dire_team_id', 'throw', 'loss', 'comeback', 'stomp', 'patch'
]


def read_metadata(csv_path, min_patch=55):
    """Load match metadata from a CSV, keeping only matches on recent patches.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file that contains at least the columns listed in ``cols_to_read``.
    min_patch : int, default 55
        Lowest value of the ``patch`` column to keep (55 corresponds to 7.36).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``match_id``. The ``patch`` column is dropped and missing
        values in ``throw``, ``loss``, ``comeback`` and ``stomp`` are replaced
        with 0. All other columns are returned exactly as read.
    """
    metadata = pd.read_csv(csv_path, usecols=cols_to_read, index_col='match_id')

    # Every step below returns a new frame, so nothing is assigned into a
    # boolean-filtered slice (which is what raises SettingWithCopyWarning).
    recent = metadata.loc[metadata['patch'] >= min_patch]
    filled = recent.fillna({col: 0 for col in ('throw', 'loss', 'comeback', 'stomp')})

    # NOTE: the tower/barracks status columns stay as quoted bit strings
    # (e.g. "'00111100'"). To turn one into an integer, strip the quotes and
    # parse base 2: int(str(x).strip().replace("'", ""), 2).
    return filled.drop(columns=['patch'])

In [55]:
# Load the filtered match metadata (indexed by match_id) and display it.
dota_df = read_metadata('./data/202405/main_metadata.csv/main_metadata.csv')
dota_df

Unnamed: 0_level_0,barracks_status_dire,barracks_status_radiant,dire_score,duration,first_blood_time,radiant_score,radiant_win,tower_status_dire,tower_status_radiant,throw,loss,comeback,stomp,dire_team_id,radiant_team_id
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
7750912161,'00111100','00110011',52,2075,148,36,False,'0000011110110000','0000000100000110',0.0,0.0,1210.0,10223.0,8629317.0,9425660.0
7750914469,'00111111','00110000',23,1856,206,16,False,'0000011100100100','0000011100000000',0.0,0.0,9441.0,1093.0,8629318.0,8629005.0
7750915644,'00111111','00001111',37,1850,251,26,False,'0000011110100110','0000011000111111',0.0,0.0,3335.0,8359.0,9330489.0,8961813.0
7750937564,'00111111','00000011',44,2525,3,35,False,'0000011110000100','0000011000000110',0.0,0.0,801.0,20711.0,9395679.0,9344594.0
7750968496,'00111111','00110011',32,1513,117,27,False,'0000011111110110','0000011100000100',0.0,0.0,2447.0,12041.0,8961813.0,9330489.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7769778401,'00110011','00111111',12,1692,117,27,True,'0000011100000110','0000011110110111',929.0,15888.0,0.0,0.0,8970060.0,8936613.0
7769839809,'00111111','00111100',44,2313,164,35,False,'0000011100100110','0000011000100000',0.0,0.0,10969.0,10072.0,8936613.0,8970060.0
7769914503,'00110011','00111111',19,1467,289,31,True,'0000011110000100','0000011110110111',610.0,16183.0,0.0,0.0,9025669.0,8849833.0
7769963805,'00111111','00000011',37,1935,289,15,False,'0000011111111110','0000000000000111',0.0,0.0,1219.0,28079.0,8849833.0,9025669.0


## II. Generate sparse matrix for hero selection for winning teams

In [45]:
def read_draft(csv_path, matches):
    """Read the pick/ban CSV and keep only rows belonging to ``matches``.

    Parameters
    ----------
    csv_path : str or path-like
        CSV with one row per pick or ban; must contain ``match_id``, ``ord``
        and ``leagueid`` columns.
    matches : pandas.DataFrame
        Frame whose index holds the match ids to keep.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``match_id`` is in ``matches.index``, without the first
        column of the file, ``ord`` and ``leagueid``, on a fresh 0..n-1 index.
    """
    draft = pd.read_csv(csv_path)
    belongs_to_match = draft['match_id'].isin(matches.index)
    # The first column is dropped by position, whatever it is called.
    unwanted = [draft.columns[0], 'ord', 'leagueid']
    return draft.loc[belongs_to_match].drop(columns=unwanted).reset_index(drop=True)

In [46]:
# Load the pick/ban rows for the matches kept in dota_df, then display them.
picks_bans = read_draft('./data/202405/picks_bans.csv/picks_bans.csv', dota_df)
picks_bans

Unnamed: 0,is_pick,hero_id,team,order,match_id
0,False,78.0,1.0,0.0,7750912161
1,False,95.0,0.0,1.0,7750912161
2,False,51.0,0.0,2.0,7750912161
3,False,9.0,1.0,3.0,7750912161
4,False,63.0,0.0,4.0,7750912161
...,...,...,...,...,...
18878,False,35.0,1.0,19.0,7770013750
18879,False,93.0,1.0,20.0,7770013750
18880,False,6.0,0.0,21.0,7770013750
18881,True,114.0,0.0,22.0,7770013750


Keep only the winning team's draft rows (picks and bans) for each match

In [82]:
# Start from an empty frame; the winning-team rows are concatenated onto it
# at the end of this cell.
winning_drafts = pd.DataFrame()

def retrieve_wins(matches, draft):
    """Return the draft rows (picks and bans) of the winning team of each match.

    Parameters
    ----------
    matches : pandas.DataFrame
        Indexed by match id, with a truthy/falsy ``radiant_win`` column.
    draft : pandas.DataFrame
        One row per pick/ban with at least ``match_id`` and ``team`` columns.
        Team 0 is selected when ``radiant_win`` is truthy, team 1 otherwise.

    Returns
    -------
    pandas.DataFrame
        The selected rows with the same columns as ``draft`` on a fresh
        0..n-1 index. Rows are grouped in the order the matches appear in
        ``matches`` and keep their relative ``draft`` order within a match.
        A match id repeated in ``matches.index`` is used once (first entry).
        Empty ``matches`` gives an empty frame that still has the columns.
    """
    # Team id that won each match: 0 when radiant_win is truthy, else 1.
    winning_team = pd.Series(
        np.where(matches['radiant_win'].astype(bool), 0, 1),
        index=matches.index,
    )
    winning_team = winning_team[~winning_team.index.duplicated(keep='first')]

    # Look up the winner for every draft row at once instead of re-scanning
    # the whole draft per match. Unknown match ids map to NaN and never match.
    is_winner_row = draft['team'].eq(draft['match_id'].map(winning_team))
    winner_rows = draft.loc[is_winner_row]

    # A stable sort by the match's position in `matches` gives the same row
    # order as iterating over the matches one by one.
    match_position = winning_team.index.get_indexer(winner_rows['match_id'])
    ordered = winner_rows.iloc[np.argsort(match_position, kind='stable')]
    return ordered.reset_index(drop=True)

# Winning-team draft rows for every match in dota_df.
wins = retrieve_wins(dota_df, picks_bans)

# Future proofing code for multiple csvs
winning_drafts = pd.concat([winning_drafts, wins], ignore_index=True)
# Shift the draft order from 0-based to 1-based -- presumably so that a real
# order never collides with the 0 used as filler in the hero matrix below.
winning_drafts['order'] = winning_drafts['order'] + 1


In [91]:
# Display the winning-team draft rows (the order column is 1-based here).
winning_drafts

Unnamed: 0,is_pick,hero_id,team,order,match_id
0,False,78.0,1.0,1.0,7750912161
1,False,9.0,1.0,4.0,7750912161
2,False,77.0,1.0,7.0,7750912161
3,True,59.0,1.0,8.0,7750912161
4,False,66.0,1.0,10.0,7750912161
...,...,...,...,...,...
9437,True,98.0,0.0,15.0,7770013750
9438,True,52.0,0.0,18.0,7770013750
9439,False,106.0,0.0,19.0,7770013750
9440,False,6.0,0.0,22.0,7770013750


In [105]:

def generate_sparse_matrix(matches, winning_drafts, num_heroes=138):
    """Build a match x hero matrix holding the draft order of each hero.

    Parameters
    ----------
    matches : pandas.DataFrame
        Frame whose index holds the match ids; one output row per entry.
    winning_drafts : pandas.DataFrame
        Draft rows with ``match_id``, ``hero_id`` and ``order`` columns.
    num_heroes : int, default 138
        Number of hero columns; columns are labelled 1..num_heroes.

    Returns
    -------
    pandas.DataFrame
        Dense float frame indexed like ``matches`` with columns
        1..num_heroes. A cell holds the ``order`` of that hero in that match
        and 0.0 where the hero does not appear. Rows whose ``hero_id`` is
        missing, non-integer or outside 1..num_heroes, or whose ``match_id``
        is not in ``matches.index``, are ignored. If a (match, hero) pair
        occurs more than once, the last row wins.
    """
    hero_ids = np.arange(1, num_heroes + 1)
    # Work on unique match ids so positions are unambiguous; repeated ids in
    # matches.index are expanded again by the final reindex.
    match_ids = matches.index.unique()
    values = np.zeros((len(match_ids), num_heroes))

    if len(winning_drafts):
        entries = winning_drafts[['match_id', 'hero_id', 'order']]
        # Drop NaN / out-of-range hero ids up front: writing them with .loc
        # would silently add new columns to the frame.
        entries = entries[entries['hero_id'].isin(hero_ids)]
        # NumPy does not define which value survives when one cell is
        # assigned twice, so make "last row wins" explicit.
        entries = entries.drop_duplicates(['match_id', 'hero_id'], keep='last')

        row_pos = match_ids.get_indexer(entries['match_id'])
        known = row_pos >= 0  # -1 marks match ids absent from `matches`
        col_pos = entries['hero_id'].to_numpy().astype(int) - 1
        values[row_pos[known], col_pos[known]] = entries['order'].to_numpy()[known]

    sparse_draft = pd.DataFrame(
        values,
        index=match_ids,
        columns=range(1, num_heroes + 1),
    )
    return sparse_draft.reindex(matches.index)

# Build the match x hero matrix from the filtered matches (dota_df) and the
# winning-team draft rows (winning_drafts), then display it.
sparse_draft = generate_sparse_matrix(dota_df, winning_drafts)
sparse_draft


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,129,130,131,132,133,134,135,136,137,138
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7750912161,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7750914469,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0,0.0,0.0
7750915644,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0
7750937564,24.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7750968496,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7769778401,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7769839809,0.0,0.0,0.0,0.0,8.0,23.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7769914503,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7769963805,0.0,15.0,0.0,0.0,0.0,22.0,0.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
