In [3]:
import json
from os import listdir
from os.path import isfile, join

import pandas as pd

# Merge Matches Data

**1. Find all files in the matchesdata folder and add them to a list**

In [2]:
# Collect the match CSVs from ../matchesdata/, skipping any name that contains
# "matches_ids.csv". Restricting to regular *.csv files keeps stray directory
# entries (sub-directories, checkpoint folders, editor files) away from
# read_csv, and sorting makes the merge order -- and therefore which duplicate
# match_id survives -- reproducible, because listdir returns entries in
# arbitrary order.
csv_files = sorted(
    f
    for f in listdir("../matchesdata/")
    if "matches_ids.csv" not in f
    and f.lower().endswith(".csv")
    and isfile(join("../matchesdata/", f))
)

**2. Create an empty data frame with the expected column names**

In [3]:
# Start from an empty frame that only carries the four match columns.
match_columns = ["match_id", "victor_team", "radiant_team", "dire_team"]
data = pd.DataFrame(columns=match_columns)

**3. Loop over each file in the folder and append it to the data frame**

In [4]:
# Read every file once and concatenate in a single call: DataFrame.append was
# removed in pandas 2.0, and growing a frame inside the loop re-copies all
# previously loaded rows on each iteration.
frames = [pd.read_csv(f"../matchesdata/{file}") for file in csv_files]

# With no input files, `data` keeps the empty frame created earlier, as before.
if frames:
    # drop_duplicates keeps the first occurrence of each match_id, which is the
    # same row the per-iteration de-duplication retained.
    data = pd.concat(frames, ignore_index=True).drop_duplicates("match_id")

In [5]:
# Preview the last rows of the merged frame.
data.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
21171,5335146932,Dire,96-10-126-86-85,129-91-49-72-79
21172,5346192557,Dire,10-20-23-13-108,106-90-7-11-40
21173,5389270362,Dire,101-53-2-23-100,106-11-98-7-31
21174,5368190580,Dire,128-70-23-96-45,90-43-50-7-99
21175,5339404531,Radiant,77-79-46-99-90,60-41-108-13-68


**4. Save the merged data file as csv**

In [6]:
# Write the merged frame to ../data/matches.csv; index=False leaves the row index out of the file.
data.to_csv("../data/matches.csv", index=False)

# Merge random matches data

In [7]:
raw_dir = "../rawdata/"

# Only regular *.csv files, in sorted order: listdir also returns
# sub-directories and non-CSV entries that read_csv cannot parse, and its
# ordering is arbitrary, which would make the surviving duplicate of a
# match_id depend on the filesystem.
csv_files = sorted(
    f
    for f in listdir(raw_dir)
    if f.lower().endswith(".csv") and isfile(join(raw_dir, f))
)

# Empty frame with the expected columns; it is what `data` stays as when the
# folder holds no input files.
data = pd.DataFrame(columns=[
                    "match_id",
                    "match_seq_num",
                    "radiant_win",
                    "start_time",
                    "duration",
                    "avg_mmr",
                    "num_mmr",
                    "lobby_type",
                    "game_mode",
                    "avg_rank_tier",
                    "num_rank_tier",
                    "cluster",
                    "radiant_team",
                    "dire_team"
])

# Load each file once and concatenate in one call: DataFrame.append was removed
# in pandas 2.0, and appending inside the loop copies the accumulated rows on
# every iteration.
frames = [pd.read_csv(join(raw_dir, file)) for file in csv_files]
if frames:
    # Keeps the first occurrence of each match_id, the same row the
    # per-iteration de-duplication retained.
    data = pd.concat(frames, ignore_index=True).drop_duplicates("match_id")

data.tail()

Unnamed: 0,match_id,match_seq_num,radiant_win,start_time,duration,avg_mmr,num_mmr,lobby_type,game_mode,avg_rank_tier,num_rank_tier,cluster,radiant_team,dire_team
348,5496943702,4608430373,False,1593764833,1793,4034,3,7,22,80,7,156,107-23-30-64-1,9-10-110-120-8
349,5496943914,4608440726,False,1593764845,2418,5909,3,7,22,80,5,136,34-126-65-95-64,86-90-104-56-48
350,5496944214,4608428022,True,1593764863,1642,5973,5,7,22,80,6,154,9-48-106-14-83,49-38-12-100-86
351,5496945916,4608429140,False,1593764959,1356,7346,5,7,22,80,6,138,112-98-81-9-80,36-42-126-100-3
352,5496946503,4608428460,False,1593765013,1491,4979,4,7,22,80,8,156,54-5-63-80-108,41-68-36-120-22


In [8]:
# Keep only the columns shared with the matches dataset, rename them to that
# dataset's names, and turn the boolean radiant_win flag into a team label.
# The replacement is scoped to victor_team so that values in the other
# selected columns can never be rewritten by the True/False mapping.
data = (
    data[["match_id", "radiant_win", "radiant_team", "dire_team", "duration"]]
    .rename(columns={"radiant_win": "victor_team", "duration": "match_duration"})
    .assign(
        victor_team=lambda frame: frame["victor_team"].replace(
            {True: "Radiant", False: "Dire"}
        )
    )
)
data

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,match_duration
0,5497083018,Radiant,113-96-1-83-58,106-66-13-67-39,3223
1,5497084606,Radiant,96-22-46-119-93,19-111-39-6-16,1930
2,5497086811,Radiant,38-3-88-126-42,35-93-31-82-96,1511
3,5497087112,Dire,110-23-39-1-7,9-84-38-101-70,2127
4,5497089611,Radiant,107-59-89-21-110,48-106-3-93-53,2265
...,...,...,...,...,...
348,5496943702,Dire,107-23-30-64-1,9-10-110-120-8,1793
349,5496943914,Dire,34-126-65-95-64,86-90-104-56-48,2418
350,5496944214,Radiant,9-48-106-14-83,49-38-12-100-86,1642
351,5496945916,Dire,112-98-81-9-80,36-42-126-100-3,1356


In [9]:
# Write the reduced frame to ../data/random_matches.csv; index=False leaves the row index out of the file.
data.to_csv("../data/random_matches.csv", index=False)

# Fix random matches data

In [11]:
# Load ../data/random_matches.csv into `rms` and preview its last rows.
rms = pd.read_csv("../data/random_matches.csv")
rms.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,match_duration
348,5496943702,Dire,107-23-30-64-1,9-10-110-120-8,1793
349,5496943914,Dire,34-126-65-95-64,86-90-104-56-48,2418
350,5496944214,Radiant,9-48-106-14-83,49-38-12-100-86,1642
351,5496945916,Dire,112-98-81-9-80,36-42-126-100-3,1356
352,5496946503,Dire,54-5-63-80-108,41-68-36-120-22,1491


In [13]:
# Break each "-"-separated team string into a list of string tokens.
for team_col in ("radiant_team", "dire_team"):
    rms[team_col] = rms[team_col].str.split("-")


In [14]:

# Convert every token in the two team lists to int.
for team_col in ("radiant_team", "dire_team"):
    rms[team_col] = rms[team_col].apply(lambda tokens: [int(token) for token in tokens])

In [15]:
# Preview the last rows after the team columns were converted to lists of ints.
rms.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,match_duration
348,5496943702,Dire,"[107, 23, 30, 64, 1]","[9, 10, 110, 120, 8]",1793
349,5496943914,Dire,"[34, 126, 65, 95, 64]","[86, 90, 104, 56, 48]",2418
350,5496944214,Radiant,"[9, 48, 106, 14, 83]","[49, 38, 12, 100, 86]",1642
351,5496945916,Dire,"[112, 98, 81, 9, 80]","[36, 42, 126, 100, 3]",1356
352,5496946503,Dire,"[54, 5, 63, 80, 108]","[41, 68, 36, 120, 22]",1491


In [17]:
# Write the converted frame to ../data/random_matches_fixed.csv without the row index.
# NOTE(review): CSV has no list type, so the list-valued team columns are stored
# as text (e.g. "[107, 23, 30, 64, 1]"); anything reading this file has to
# parse those cells back into lists.
rms.to_csv("../data/random_matches_fixed.csv", index=False)

In [18]:
# Read the saved file back to check the round trip. The team columns are stored
# in the CSV as text such as "[107, 23, 30, 64, 1]"; a plain read_csv would
# return them as strings, so parse them into lists of ints on load. That
# bracketed, comma-separated form is valid JSON, hence json.loads.
test2 = pd.read_csv(
    "../data/random_matches_fixed.csv",
    converters={"radiant_team": json.loads, "dire_team": json.loads},
)
test2.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,match_duration
348,5496943702,Dire,"[107, 23, 30, 64, 1]","[9, 10, 110, 120, 8]",1793
349,5496943914,Dire,"[34, 126, 65, 95, 64]","[86, 90, 104, 56, 48]",2418
350,5496944214,Radiant,"[9, 48, 106, 14, 83]","[49, 38, 12, 100, 86]",1642
351,5496945916,Dire,"[112, 98, 81, 9, 80]","[36, 42, 126, 100, 3]",1356
352,5496946503,Dire,"[54, 5, 63, 80, 108]","[41, 68, 36, 120, 22]",1491
