In [1]:
import pandas as pd
import requests
from sklearn import preprocessing 
# Raise the pandas display limit so frames of up to 500 rows are rendered in
# full; longer frames are still shown truncated.
pd.set_option('display.max_rows', 500)


In [2]:
# Load the match summary CSV (path is relative to the notebook's directory).
match_summary_df = pd.read_csv('../data/gol_with_match_ids.csv')

In [3]:
# Discard the columns that are not carried into the encoded output.
# Note that 'year' is removed here and re-derived later from 'min_start_time'.
match_summary_df = match_summary_df.drop(
    labels=[
        'date',
        'season',
        'week',
        'year',
        'index',
        'patch',
        'gol_game_num',
        'game_length(min)',
        'game_length(s)',
    ],
    axis='columns',
)


In [4]:
# Preview the first rows after dropping the unused columns.
match_summary_df.head()

Unnamed: 0,league,format,game_in_format,red_team,blue_team,red_team_outcome,blue_team_outcome,winner,season_format,loser,riot_match_id,min_start_time
0,LCK,BO5,1,T1,Gen.G eSports,LOSS,WIN,Gen.G eSports,PLAYOFFS,T1,111561337007371296,2024-04-14 06:00:00+00:00
1,LCK,BO5,2,Gen.G eSports,T1,LOSS,WIN,T1,PLAYOFFS,Gen.G eSports,111561337007371297,2024-04-14 06:00:00+00:00
2,LCK,BO5,3,T1,Gen.G eSports,WIN,LOSS,T1,PLAYOFFS,Gen.G eSports,111561337007371298,2024-04-14 07:15:17+00:00
3,LCK,BO5,4,T1,Gen.G eSports,LOSS,WIN,Gen.G eSports,PLAYOFFS,T1,111561337007371299,2024-04-14 07:53:09+00:00
4,LCK,BO5,5,Gen.G eSports,T1,WIN,LOSS,Gen.G eSports,PLAYOFFS,T1,111561337007371300,2024-04-14 08:23:02+00:00


## Label Encoding

In [5]:
# Replace each categorical text column with integer codes.
#
# A separate LabelEncoder is fitted per column and kept in `label_encoders`,
# so the code -> label mapping of every column stays available afterwards
# (via `label_encoders[column].classes_` / `.inverse_transform`). Refitting a
# single shared encoder would keep only the mapping of the last column.
#
# NOTE(review): 'red_team_outcome' and 'blue_team_outcome' are fitted
# independently; their codes only agree as long as both columns contain the
# same set of distinct values - confirm this holds for the input data.
label_encoders = {}
for column in [
    'league',
    'format',
    'red_team_outcome',
    'blue_team_outcome',
    'season_format',
]:
    label_encoder = preprocessing.LabelEncoder()
    match_summary_df[column] = label_encoder.fit_transform(match_summary_df[column])
    label_encoders[column] = label_encoder

## Handling Date Time

In [6]:
# Parse the start timestamps once and derive the calendar parts from that
# single index, instead of re-parsing the whole column for every new field.
start_times = pd.DatetimeIndex(match_summary_df['min_start_time'])
match_summary_df['month'] = start_times.month
match_summary_df['day'] = start_times.day
match_summary_df['year'] = start_times.year


In [7]:
# Preview the frame after label encoding and after adding month/day/year.
match_summary_df.head()

Unnamed: 0,league,format,game_in_format,red_team,blue_team,red_team_outcome,blue_team_outcome,winner,season_format,loser,riot_match_id,min_start_time,month,day,year
0,0,2,1,T1,Gen.G eSports,0,1,Gen.G eSports,0,T1,111561337007371296,2024-04-14 06:00:00+00:00,4,14,2024
1,0,2,2,Gen.G eSports,T1,0,1,T1,0,Gen.G eSports,111561337007371297,2024-04-14 06:00:00+00:00,4,14,2024
2,0,2,3,T1,Gen.G eSports,1,0,T1,0,Gen.G eSports,111561337007371298,2024-04-14 07:15:17+00:00,4,14,2024
3,0,2,4,T1,Gen.G eSports,0,1,Gen.G eSports,0,T1,111561337007371299,2024-04-14 07:53:09+00:00,4,14,2024
4,0,2,5,Gen.G eSports,T1,1,0,Gen.G eSports,0,T1,111561337007371300,2024-04-14 08:23:02+00:00,4,14,2024


## Encoding Teams

In [8]:
# Load the team reference table; its 'Name' and 'team_id' columns are used
# below to translate team names into numeric ids.
team_info_df = pd.read_csv('../../post_champ_selection/data/general_data/TEAM_INFO.csv')


In [9]:
# Fill missing Region values with the placeholder "NorthA".
# NOTE(review): presumably these are rows whose region code "NA" was read as a
# missing value by read_csv - confirm against TEAM_INFO.csv.
#
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on the column selection: that chained in-place
# form raises a FutureWarning on pandas 2.x and leaves the frame unchanged
# once Copy-on-Write is enabled.
team_info_df["Region"] = team_info_df["Region"].fillna("NorthA")

In [10]:
# Show every team row that still contains at least one missing value
# (an empty result means no missing values remain).
team_info_df[team_info_df.isnull().any(axis=1)]

Unnamed: 0.1,Unnamed: 0,team_id,Name,Season,Region,Games,Win rate,K:D,GPM,GDM,...,TD@15,GD@15,PPG,NASHPG,NASH%,CSM,DPM,WPM,VWPM,WCPM


In [11]:
# Replace the four team-name columns with the numeric `team_id` of the
# TEAM_INFO row whose `Name` matches.
#
# Each pass left-joins the two-column lookup on one name column, so every
# match row is kept even when no team matches (the id is then missing), and
# the new id column is appended at the end. The resulting column order is:
# remaining match columns, red_team_id, blue_team_id, winner_id, loser_id.
#
# NOTE(review): TEAM_INFO also has a `Season` column; if a team `Name` occurs
# on more than one row, each left join repeats the match row once per
# occurrence - confirm that names are unique in TEAM_INFO.
team_id_lookup = team_info_df[['Name', 'team_id']]

merged_df = match_summary_df
for name_column, id_column in [
    ('red_team', 'red_team_id'),
    ('blue_team', 'blue_team_id'),
    ('winner', 'winner_id'),
    ('loser', 'loser_id'),
]:
    merged_df = (
        merged_df
        .merge(
            team_id_lookup.rename(columns={'team_id': id_column}),
            how='left',
            left_on=name_column,
            right_on='Name',
        )
        # The name column and the join key are no longer needed once the id
        # has been attached.
        .drop(columns=[name_column, 'Name'])
    )

match_summary_df = merged_df

In [12]:
# List match rows with any missing value; after the left joins above, a row
# appears here when one of its team names found no match in TEAM_INFO.
match_summary_df[match_summary_df.isnull().any(axis=1)]

Unnamed: 0,league,format,game_in_format,red_team_outcome,blue_team_outcome,season_format,riot_match_id,min_start_time,month,day,year,red_team_id,blue_team_id,winner_id,loser_id


In [13]:
# Single boolean: True if any cell in the frame is missing
# (reduces per column first, then across columns).
match_summary_df.isnull().any().any()

False

In [14]:
# Same missing-value check, evaluated on the underlying NumPy array.
match_summary_df.isnull().values.any()

False

In [15]:
# Display the fully encoded frame; pandas truncates the rendering when the
# frame has more rows than the configured display.max_rows.
match_summary_df

Unnamed: 0,league,format,game_in_format,red_team_outcome,blue_team_outcome,season_format,riot_match_id,min_start_time,month,day,year,red_team_id,blue_team_id,winner_id,loser_id
0,0,2,1,0,1,0,111561337007371296,2024-04-14 06:00:00+00:00,4,14,2024,2144,2145,2145,2144
1,0,2,2,0,1,0,111561337007371297,2024-04-14 06:00:00+00:00,4,14,2024,2145,2144,2144,2145
2,0,2,3,1,0,0,111561337007371298,2024-04-14 07:15:17+00:00,4,14,2024,2144,2145,2144,2145
3,0,2,4,0,1,0,111561337007371299,2024-04-14 07:53:09+00:00,4,14,2024,2144,2145,2145,2144
4,0,2,5,1,0,0,111561337007371300,2024-04-14 08:23:02+00:00,4,14,2024,2145,2144,2145,2144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,2,0,1,0,1,1,111997906552170251,2024-03-09 20:00:00+00:00,3,9,2024,2173,2170,2170,2173
498,2,0,1,0,1,1,111997906552104673,2024-03-09 19:00:00+00:00,3,9,2024,2167,2166,2166,2167
499,2,0,1,1,0,1,111997906552104657,2024-03-09 18:00:00+00:00,3,9,2024,2169,2349,2169,2349
500,2,0,1,0,1,1,111997906552104653,2024-03-09 17:00:00+00:00,3,9,2024,2172,2168,2168,2172


In [16]:
# Persist the encoded match table; index=False keeps the row index out of
# the written file.
match_summary_df.to_csv('../data/encoded_match_data.csv',index=False)