## 데이터 불러오기

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from tqdm import tqdm

# Load the per-row analysis table.  The code below reads one `score_<model>`
# column per model and one column per indicator category from it.
df = pd.read_feather('./experiment/analysis_result.ftr')

# NOTE(review): only model1 and model2 are plotted further down; confirm that
# `score_model3` really exists in df, otherwise the counting loop raises
# AttributeError.
models = ('model1', 'model2', 'model3')
categories = ('gold', 'creep', 'kda', 'avg', 'avg_nocreep')

# Side length of each count matrix.  The column values are used directly as
# array indices, so this assumes they are integers in 0..9 -- TODO confirm.
n_ranks = 10

# analysis[model][cat][s, r] is the number of rows whose `score_<model>` value
# is s and whose `<cat>` value is r.  The counters are int64 because int8
# overflows at 127, far below the per-cell counts a table with hundreds of
# thousands of rows produces.
analysis = defaultdict(
    lambda: defaultdict(lambda: np.zeros([n_ranks, n_ranks], dtype=np.int64))
)

for row in tqdm(df.itertuples(), total=len(df)):
    for model in models:
        score_rank = getattr(row, f'score_{model}')
        for cat in categories:
            analysis[model][cat][score_rank, getattr(row, cat)] += 1

# Flatten every count matrix into a long-format DataFrame keyed by
# '<model>_<cat>' (e.g. 'model1_gold'), which is the key the plotting cells
# look up.  Columns: `score_<model>`, `<cat>` (both shifted to 1..n_ranks for
# display) and `number` (the count for that pair).
display_ranks = np.arange(1, n_ranks + 1)
analysis_df = {}
for model in models:
    for cat in categories:
        counts = analysis[model][cat]
        analysis_df[f'{model}_{cat}'] = pd.DataFrame({
            # Row-major flattening: the score rank varies slowest, the
            # indicator rank fastest, matching counts.ravel().
            f'score_{model}': np.repeat(display_ranks, n_ranks),
            cat: np.tile(display_ranks, n_ranks),
            'number': counts.ravel(),
        })

100%|██████████| 455750/455750 [00:01<00:00, 251568.43it/s]
100%|██████████| 10/10 [00:00<00:00, 4359.98it/s]


## Gold 획득량 순위와 모델 스코어 간의 관계

### Model 1 (h_0 값을 승자와 패자 다르게 준 것)

In [21]:
# Bubble chart of gold rank (x) against the model 1 score rank (y).
# The frame is looked up under the same '<model>_<indicator>' key in
# `analysis_df` that every other plotting cell uses; the previous
# `analysis['model']['gold']` lookup named neither a real model nor the
# DataFrame container.
input_df = analysis_df['model1_gold']
fig = px.scatter(
    input_df, x='gold', y='score_model1',
    labels={
        'score_model1': 'model score'
    },
    # Bubble size and colour both encode the `number` column.
    size='number', color='number', size_max=30, width=600, height=600)
fig.show()

### Model 2 (h_0 값을 승자와 패자 같게 준 것)

In [22]:
# Bubble chart of gold rank (x) against the model 2 score rank (y); both the
# bubble size and the colour scale are driven by the `number` column.
input_df = analysis_df['model2_gold']
fig = px.scatter(
    input_df,
    x='gold',
    y='score_model2',
    size='number',
    size_max=30,
    color='number',
    labels={'score_model2': 'model score'},
    width=600,
    height=600,
)
fig.show()

## KDA 순위와 모델 스코어 간의 관계

### Model 1

In [19]:
# Bubble chart of KDA rank (x) against the model 1 score rank (y); both the
# bubble size and the colour scale are driven by the `number` column.
input_df = analysis_df['model1_kda']
fig = px.scatter(
    input_df,
    x='kda',
    y='score_model1',
    size='number',
    size_max=30,
    color='number',
    labels={'score_model1': 'model score'},
    width=600,
    height=600,
)
fig.show()

### Model 2

In [20]:
# Bubble chart of KDA rank (x) against the model 2 score rank (y); both the
# bubble size and the colour scale are driven by the `number` column.
input_df = analysis_df['model2_kda']
fig = px.scatter(
    input_df,
    x='kda',
    y='score_model2',
    size='number',
    size_max=30,
    color='number',
    labels={'score_model2': 'model score'},
    width=600,
    height=600,
)
fig.show()

## Creep score 순위와 모델 스코어 간의 관계

### Model 1

In [17]:
# Bubble chart of creep-score rank (x) against the model 1 score rank (y);
# both the bubble size and the colour scale are driven by the `number` column.
input_df = analysis_df['model1_creep']
fig = px.scatter(
    input_df,
    x='creep',
    y='score_model1',
    size='number',
    size_max=30,
    color='number',
    labels={'creep': 'creep score', 'score_model1': 'model score'},
    width=600,
    height=600,
)
fig.show()

### Model 2

In [18]:
# Bubble chart of creep-score rank (x) against the model 2 score rank (y);
# both the bubble size and the colour scale are driven by the `number` column.
input_df = analysis_df['model2_creep']
fig = px.scatter(
    input_df,
    x='creep',
    y='score_model2',
    size='number',
    size_max=30,
    color='number',
    labels={'creep': 'creep score', 'score_model2': 'model score'},
    width=600,
    height=600,
)
fig.show()

## 기존 지표 평균과 모델 스코어 간의 관계

### Model 1

In [16]:
# Bubble chart of the `avg` indicator rank (x) against the model 1 score rank
# (y); both the bubble size and the colour scale are driven by `number`.
input_df = analysis_df['model1_avg']
fig = px.scatter(
    input_df,
    x='avg',
    y='score_model1',
    size='number',
    size_max=30,
    color='number',
    labels={
        'avg': 'average of traditional indicators',
        'score_model1': 'model score',
    },
    width=600,
    height=600,
)
fig.show()

### Model 2

In [15]:
# Bubble chart of the `avg` indicator rank (x) against the model 2 score rank
# (y); both the bubble size and the colour scale are driven by `number`.
input_df = analysis_df['model2_avg']
fig = px.scatter(
    input_df,
    x='avg',
    y='score_model2',
    size='number',
    size_max=30,
    color='number',
    labels={
        'avg': 'average of traditional indicators',
        'score_model2': 'model score',
    },
    width=600,
    height=600,
)
fig.show()

## 기존 지표 평균(creep score 제외)과 모델 스코어 간의 관계

### Model 1

In [13]:
# Bubble chart of the `avg_nocreep` indicator rank (x) against the model 1
# score rank (y); both the bubble size and the colour scale are driven by
# `number`.
input_df = analysis_df['model1_avg_nocreep']
fig = px.scatter(
    input_df,
    x='avg_nocreep',
    y='score_model1',
    size='number',
    size_max=30,
    color='number',
    labels={
        'avg_nocreep': 'average of kda and gold',
        'score_model1': 'model score',
    },
    width=600,
    height=600,
)
fig.show()

### Model 2

In [14]:
# Bubble chart of the `avg_nocreep` indicator rank (x) against the model 2
# score rank (y); both the bubble size and the colour scale are driven by
# `number`.
input_df = analysis_df['model2_avg_nocreep']
fig = px.scatter(
    input_df,
    x='avg_nocreep',
    y='score_model2',
    size='number',
    size_max=30,
    color='number',
    labels={
        'avg_nocreep': 'average of kda and gold',
        'score_model2': 'model score',
    },
    width=600,
    height=600,
)
fig.show()

## 각 지표와 모델 점수 간의 오차(MSE)

In [12]:
def mse(x: list, y: list):
    """Return the mean squared error between two equal-length sequences.

    Args:
        x: First sequence of numbers.
        y: Second sequence of numbers, paired element-wise with ``x``.

    Returns:
        The mean of ``(xi - yi) ** 2`` over all pairs.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
        ZeroDivisionError: If both sequences are empty.
    """
    # zip() would silently drop the unmatched tail of the longer input and the
    # mean would then be taken over the shorter length only, so reject
    # mismatched inputs up front instead of returning a misleading number.
    if len(x) != len(y):
        raise ValueError(
            f'mse() needs sequences of equal length, got {len(x)} and {len(y)}'
        )
    return sum((xi - yi) ** 2 for xi, yi in zip(x, y)) / len(x)

# Print the MSE between every indicator column and each model's score column,
# all of model1's lines first and then model2's.
inds = ['gold', 'creep', 'kda', 'avg', 'avg_nocreep']
for score_col in ('score_model1', 'score_model2'):
    # The score column is the same for every indicator, so convert it to a
    # list once per model rather than once per indicator.
    scores = df.loc[:, score_col].tolist()
    for ind in inds:
        err = mse(df.loc[:, ind].tolist(), scores)
        print(f'MSE between {ind:11s} rank and {score_col} rank: ', err)

MSE between gold        rank and score_model1 rank:  12.625228743828854
MSE between creep       rank and score_model1 rank:  17.63213165112452
MSE between kda         rank and score_model1 rank:  7.2044103126714205
MSE between avg         rank and score_model1 rank:  9.040302797586396
MSE between avg_nocreep rank and score_model1 rank:  8.019188151398794
MSE between gold        rank and score_model2 rank:  8.220563905650028
MSE between creep       rank and score_model2 rank:  15.542371914426768
MSE between kda         rank and score_model2 rank:  7.796383982446517
MSE between avg         rank and score_model2 rank:  7.0783543609435
MSE between avg_nocreep rank and score_model2 rank:  6.113428414701042
