# Multilayer Perceptron: Problem

## 0. 모듈 불러오기

In [None]:
''' 기본 모듈 및 시각화 모듈 '''
from IPython.display import display
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

''' 데이터 전처리 모듈 '''
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

''' Multilayer Perceptron 모듈 '''
from sklearn.neural_network import MLPClassifier

''' 결과 평가용 모듈 '''
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

''' 기타 optional'''
# Render floats in pandas output with four decimal places.
pd.set_option('display.float_format', '{:.4f}'.format)

! git colne https://github.com/KU-DIC/LG_time_series_day08.git

## 1. 데이터 불러오기
### StarCraft 분류 문제
<a href='https://archive.ics.uci.edu/ml/datasets/SkillCraft1+Master+Table+Dataset'> https://archive.ics.uci.edu/ml/datasets/SkillCraft1+Master+Table+Dataset </a>

### 설명변수 (X)
- 1. GameID: Unique ID number for each game (integer)
- 3. Age: Age of each player (integer)
- 4. HoursPerWeek: Reported hours spent playing per week (integer)
- 5. TotalHours: Reported total hours spent playing (integer)
- 6. APM: Actions per minute (continuous)
- 7. SelectByHotkeys: Number of unit or building selections made using hotkeys per timestamp (continuous)
- 8. AssignToHotkeys: Number of units or buildings assigned to hotkeys per timestamp (continuous)
- 9. UniqueHotkeys: Number of unique hotkeys used per timestamp (continuous)
- 10. MinimapAttacks: Number of attack actions on minimap per timestamp (continuous)
- 11. MinimapRightClicks: Number of right-clicks on minimap per timestamp (continuous)
- 12. NumberOfPACs: Number of PACs per timestamp (continuous)
- 13. GapBetweenPACs: Mean duration in milliseconds between PACs (continuous)
- 14. ActionLatency: Mean latency from the onset of a PAC to its first action in milliseconds (continuous)
- 15. ActionsInPAC: Mean number of actions within each PAC (continuous)
- 16. TotalMapExplored: The number of 24x24 game coordinate grids viewed by the player per timestamp (continuous)
- 17. WorkersMade: Number of SCVs, drones, and probes trained per timestamp (continuous)
- 18. UniqueUnitsMade: Unique units made per timestamp (continuous)
- 19. ComplexUnitsMade: Number of ghosts, infestors, and high templars trained per timestamp (continuous)
- 20. ComplexAbilitiesUsed: Abilities requiring specific targeting instructions used per timestamp (continuous)

### 반응변수 (Y)
- 2. LeagueIndex: Bronze, Silver, Gold, Platinum, Diamond, Master, GrandMaster, and Professional leagues coded 1-8 (Ordinal)

In [None]:
# Read the SkillCraft1 table from the local LG_time_series_day08 directory.
csv_path = './LG_time_series_day08/data/SkillCraft1_Dataset.csv'
data = pd.read_csv(csv_path)

## 2. 데이터 전처리하기

### 데이터 확인

In [None]:
# Print the table's shape, then preview its first rows.
# Exercise blank: the placeholder after `data.` is left for the student.
print('Data shape: {}'.format(data.'''Answer'''))
data.head()

### Column별 기초 통계량 확인

In [None]:
# Per-column basic statistics of the table.
# Exercise blank: the placeholder after `data.` is left for the student.
data.'''Answer'''

### 불필요한 변수 제거

In [None]:
# Number of distinct GameID values.
# Exercise blank: the column key placeholder is left for the student.
num_id = len(data['''Answer'''].unique())

print(f'ID: {num_id}')

In [None]:
# Remove GameID from the table in place.
# `columns=` already names the axis being dropped from, so the redundant
# `axis=1` argument is omitted.
# Exercise blank: the column name placeholder is left for the student.
data.drop(columns=['''Answer'''], inplace=True)

In [None]:
# Print the table's shape again and preview its first rows.
# Exercise blank: the placeholder after `data.` is left for the student.
print('Data shape: {}'.format(data.'''Answer'''))
data.head()

### 반응변수(Y)의 클래스 비율 확인
1: 브론즈<br>
2: 실버<br>
3: 골드<br>
4: 플래티넘<br>
5: 다이아<br>
6: 마스터<br>
7: 그랜드마스터<br>
8: 프로

In [None]:
# Inspect how the LeagueIndex classes are distributed.
# Exercise blank: the Series method placeholder is left for the student.
data['LeagueIndex'].'''Answer'''

### 설명변수(X)와 반응변수(Y) 정의
#### 다이아 미만: 0, 다이아 이상: 1로 반응변수(y) 재정의

In [None]:
# Define the explanatory variables (X) and the response variable (y).
# Exercise blanks: the '''Answer''' placeholders are left for the student.
X = data.drop(labels='''Answer''', axis=1)
y = data['''Answer'''].values

# Binarize the response: below Diamond (< 5) -> 0, Diamond or above (>= 5) -> 1.
# A new array is built instead of assigning into `y`: the array returned by
# `.values` may share memory with `data` (so masked assignment would also
# rewrite the DataFrame column) or be read-only under pandas Copy-on-Write.
# The original dtype is kept so downstream code sees the same integer labels.
y = (y >= 5).astype(y.dtype)

In [None]:
# Show the first rows of X, followed by the y array.
for preview in (X.head(), y):
    display(preview)

### 학습 데이터(Training Dataset)와 테스트 데이터(Testing Dataset) 분리
- 학습데이터와 테스트 데이터의 클래스 비율이 달라지지 않도록 stratify 옵션 사용

In [None]:
# Split into training and testing sets: 30% is held out (test_size=0.3),
# stratify=y is passed so both splits keep the class ratio, and random_state
# is pinned to 0.
# Exercise blanks: the two positional arguments are left for the student.
train_X, test_X, train_y, test_y = train_test_split('''Answer''', '''Answer''', stratify=y, test_size=0.3, random_state=0)

### 정규화 : Standardization(표준정규화) with Standard Scaler

In [None]:
# Summary statistics of the training features (shown as the cell output).
train_X.describe()

In [None]:
# Standardization: fit the scaler on the training split only, then apply
# that same fitted scaler to both the training and the testing split.
scaler = StandardScaler().fit(train_X)

train_X, test_X = scaler.transform(train_X), scaler.transform(test_X)

In [None]:
# Wrap train_X in a DataFrame labelled with X's column names and show its
# summary statistics (shown as the cell output).
pd.DataFrame(train_X, columns = X.columns).describe()

## 3. 모델링: Multilayer Perceptron (MLP) Classifier

In [None]:
# Configure the MLP classifier used in the rest of the notebook.
# Exercise blanks: `activation` and `solver` are left for the student.
# Fixed settings: hidden_layer_sizes=100, mini-batches of 64, initial learning
# rate 0.005, at most 500 iterations, verbose output on, random_state pinned to 0.
clf_mlp = MLPClassifier(hidden_layer_sizes=100,
                        activation='''Answer''',
                        solver='''Answer''',
                        batch_size=64,
                        learning_rate_init=0.005,
                        max_iter=500,
                        verbose=True,
                        random_state=0)

### MLP Classifier 학습

In [None]:
# Train the classifier.
# Exercise blanks: both arguments to fit() are left for the student.
clf_mlp.fit('''Answer''', '''Answer''')

### 학습 상태 확인 (learning curve)

In [None]:
# Learning curve: plot the loss values stored on the fitted classifier.
train_loss_values = clf_mlp.loss_curve_

plt.figure(figsize=(15, 8))
plt.plot(train_loss_values, label='Train Loss')
plt.title("Learning Curve of trained MLP Classifier", size=20)
plt.legend(fontsize=15)
plt.show()

## 4. Multilayer Perceptron 모델 성능 평가

### 학습된 MLP Classifier 결과 확인: Training Data

In [None]:
# Predictions for the data the model was trained on.
# Exercise blank: the argument to predict() is left for the student.
train_y_pred = clf_mlp.predict('''Answer''')

# Compute the confusion matrix; labels=[1,0] lists class 1 before class 0.
cm_train = confusion_matrix(y_true=train_y, y_pred=train_y_pred, labels=[1,0])

# Visualization: annotated heatmap with integer cell counts. The tick labels
# [1, 0] are placed at the cell centres (0.5, 1.5) in the same order as `labels`.
plt.figure(figsize=(10, 8))
sns.heatmap(data=cm_train, annot=True, fmt='d', annot_kws={'size': 30}, cmap='Blues')
plt.xticks(ticks=[0.5,1.5], labels=[1,0], size=15)
plt.yticks(ticks=[0.5,1.5], labels=[1,0], size=15)
plt.xlabel('Predicted', size=20)
plt.ylabel('True', size=20)
plt.show()

### 학습된 MLP Classifier 결과 확인: Testing Data

In [None]:
# Predictions for the test data.
# Exercise blank: the argument to predict() is left for the student.
test_y_pred = clf_mlp.predict('''Answer''')

# Compute the confusion matrix; labels=[1,0] lists class 1 before class 0.
cm_test = confusion_matrix(y_true=test_y, y_pred=test_y_pred, labels=[1,0])

# Visualization: annotated heatmap with integer cell counts. The tick labels
# [1, 0] are placed at the cell centres (0.5, 1.5) in the same order as `labels`.
plt.figure(figsize=(10, 8))
sns.heatmap(data=cm_test, annot=True, fmt='d', annot_kws={'size': 30}, cmap='Blues')
plt.xticks(ticks=[0.5,1.5], labels=[1,0], size=15)
plt.yticks(ticks=[0.5,1.5], labels=[1,0], size=15)
plt.xlabel('Predicted', size=20)
plt.ylabel('True', size=20)
plt.show()

### 학습된 MLP Classifier 성능 평가: Training Data

In [None]:
# Training-set scores: accuracy, recall, precision and F1.
# Exercise blanks: the arguments of each metric call are left for the student.
train_acc = accuracy_score('''Answer''', '''Answer''')
train_rec = recall_score('''Answer''', '''Answer''')
train_prec = precision_score('''Answer''', '''Answer''')
train_f1 = f1_score('''Answer''', '''Answer''')

# The value from recall_score is labelled 'Recall' (previously 'Sensitivity',
# another name for the same metric) so the report uses the same metric names
# as the test-set report; the label is padded to keep the colons aligned.
print('Train Accuracy   : {:.3f}'.format(train_acc))
print('Train Recall     : {:.3f}'.format(train_rec))
print('Train Precision  : {:.3f}'.format(train_prec))
print('Train F1 Score   : {:.3f}'.format(train_f1))

### 학습된 MLP Classifier 성능 평가: Testing Data

In [None]:
# Test-set scores: accuracy, recall, precision and F1.
# Exercise blanks: the arguments of each metric call are left for the student.
test_acc = accuracy_score('''Answer''', '''Answer''')
test_rec = recall_score('''Answer''', '''Answer''')
test_prec = precision_score('''Answer''', '''Answer''')
test_f1 = f1_score('''Answer''', '''Answer''')

# Report each score with three decimal places.
print('Test Accuracy   : {:.3f}'.format(test_acc))
print('Test Recall     : {:.3f}'.format(test_rec))
print('Test Precision  : {:.3f}'.format(test_prec))
print('Test F1 Score   : {:.3f}'.format(test_f1))