# 기본 라이브러리 import

In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

# Data load

In [2]:
# Read the training and test splits from the shared data directory.
train, test = map(
    pd.read_csv,
    ('../data/FIFA_train.csv', '../data/FIFA_test.csv'),
)

# 전처리

In [3]:
from sklearn.preprocessing import OneHotEncoder as OHE

# Drop the unneeded player-name feature.
train = train.drop(columns='name')
test = test.drop(columns='name')

# Unify the contract-period format by keeping only the last four characters
# of every value (the values stay strings).
# NOTE(review): presumably those four characters are the year -- confirm
# against the raw data.
train['contract_until'] = train['contract_until'].str[-4:]
test['contract_until'] = test['contract_until'].str[-4:]

# The encoder is fitted on the training split only and reused for the test
# split so both end up with the same one-hot columns.
to_encode_cols = ['continent', 'position', 'prefer_foot']
encoder = OHE()
encoder.fit(train[to_encode_cols])

# Labels for the one-hot columns, built from the fitted categories.
# `OneHotEncoder.get_feature_names()` no longer exists in current
# scikit-learn releases; this reproduces its default `x<i>_<category>`
# naming without relying on any version-specific method.
onehot_cols = [
    f'x{i}_{category}'
    for i, categories in enumerate(encoder.categories_)
    for category in categories
]


def _one_hot_encode(frame):
    """Return a copy of *frame* with ``to_encode_cols`` one-hot encoded.

    The categorical columns are removed and the encoded columns (labelled
    with ``onehot_cols``) are appended on the right.  The encoded frame
    reuses ``frame.index`` so the column-wise concat lines rows up
    correctly even when *frame* does not have a default RangeIndex.
    """
    dense = encoder.transform(frame[to_encode_cols]).toarray()
    encoded = pd.DataFrame(dense, columns=onehot_cols, index=frame.index)
    return pd.concat([frame.drop(columns=to_encode_cols), encoded], axis=1)


tr_encoded = _one_hot_encode(train)
te_encoded = _one_hot_encode(test)

# KFold

In [4]:
from sklearn.model_selection import KFold

# Five shuffled folds; the fixed seed makes the split reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# Materialise the folds once, then separate the training index arrays
# from the validation index arrays (one entry per fold, in fold order).
folds = list(kf.split(tr_encoded))
tdatas = [fit_idx for fit_idx, _ in folds]
tvals = [held_out_idx for _, held_out_idx in folds]

## Decision Tree 

In [5]:
from sklearn.tree import DecisionTreeRegressor as DTR

# Split the encoded training frame: the target is kept as a one-column
# frame, and the feature matrix excludes both the row id and the target.
tr_target = tr_encoded[['value']]
tr_feat = tr_encoded.drop(columns=['id', 'value'])

# Set the test ids aside (as a plain list) and keep the rest as features.
te_feat = te_encoded.drop(columns='id')
te_id = te_encoded['id'].tolist()

# Fit on the full training set with a fixed seed.
model = DTR(random_state=0)
model.fit(tr_feat, tr_target)

# Predict the test targets and convert them to a plain list.
prediction = model.predict(te_feat).tolist()

# Pair each id with its predicted value and write the result to CSV.
result = pd.DataFrame({'id': te_id, 'value': prediction})
result.to_csv('decision_tree.csv', index=False)

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor as RFR