In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import numpy as np
import pandas as pd



In [2]:
# Load the basketball player statistics and hold out 20% of the rows for testing.
df = pd.read_csv("https://raw.githubusercontent.com/wikibook/machine-learning/2.0/data/csv/basketball_stat.csv")
# Pin the seed so that "Restart Kernel -> Run All" always yields the same split;
# without random_state the split (and every metric derived from it) changes per run.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.4,0.6,1.3,0.6,0.5,0.1
1,Steven Adams,C,0.0,4.7,7.7,1.1,1.1,1.0
2,Alexis Ajinca,C,0.0,2.3,4.5,0.3,0.5,0.6
3,Chris Andersen,C,0.0,0.8,2.6,0.4,0.4,0.6
4,Will Barton,SG,1.5,3.5,4.3,3.4,0.8,0.5


Player : 선수
Pos : 포인트가드(PG), 슈팅가드(SG), 스몰포워드(SF), 파워포워드(PF), 센터(C)
3P : 3점 슛, 2P : 2점 슛, TRB : 리바운드(Total Rebounds), AST : 어시스트(Assists), STL : 스틸(Steals), BLK : 블록(Blocks)

In [5]:
# Helper that searches for the best SVM hyper-parameters.
def svc_param_selection(X, y, nfolds):
    """Grid-search an RBF-kernel SVC over fixed ``gamma`` and ``C`` candidates.

    Parameters
    ----------
    X : features passed to ``GridSearchCV.fit``.
    y : labels passed to ``GridSearchCV.fit``.
    nfolds : forwarded as the ``cv`` argument of ``GridSearchCV``.

    Returns
    -------
    The fitted ``GridSearchCV`` object. The best parameter combination
    is also printed as a side effect.
    """
    gamma_candidates = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
    c_candidates = [0.01, 0.1, 1, 10, 100, 1000]
    param_grid = [{
        'kernel': ['rbf'],
        'gamma': gamma_candidates,
        'C': c_candidates,
    }]

    # fit() returns the estimator itself, so the search can be built and fitted in one go.
    search = GridSearchCV(SVC(), param_grid, cv=nfolds).fit(X, y)
    print(search.best_params_)
    return search

kernel : 'rbf' (Radial Basis Function, 가우시안 커널)
gamma : 결정 경계를 얼마나 유연하게 할지 제어(작을수록 넓은 영향범위)
C : 오차 허용 정도(크면 오차를 적게 허용 -> 과적합 위험 증가)

In [8]:
# Training: use the 3P and BLK columns as features and Pos as the label.
X_train = train.loc[:, ['3P', 'BLK']]
y_train = train.loc[:, ['Pos']]
# The label column is handed over as a flat 1-D array; 10 is the number of CV folds.
clf = svc_param_selection(X_train, y_train['Pos'].to_numpy(), nfolds=10)

{'C': 1, 'gamma': 1, 'kernel': 'rbf'}


In [9]:
# Evaluate the fitted search object on the held-out test rows.
X_test = test.loc[:, ['3P', 'BLK']]
y_test = test.loc[:, ['Pos']]

y_true = y_test
y_pred = clf.predict(X_test)

print(classification_report(y_true, y_pred))
print("-" * 20)
print(f"accuracy : {accuracy_score(y_true, y_pred)}")

              precision    recall  f1-score   support

           C       1.00      0.90      0.95        10
          SG       0.91      1.00      0.95        10

    accuracy                           0.95        20
   macro avg       0.95      0.95      0.95        20
weighted avg       0.95      0.95      0.95        20

--------------------
accuracy : 0.95


In [10]:
# Put predictions next to the true labels for a side-by-side comparison.
comparison = pd.DataFrame(
    dict(
        prediction=y_pred,
        ground_truth=y_true.values.ravel(),  # flatten the single label column to 1-D
    )
)
comparison

Unnamed: 0,prediction,ground_truth
0,SG,SG
1,SG,SG
2,SG,SG
3,C,C
4,SG,SG
5,SG,SG
6,C,C
7,C,C
8,C,C
9,C,C
