In [37]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import ensemble
from sklearn.metrics import accuracy_score, classification_report

from sklearn.svm import SVC
from xgboost import XGBClassifier

from scipy.stats import randint
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import time
import pandas as pd
import cv2
import numpy as np

In [15]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)

In [16]:
# Read the two CSV files (train first, then test) from the local
# `fashionmnist/` directory into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'fashionmnist/fashion-mnist_train.csv',
        'fashionmnist/fashion-mnist_test.csv',
    )
)

In [17]:
# Work on copies so the frames loaded from disk stay untouched.
df_train, df_test = train.copy(), test.copy()

In [18]:
# Separate the 'label' column from the remaining columns and rescale the
# remaining columns: cast to float32, then divide by 255.0
# (presumably 8-bit pixel intensities mapped into [0, 1] -- confirm against the CSVs).
#
# NOTE: the variable names are easy to misread:
#   X_test  holds the *training labels* (df_train['label']);
#   y_test  holds the *test-set features* (df_test without its 'label' column).
X_train = df_train.drop(columns=['label']).astype('float32') / 255.0
X_test = df_train['label']
y_test = df_test.drop(columns=['label']).astype('float32') / 255.0

In [19]:
# A single seed drives both NumPy's global RNG and the split below.
seed = 99
np.random.seed(seed)

# Hold out 10% of the training rows as a validation split.
# `X_test` is the training-label Series here (it was assigned from df_train['label']).
# NOTE: this rebinds X_train, so the cell is not re-runnable on its own -- a second
# run would pair the already-reduced X_train with the full-length X_test. Re-run the
# cell that builds X_train / X_test first.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    X_test,
    test_size=0.1,
    random_state=seed,
)

In [31]:
# Project the features onto 400 principal components. The basis is learned from
# the training split only and then applied unchanged to the other feature sets.
#
# random_state pins the randomized SVD solver (used whenever scikit-learn's 'auto'
# policy selects it), so re-running this cell reproduces the same components
# instead of depending on how far the global NumPy RNG has advanced.
pca = PCA(n_components=400, random_state=seed)
pca.fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_val)    # NOTE: validation split, despite "test" in the name
y_test_pca = pca.transform(y_test)   # NOTE: `y_test` holds the test-set features, not labels

# Wrap the transformed arrays in DataFrames (default integer column labels);
# these two frames are what the classifiers below are fitted and scored on.
X_train_PCA1 = pd.DataFrame(X_train_pca)
X_test_PCA1 = pd.DataFrame(X_test_pca)

In [21]:
# Disabled randomized hyper-parameter search over a StandardScaler + SVC pipeline
# (kept for reference only; nothing in this cell executes).
# NOTE(review): two things must be fixed before re-enabling it:
#   * RandomizedSearchCV takes `param_distributions`, not `param_grid`
#     (`param_grid` belongs to GridSearchCV), so the call below raises TypeError.
#   * Parameters of a make_pipeline step are addressed as `<step>__<param>`,
#     i.e. 'svc__C', 'svc__kernel', 'svc__gamma' instead of the bare names.
# pipe_svc=make_pipeline(StandardScaler(), SVC())

# param_dist = {
#     'C' : randint(1, 100),
#     'kernel': ['poly', 'rbf'],
#     'gamma': ['scale'] + list(np.logspace(-3, 3, 7)),
# }

# GS = RandomizedSearchCV(estimator=pipe_svc, param_grid=param_dist,
#                  scoring='accuracy', cv=2, n_jobs=-1,n_iter=5)
# GS = GS.fit(X_train_PCA1,y_train)

In [22]:
# print("최적 파라미터 조합:", GS.best_params_)

In [23]:
# cv_results = GS.cv_results_

---

In [57]:
# Hyper-parameters for the gradient-boosted tree classifier.
xgb_params = {
    'n_estimators': 80,       # number of trees
    'learning_rate': 0.3,     # learning rate
    'max_depth': 4,           # maximum depth of each tree
    'subsample': 1.0,         # use all of the training rows
    'colsample_bytree': 0.9,  # fraction of features used when training each tree
    'gamma': 1,               # minimum loss reduction
    'n_jobs': -1,             # number of CPU cores used for parallel processing
    'random_state': 42,       # seed for the random number generator
}
xgb_model = XGBClassifier(**xgb_params)

# Fit on the PCA-transformed training features; kept as the last expression so
# the fitted estimator is still displayed as the cell output.
xgb_model.fit(X_train_PCA1, y_train)

In [58]:
# Predicted labels for the training features and for the held-out validation
# features (`X_test_PCA1` is built from X_val), in that order.
y_train_pred_xgb, y_val_pred_xgb = (
    xgb_model.predict(features) for features in (X_train_PCA1, X_test_PCA1)
)

In [59]:
# Accuracy on the rows the model was fitted on vs. the held-out validation split.
train_accuracy_xgb = accuracy_score(y_train, y_train_pred_xgb)
val_accuracy_xgb = accuracy_score(y_val, y_val_pred_xgb)

print("Train Accuracy score (XGBoost): {}".format(train_accuracy_xgb))
# Labelled "Validation" rather than "Test": y_val is the 10% that train_test_split
# carved out of the training CSV, not the separate test CSV.
print("Validation Accuracy score (XGBoost): {}".format(val_accuracy_xgb))

# Per-class precision / recall / F1 on the validation split.
print(classification_report(y_val, y_val_pred_xgb))

Train Accuracy score (XGBoost): 0.9511296296296297
Test Accuracy score (XGBoost): 0.8745
              precision    recall  f1-score   support

           0       0.81      0.84      0.83       574
           1       0.99      0.97      0.98       596
           2       0.79      0.78      0.78       573
           3       0.88      0.90      0.89       624
           4       0.80      0.81      0.81       635
           5       0.96      0.93      0.94       632
           6       0.68      0.66      0.67       600
           7       0.92      0.95      0.94       604
           8       0.95      0.95      0.95       598
           9       0.95      0.96      0.96       564

    accuracy                           0.87      6000
   macro avg       0.87      0.87      0.87      6000
weighted avg       0.87      0.87      0.87      6000



---

In [32]:
# Support-vector classifier with an RBF kernel, trained on the PCA features.
svc = SVC(
    C=8,
    kernel='rbf',
    gamma='scale',
)
# Kept as the last expression so the fitted estimator is still displayed as the cell output.
svc.fit(X_train_PCA1, y_train)

In [35]:
# Predicted labels for the held-out validation features (`X_test_PCA1` is built
# from X_val) and for the training features, in that order.
y_pred_svc, y_train_svc = (
    svc.predict(features) for features in (X_test_PCA1, X_train_PCA1)
)

In [38]:
# Accuracy on the rows the SVC was fitted on vs. the held-out validation split.
svc_train = accuracy_score(y_train, y_train_svc)
svc_accuracy = accuracy_score(y_val, y_pred_svc)

print("Train Accuracy score: {}".format(svc_train))
# Labelled "Validation" rather than "Test": y_val is the 10% that train_test_split
# carved out of the training CSV, not the separate test CSV.
print("Validation Accuracy score: {}".format(svc_accuracy))

# Per-class precision / recall / F1 on the validation split.
print(classification_report(y_val, y_pred_svc))

Train Accuracy score: 0.9789074074074074
Test Accuracy score: 0.9105
              precision    recall  f1-score   support

           0       0.85      0.86      0.86       574
           1       0.99      0.98      0.99       596
           2       0.84      0.84      0.84       573
           3       0.91      0.94      0.92       624
           4       0.86      0.87      0.87       635
           5       0.99      0.96      0.98       632
           6       0.78      0.73      0.75       600
           7       0.94      0.97      0.95       604
           8       0.97      0.98      0.98       598
           9       0.97      0.96      0.96       564

    accuracy                           0.91      6000
   macro avg       0.91      0.91      0.91      6000
weighted avg       0.91      0.91      0.91      6000

