In [24]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import ensemble
from xgboost import XGBClassifier
import time
import pandas as pd
import cv2
import numpy as np

In [25]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)

In [26]:

train = pd.read_csv('fashion-mnist_train.csv')
test = pd.read_csv('fashion-mnist_test.csv')
train.isnull().any().sum()
test.isnull().any().sum()

0

In [27]:
df_train = train.copy()
df_test = test.copy()

In [28]:
X_train= df_train.drop(['label'],axis = 1)
X_test = df_train['label']
y_test = df_test.drop(['label'],axis = 1)

X_train = X_train.astype('float32')
y_test = y_test.astype('float32')
X_train /= 255.0
y_test /=255.0

In [29]:
seed = 99
np.random.seed(seed)
X_train, X_val, y_train, y_val = train_test_split(X_train, X_test, test_size=0.1, random_state = seed)

In [30]:
X_train.fillna(X_train.mean(), inplace=True)

In [22]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.4.0-py3-none-any.whl (409 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m409.6/409.6 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.12.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.8/226.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.0-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.0 alembic-1.12.1 colorlog-6.7.0 optuna-3.4.0


In [31]:
import optuna
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

def objective(trial):
    # suggest methods are used to set the range of hyperparameters
    n_components = trial.suggest_int('n_components', 1, 600)
    gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
    C = trial.suggest_loguniform('C', 1e+0, 1e+4)

    pca = PCA(n_components=n_components)
    svm = SVC(gamma=gamma, C=C, kernel="rbf")

    # PCA fitting and transformation should be done inside the objective function
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_val_pca = pca.transform(X_val)

    # Use transformed data to train the classifier
    svm.fit(X_train_pca, y_train)

    # Evaluate classifier performance on validation set
    score = svm.score(X_val_pca, y_val)

    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)


[I 2023-11-15 18:45:38,404] A new study created in memory with name: no-name-5d939d5f-69ad-48f7-b530-a899a63c6c21
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
  C = trial.suggest_loguniform('C', 1e+0, 1e+4)
[I 2023-11-15 18:46:09,674] Trial 0 finished with value: 0.8838028169014085 and parameters: {'n_components': 295, 'gamma': 0.0348239728601189, 'C': 563.4393271553855}. Best is trial 0 with value: 0.8838028169014085.
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
  C = trial.suggest_loguniform('C', 1e+0, 1e+4)
[I 2023-11-15 18:47:36,592] Trial 1 finished with value: 0.09565727699530517 and parameters: {'n_components': 198, 'gamma': 71.25489759363897, 'C': 2.78586287564205}. Best is trial 0 with value: 0.8838028169014085.
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
  C = trial.suggest_loguniform('C', 1e+0, 1e+4)
[I 2023-11-15 18:48:35,197] Trial 2 finished with value: 0.11091549295774648 and parameters: {'n_components': 53, 'gamma': 4.193589012709284, '

In [32]:
# Optuna를 사용한 하이퍼파라미터 튜닝
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# 최적의 하이퍼파라미터 출력
print('Best trial:')
trial = study.best_trial
print('  Value: ', trial.value)
print('  Params: ')
for key, value in trial.params.items():
    print('    {}: {}'.format(key, value))

[I 2023-11-15 19:48:18,369] A new study created in memory with name: no-name-2ddc0679-de43-4101-928f-a2fbe58fb970
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
  C = trial.suggest_loguniform('C', 1e+0, 1e+4)
[I 2023-11-15 19:50:33,591] Trial 0 finished with value: 0.8280516431924883 and parameters: {'n_components': 518, 'gamma': 0.11207847839238501, 'C': 2.5351566250048325}. Best is trial 0 with value: 0.8280516431924883.
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
  C = trial.suggest_loguniform('C', 1e+0, 1e+4)
[I 2023-11-15 19:51:50,092] Trial 1 finished with value: 0.09565727699530517 and parameters: {'n_components': 124, 'gamma': 18.167929476109617, 'C': 12.765170132066997}. Best is trial 0 with value: 0.8280516431924883.
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e+2)
  C = trial.suggest_loguniform('C', 1e+0, 1e+4)
[I 2023-11-15 19:52:24,017] Trial 2 finished with value: 0.8914319248826291 and parameters: {'n_components': 392, 'gamma': 0.0231979501822

Best trial:
  Value:  0.8973004694835681
  Params: 
    n_components: 280
    gamma: 0.045425264641173864
    C: 4.10636432389679


In [33]:
optuna.visualization.plot_optimization_history(study)

In [34]:
# 파라미터들관의 관계
optuna.visualization.plot_parallel_coordinate(study)

In [35]:
# 하이퍼파라미터 중요도
optuna.visualization.plot_param_importances(study)

In [None]:
svc = SVC(gamma='scale',kernel='rbf',C=8)
svc.fit(X_train_PCA1,y_train)

In [None]:
y_pred_svc = svc.predict(X_test_PCA1)
y_train_svc = svc.predict(X_train_PCA1)

In [None]:
svc_train = metrics.accuracy_score(y_train,y_train_svc)
svc_accuracy = metrics.accuracy_score(y_val, y_pred_svc)

print("Train Accuracy score: {}".format(svc_train))
print("Test Accuracy score: {}".format(svc_accuracy))
print(metrics.classification_report(y_val, y_pred_svc))

Train Accuracy score: 0.9763333333333334
Test Accuracy score: 0.9101666666666667
              precision    recall  f1-score   support

           0       0.84      0.86      0.85       574
           1       0.99      0.99      0.99       596
           2       0.83      0.84      0.84       573
           3       0.91      0.94      0.93       624
           4       0.86      0.86      0.86       635
           5       0.99      0.96      0.98       632
           6       0.77      0.73      0.75       600
           7       0.94      0.97      0.96       604
           8       0.97      0.98      0.98       598
           9       0.97      0.96      0.97       564

    accuracy                           0.91      6000
   macro avg       0.91      0.91      0.91      6000
weighted avg       0.91      0.91      0.91      6000

