## Open world - MultiClass

### 1. RandomForest

In [1]:
from google.colab import drive
import pickle
import numpy as np

In [2]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# 데이터 로드:
with open("./drive/MyDrive/data/processed_multiclass_data_open_world.pkl", "rb") as f:
    data = pickle.load(f)
X = np.column_stack(list(data['features'].values()))
y = np.array(data['y'])


In [4]:
# 데이터 분할

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


In [5]:
# 모델 학습 - Random Forest

from sklearn.ensemble import RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
model_rf.fit(X_train, y_train)


In [6]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model_rf, X_train, y_train, cv=5)
print("Cross-validation scores:", scores)
print("Mean cross-validation score:", scores.mean())

Cross-validation scores: [0.61647727 0.60710227 0.61107955 0.61420455 0.61534091]
Mean cross-validation score: 0.6128409090909092


In [7]:
# 모델 평가

from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
y_pred = model_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")

print(classification_report(y_test, y_pred))


Accuracy: 0.62
Precision: 0.62
Recall: 0.62
              precision    recall  f1-score   support

        -1.0       0.59      0.61      0.60       600
         0.0       0.71      0.55      0.62        40
         1.0       0.45      0.45      0.45        40
         2.0       0.68      0.80      0.74        40
         3.0       0.58      0.55      0.56        40
         4.0       0.63      0.65      0.64        40
         5.0       0.63      0.65      0.64        40
         6.0       0.73      0.80      0.76        40
         7.0       0.51      0.55      0.53        40
         8.0       0.82      0.70      0.76        40
         9.0       0.71      0.55      0.62        40
        10.0       0.46      0.42      0.44        40
        11.0       0.67      0.60      0.63        40
        12.0       0.75      0.82      0.79        40
        13.0       0.61      0.35      0.44        40
        14.0       0.43      0.33      0.37        40
        15.0       0.67      0.70    

### 2. Support Vector Machine

In [None]:
# 모델 학습 - SVM

from sklearn.svm import SVC
model_svm = SVC(kernel='rbf', C=10, gamma=0.5, random_state=42)
model_svm.fit(X_train, y_train)


In [9]:
# 모델 평가
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

y_pred = model_svm.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Confusion Matrix:")
print(conf_matrix)

print(classification_report(y_test, y_pred))

Accuracy: 0.15
Precision: 0.03
Recall: 0.15
Confusion Matrix:
[[583   0   0 ...   0   0   0]
 [ 40   0   0 ...   0   0   0]
 [ 40   0   0 ...   0   0   0]
 ...
 [ 27   0   0 ...   0   0   0]
 [ 40   0   0 ...   0   0   0]
 [ 31   0   0 ...   0   0   0]]
              precision    recall  f1-score   support

        -1.0       0.14      0.97      0.24       600
         0.0       0.00      0.00      0.00        40
         1.0       0.00      0.00      0.00        40
         2.0       0.00      0.00      0.00        40
         3.0       0.00      0.00      0.00        40
         4.0       0.00      0.00      0.00        40
         5.0       0.00      0.00      0.00        40
         6.0       0.42      0.20      0.27        40
         7.0       0.00      0.00      0.00        40
         8.0       0.00      0.00      0.00        40
         9.0       0.00      0.00      0.00        40
        10.0       0.00      0.00      0.00        40
        11.0       0.00      0.00      0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
