### 1. Closed-world Multi-class Classification

In [None]:
from google.colab import drive
import pickle
import numpy as np

In [None]:
# Mount Google Drive; the dataset pickles are read from ./drive/MyDrive/data below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load data (closed-world dataset)

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open("./drive/MyDrive/data/processed_mon_data_closed_world.pkl", "rb") as f:
    data = pickle.load(f)

# Every value stored under data['features'] becomes one column of X.
feature_columns = list(data['features'].values())
X = np.column_stack(feature_columns)
y = np.array(data['y'])  # labels are used unchanged

In [None]:
# Split data: 80% train / 20% test, stratified on the labels, fixed seed

from sklearn.model_selection import train_test_split

split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [14]:
# Train model - Random Forest

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,  # fixed seed so the fitted forest is reproducible
)
model.fit(X_train, y_train)


In [15]:
# Evaluation on the held-out test split

from sklearn.metrics import classification_report, accuracy_score

y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / f1 / support table.
test_report = classification_report(y_test, y_pred)
print(test_report)


Accuracy: 0.6368421052631579
              precision    recall  f1-score   support

           0       0.50      0.47      0.49        40
           1       0.48      0.40      0.44        40
           2       0.70      0.88      0.78        40
           3       0.58      0.70      0.64        40
           4       0.62      0.60      0.61        40
           5       0.68      0.57      0.62        40
           6       0.79      0.78      0.78        40
           7       0.69      0.62      0.66        40
           8       0.70      0.75      0.72        40
           9       0.67      0.65      0.66        40
          10       0.61      0.55      0.58        40
          11       0.63      0.55      0.59        40
          12       0.87      0.82      0.85        40
          13       0.54      0.47      0.51        40
          14       0.54      0.47      0.51        40
          15       0.67      0.78      0.72        40
          16       0.78      0.53      0.63        4

### 2. Open-world Binary Classification

In [None]:
# Load data (open-world binary dataset)
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open("./drive/MyDrive/data/processed_binary_data_open_world.pkl", "rb") as f:
    data = pickle.load(f)

# Every value stored under data['features'] becomes one column of X.
feature_columns = list(data['features'].values())
X = np.column_stack(feature_columns)
y = np.array(data['y'])



In [None]:
# Split data: 80% train / 20% test, stratified on the labels, fixed seed
from sklearn.model_selection import train_test_split

split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)



In [None]:
# Model training - SVM (RBF kernel) on standardized features
#
# RBF-kernel SVMs are not scale invariant, so the features are standardized
# before they reach the classifier. The scaler lives inside the pipeline, which
# means it is fitted on the training split only and then applied unchanged
# whenever model.predict() is called.

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

model = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42),
)
model.fit(X_train, y_train)


In [None]:
# Model evaluation on the held-out test split
from sklearn.metrics import classification_report, accuracy_score

y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / f1 / support table.
test_report = classification_report(y_test, y_pred)
print(test_report)


### 3. Open-world Multi-class Classification

In [None]:
# Load data (open-world multi-class dataset)
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open("./drive/MyDrive/data/processed_multiclass_data_open_world.pkl", "rb") as f:
    data = pickle.load(f)

# Every value stored under data['features'] becomes one column of X.
feature_columns = list(data['features'].values())
X = np.column_stack(feature_columns)
y = np.array(data['y'])



In [None]:
# Split data: 80% train / 20% test, stratified on the labels, fixed seed
from sklearn.model_selection import train_test_split

split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)



In [None]:
# Model training - Random Forest (fixed hyperparameters; no search happens here)

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,  # fixed seed so the fitted forest is reproducible
)
model.fit(X_train, y_train)


In [None]:
# Model evaluation on the held-out test split

from sklearn.metrics import classification_report, accuracy_score

y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / f1 / support table.
test_report = classification_report(y_test, y_pred)
print(test_report)


Accuracy: 0.6154545454545455
              precision    recall  f1-score   support

        -1.0       0.58      0.64      0.61       600
         0.0       0.61      0.55      0.58        40
         1.0       0.44      0.42      0.43        40
         2.0       0.67      0.80      0.73        40
         3.0       0.51      0.53      0.52        40
         4.0       0.74      0.70      0.72        40
         5.0       0.63      0.60      0.62        40
         6.0       0.72      0.82      0.77        40
         7.0       0.56      0.57      0.57        40
         8.0       0.76      0.65      0.70        40
         9.0       0.74      0.57      0.65        40
        10.0       0.49      0.45      0.47        40
        11.0       0.56      0.57      0.57        40
        12.0       0.77      0.85      0.81        40
        13.0       0.56      0.38      0.45        40
        14.0       0.46      0.33      0.38        40
        15.0       0.71      0.75      0.73        4

### 추가: 각 시나리오 공통 추천

In [None]:
# Hyperparameter tuning: SVM
# 5-fold grid search over C and gamma for an RBF-kernel SVC, run on whichever
# X_train / y_train were produced by the most recently executed split cell.
from sklearn.model_selection import GridSearchCV
# SVC is imported here so this cell also works when the scenario that was run
# before it never imported sklearn.svm.
from sklearn.svm import SVC

# NOTE(review): features are passed in unscaled; RBF SVMs are scale-sensitive,
# so consider standardizing them (and revisiting the gamma grid) - TODO confirm.
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']}
grid = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5)
grid.fit(X_train, y_train)
print("Best Parameters:", grid.best_params_)



In [None]:
# Hyperparameter tuning: Random Forest
# 5-fold grid search over the number of trees and the maximum tree depth, run
# on whichever X_train / y_train were produced by the most recent split cell.

# Imported here so the cell does not depend on the (slow) SVM search cell or on
# an earlier scenario having been executed first.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators': [100, 200], 'max_depth': [10, 20, None]}
# A fixed seed keeps the search reproducible: without it every candidate is
# fitted with different randomness on each run, so the reported best
# parameters can change between runs.
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
grid.fit(X_train, y_train)
print("Best Parameters:", grid.best_params_)



In [None]:
# Cross-validation: 5-fold scores of the current `model` on the full X, y
from sklearn.model_selection import cross_val_score

scores = cross_val_score(model, X, y, cv=5)
mean_accuracy = scores.mean()

print("Cross-Validation Scores:", scores)
print("Mean Accuracy:", mean_accuracy)



In [None]:
# Feature importance analysis (Random Forest)
# Reads model.feature_importances_, so `model` must be an estimator that
# exposes that attribute (e.g. the fitted RandomForestClassifier).
import matplotlib.pyplot as plt

feature_importances = model.feature_importances_
feature_positions = range(len(feature_importances))

# One bar per feature, in column order of X.
fig, ax = plt.subplots()
ax.bar(feature_positions, feature_importances)
ax.set_xlabel("Feature Index")
ax.set_ylabel("Importance")
ax.set_title("Feature Importance")
plt.show()

