In [None]:
import numpy as np
import pandas as pd

import utils

In [None]:
# Read the one-hot-encoded table from disk.
X_ohe_encoded = pd.read_csv('Data//X_ohe_encoded.csv')

# The 'fog_train.class' column is the target; every remaining column is a feature.
y = X_ohe_encoded['fog_train.class']
X = X_ohe_encoded.drop('fog_train.class', axis=1)

In [None]:
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
# Re-attach the target column so that features and label are resampled together.
data_combined = pd.concat([X, y], axis=1)

# Split the rows into class 3 (treated by this notebook as the majority class)
# and all remaining classes.
is_majority = data_combined['fog_train.class'] == 3
majority_class = data_combined[is_majority]
minority_classes = data_combined[~is_majority]

# Downsample class 3 without replacement to the combined size of the other classes.
# The request is capped at the number of class-3 rows that actually exist: asking
# resample() for more rows than are available with replace=False raises ValueError.
n_majority_keep = min(len(majority_class), len(minority_classes))
majority_downsampled = resample(
    majority_class,
    replace=False,               # sample without replacement
    n_samples=n_majority_keep,   # match the size of the other classes (capped)
    random_state=42,             # fixed seed for reproducibility
)

# Recombine the untouched minority rows with the downsampled majority rows.
downsampled_data = pd.concat([minority_classes, majority_downsampled])

# Separate features and label again.
X_downsampled = downsampled_data.drop(columns=['fog_train.class'])
y_downsampled = downsampled_data['fog_train.class']

# Hold out 20% of the balanced data for testing (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X_downsampled, y_downsampled, test_size=0.2, random_state=42
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fit a logistic-regression classifier on the training split
# (iteration cap raised to 1000).
linear_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_linear = linear_model.predict(X_test)

# Score the predictions with the project's CSI helper and report the value.
# NOTE(review): CSI presumably stands for Critical Success Index over multiple
# classes — confirm against utils.calculate_csi.
csi = utils.calculate_csi(y_test, y_pred_linear)
print(f'CSI: {csi}')

In [None]:
# Recorded output for the logistic-regression model, kept as a bare string so the
# cell simply echoes it. Among the cells visible here only the CSI value is
# printed; the accuracy/precision/recall/F1 figures presumably come from metric
# code that is no longer in the notebook — TODO confirm, and re-run to refresh.
'''
Logistic Regression Accuracy: 0.62578125
Logistic Regression Precision: 0.5890220040974103
Logistic Regression Recall: 0.62578125
Logistic Regression F1 Score: 0.5906919716534494
CSI: 0.34392265193370164
'''

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fit a support-vector classifier on the training split. probability=True is kept
# so the fitted model can also provide class-probability estimates.
svm_model = SVC(probability=True).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_svm = svm_model.predict(X_test)

# Score the predictions with the project's CSI helper and report the value.
csi = utils.calculate_csi(y_test, y_pred_svm)
print(f'CSI: {csi}')

In [None]:
# Recorded output for the SVM model, kept as a bare string so the cell simply
# echoes it. Among the cells visible here only the CSI value is printed; the
# accuracy/precision/recall/F1 figures presumably come from metric code that is
# no longer in the notebook — TODO confirm, and re-run to refresh.
'''
SVM Accuracy: 0.47890625
SVM Precision: 0.2293511962890625
SVM Recall: 0.47890625
SVM F1 Score: 0.310163265979926
CSI: 0.2674731182795699
'''

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import mode

# Fit K-Means on the training features with one cluster per distinct value of `y`
# (seeded for reproducibility).
n_classes = len(np.unique(y))
kmeans_model = KMeans(n_clusters=n_classes, random_state=42).fit(X_train)

# Cluster id assigned to every test row.
y_pred_kmeans = kmeans_model.predict(X_test)

# Map cluster ids onto class labels by majority vote inside each cluster.
def match_labels(true_labels, pred_labels):
    """Relabel every cluster with the most frequent true label among its members.

    Parameters
    ----------
    true_labels : array-like of shape (n_samples,)
        Ground-truth class labels (list, NumPy array or pandas Series). They are
        read positionally; any pandas index is ignored.
    pred_labels : array-like of shape (n_samples,)
        Cluster ids, aligned position by position with ``true_labels``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        For each sample, the majority true label of the cluster it belongs to.
        Ties are resolved in favour of the smallest label. The dtype follows
        ``true_labels`` so string or float labels are preserved.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    truth = np.asarray(true_labels)
    cluster_ids = np.asarray(pred_labels)
    if truth.shape != cluster_ids.shape:
        raise ValueError(
            f"true_labels and pred_labels must have the same shape, "
            f"got {truth.shape} and {cluster_ids.shape}"
        )

    # Allocate with the dtype of the true labels, not of the cluster ids, so the
    # majority label is stored without casting.
    matched = np.empty(cluster_ids.shape, dtype=truth.dtype)
    for cluster_id in np.unique(cluster_ids):
        members = cluster_ids == cluster_id
        # np.unique returns sorted values, so argmax picks the smallest label
        # among equally frequent ones.
        values, counts = np.unique(truth[members], return_counts=True)
        matched[members] = values[np.argmax(counts)]
    return matched

# Derive the cluster -> class mapping from the TRAINING split only. Building it
# from y_test would let the test labels shape the very predictions that are then
# scored against those same labels (label leakage).
train_cluster_ids = kmeans_model.labels_  # cluster id of each X_train row from fit()
train_matched = match_labels(y_train, train_cluster_ids)
cluster_to_class = dict(zip(np.asarray(train_cluster_ids).tolist(),
                            np.asarray(train_matched).tolist()))

# Fallback for a cluster id that has no training members: most frequent training class.
fallback_class = y_train.mode().iloc[0]

# Translate the test-set cluster ids into class labels with the training-derived mapping.
y_pred_kmeans_matched = np.array(
    [cluster_to_class.get(cluster_id, fallback_class)
     for cluster_id in np.asarray(y_pred_kmeans).tolist()]
)

# Score the mapped predictions with the project's CSI helper and report the value.
csi = utils.calculate_csi(y_test, y_pred_kmeans_matched)

print(f'CSI: {csi}')

In [None]:
# Recorded output for the K-Means model, kept as a bare string so the cell simply
# echoes it. Among the cells visible here only the CSI value is printed; the
# silhouette/accuracy/precision/recall/F1 figures presumably come from metric
# code that is no longer in the notebook — TODO confirm, and re-run to refresh.
# NOTE(review): accuracy/precision/recall/F1 are identical to the recorded SVM
# figures, which would be consistent with both models predicting a single class —
# verify.
'''
K-Means Silhouette Score: 0.40265303090996946
K-Means Accuracy: 0.47890625
K-Means Precision: 0.2293511962890625
K-Means Recall: 0.47890625
K-Means F1 Score: 0.310163265979926
CSI: 0.0
'''