In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import matplotlib.pyplot as plt
import math
from sklearn.metrics import roc_curve, auc, roc_auc_score, confusion_matrix, accuracy_score, f1_score, matthews_corrcoef,recall_score, precision_score
from sklearn.model_selection import KFold, train_test_split
from torch.utils.data import TensorDataset, DataLoader, random_split

In [4]:
# batch_size is defined here but is not referenced by any cell visible in this part of the notebook.
batch_size = 128

# Load the saved dataset bundle and pull out its three splits by key.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load this
# file if it comes from a trusted source.
# NOTE(review): recent PyTorch releases may default torch.load to
# weights_only=True, which can reject pickled dataset objects — confirm against
# the installed version.
loaded_datasets_info = torch.load('/Users/jiaming/Desktop/Lab2/datas/saved_datasets_scaled.pth')
loaded_train_dataset, loaded_val_dataset, loaded_test_dataset = (
    loaded_datasets_info[split_key]
    for split_key in ('train_dataset', 'val_dataset', 'test_dataset')
)

In [5]:
from torch.utils.data import DataLoader

def extract_features_labels_from_subset(subset):
    """Materialize an entire dataset as a ``(features, labels)`` pair of NumPy arrays.

    The dataset is read through a ``DataLoader`` whose batch size equals
    ``len(subset)``, so the first (and only) batch holds every sample. Axis 1
    of both batched tensors is squeezed before conversion to NumPy.

    Args:
        subset: A sized dataset whose items are ``(features, labels)`` pairs.
            Assumes the batched tensors live on the CPU and that the labels
            have at least two dimensions after batching — TODO confirm
            against the code that built the saved datasets.

    Returns:
        tuple: ``(features, labels)`` as NumPy arrays.
    """
    full_batch_loader = DataLoader(subset, batch_size=len(subset))

    # One batch covers the whole dataset, so only the first item is needed.
    batch_features, batch_labels = next(iter(full_batch_loader))
    return batch_features.squeeze(1).numpy(), batch_labels.squeeze(1).numpy()

# Convert each loaded split into NumPy feature/label arrays for the
# scikit-learn / XGBoost models trained below.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    extract_features_labels_from_subset(split)
    for split in (loaded_train_dataset, loaded_val_dataset, loaded_test_dataset)
)

# 1. XGB

In [105]:
import xgboost as xgb
import matplotlib.pyplot as plt

In [106]:
# Gradient-boosted trees fitted as a squared-error regressor on the training
# split; later cells threshold its continuous output at 0.5.
# NOTE(review): the downstream metrics treat the target as binary — confirm
# that a regressor (rather than a classifier with predict_proba) is intended.
xgb_model = xgb.XGBRegressor(
    n_estimators=30,
    max_depth=10,
    learning_rate=0.1,
    objective='reg:squarederror',
)

xgb_model.fit(X=X_train, y=y_train)

In [107]:
# Persist the fitted XGBoost model through its own save_model() call (JSON file).
# A torch.save(model.state_dict(), ...) style checkpoint is only used for the
# neural-network models, not here.
xgb_model.save_model('/Users/jiaming/Desktop/Lab2/datas/ROC/models/xgb_model.json') 

In [108]:
# Validation-set metrics for the XGBoost model. AUC is computed on the raw
# predict() scores; every other metric uses hard labels from a 0.5 cut-off.
# NOTE(review): Sp_val is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = xgb_model.predict(X_val)
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val, MCC_val, Sn_val, Sp_val, F1_val = (
    metric(y_val, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.6233704443810067 0.5922330097087378 0.18927265871633372 0.5663716814159292 0.6464646464646465 0.6037735849056604


In [109]:
# Store the XGBoost validation scores and the validation labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the validation predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_pred_xgb.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_xgb.npy', y_val)

In [110]:
# Test-set metrics for the XGBoost model. AUC is computed on the raw predict()
# scores; every other metric uses hard labels from a 0.5 cut-off.
# NOTE(review): Sp_test is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = xgb_model.predict(X_test)
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test, MCC_test, Sn_test, Sp_test, F1_test = (
    metric(y_test, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.6319143819143819 0.5503875968992248 0.10421428116096709 0.49242424242424243 0.5701754385964912 0.5284552845528455


In [111]:
# Store the XGBoost test scores and the test labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the test predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_pred_xgb.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_xgb.npy', y_test)

# 2. SVM

In [112]:
from sklearn.svm import SVC

In [113]:
# Support-vector classifier with C=3.0 and the kernel left at scikit-learn's
# default. probability=True enables predict_proba(), which the evaluation
# cells use for AUC and for the 0.5 threshold.
# The probability estimates are fitted with an internally shuffled
# cross-validation, so random_state is pinned to keep the scores reproducible
# across runs (same seed as the RF and LR models in this notebook).
# Earlier linear-kernel variant, kept for reference:
#   SVC(kernel='linear', C=1.0, probability=True)
svm_model = SVC(C=3.0, probability=True, random_state=42)
svm_model.fit(X_train, y_train)

In [114]:
from joblib import dump, load

# Serialize the fitted SVM to disk with joblib. dump() is the cell's last
# expression, so the notebook echoes its return value (the list of written paths).
dump(svm_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/svm_model.joblib')

# To restore the model from the file later:
# svm_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/svm_model.joblib')


['/Users/jiaming/Desktop/Lab2/datas/ROC/models/svm_model.joblib']

In [115]:
# Validation-set metrics for the SVM. y_pred is column 1 of predict_proba();
# AUC uses it directly, the other metrics use hard labels from a 0.5 cut-off.
# NOTE(review): Sp_val is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = svm_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val, MCC_val, Sn_val, Sp_val, F1_val = (
    metric(y_val, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.6532495955847368 0.5776699029126213 0.15513737756458576 0.5752212389380531 0.625 0.599078341013825


In [116]:
# Store the SVM validation scores and the validation labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the validation predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_pred_svm.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_svm.npy', y_val)

In [117]:
# Test-set metrics for the SVM. y_pred is column 1 of predict_proba();
# AUC uses it directly, the other metrics use hard labels from a 0.5 cut-off.
# NOTE(review): Sp_test is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = svm_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test, MCC_test, Sn_test, Sp_test, F1_test = (
    metric(y_test, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.6585497835497834 0.627906976744186 0.26216861655261253 0.553030303030303 0.6636363636363637 0.6033057851239669


In [118]:
# Store the SVM test scores and the test labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the test predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_pred_svm.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_svm.npy', y_test)

# 3. RF

In [119]:
from sklearn.ensemble import RandomForestClassifier

In [120]:
# Random forest of 100 trees with a fixed seed, fitted on the training split.
rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
rf_model.fit(X=X_train, y=y_train)

In [121]:
# rf_model.save_model('/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.json') 
from joblib import dump, load

# Serialize the fitted random forest to disk with joblib. dump() is the cell's
# last expression, so the notebook echoes its return value (the written paths).
dump(rf_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.joblib')

# To restore the model from the file later:
# rf_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.joblib')

['/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.joblib']

In [122]:
# Validation-set metrics for the random forest. y_pred is column 1 of
# predict_proba(); AUC uses it directly, the other metrics use hard labels
# from a 0.5 cut-off.
# NOTE(review): Sp_val is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = rf_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val, MCC_val, Sn_val, Sp_val, F1_val = (
    metric(y_val, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.6633837662955562 0.6116504854368932 0.2326402184955707 0.5663716814159292 0.6736842105263158 0.6153846153846154


In [123]:
# Store the random-forest validation scores and the validation labels as .npy
# files (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the validation predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_pred_rf.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_rf.npy', y_val)

In [124]:
# Test-set metrics for the random forest. y_pred is column 1 of
# predict_proba(); AUC uses it directly, the other metrics use hard labels
# from a 0.5 cut-off.
# NOTE(review): Sp_test is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = rf_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test, MCC_test, Sn_test, Sp_test, F1_test = (
    metric(y_test, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.666005291005291 0.6046511627906976 0.21351218579320086 0.5454545454545454 0.631578947368421 0.5853658536585366


In [125]:
# Store the random-forest test scores and the test labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the test predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_pred_rf.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_rf.npy', y_test)

# 4. NB

In [6]:
from sklearn.naive_bayes import GaussianNB

In [7]:
# Gaussian naive Bayes with library-default settings, fitted on the training split.
nb_model = GaussianNB()
nb_model.fit(X=X_train, y=y_train)

In [8]:
from joblib import dump, load

# Serialize the fitted naive-Bayes model to disk with joblib. dump() is the
# cell's last expression, so the notebook echoes its return value (the written paths).
dump(nb_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/nb_model.joblib')

['/Users/jiaming/Desktop/Lab2/datas/ROC/models/nb_model.joblib']

In [129]:
# Validation-set metrics for naive Bayes. y_pred is column 1 of
# predict_proba(); AUC uses it directly, the other metrics use hard labels
# from a 0.5 cut-off.
# NOTE(review): Sp_val is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = nb_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val, MCC_val, Sn_val, Sp_val, F1_val = (
    metric(y_val, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.6942620610904938 0.5339805825242718 0.2554609973023637 0.1592920353982301 0.9473684210526315 0.27272727272727276


In [130]:
# Store the naive-Bayes validation scores and the validation labels as .npy
# files (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the validation predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_pred_nb.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_nb.npy', y_val)

In [131]:
# Test-set metrics for naive Bayes. y_pred is column 1 of predict_proba();
# AUC uses it directly, the other metrics use hard labels from a 0.5 cut-off.
# NOTE(review): Sp_test is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = nb_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test, MCC_test, Sn_test, Sp_test, F1_test = (
    metric(y_test, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.7086940836940837 0.5193798449612403 0.09933114583726094 0.12121212121212122 0.6666666666666666 0.20512820512820512


In [132]:
# Store the naive-Bayes test scores and the test labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the test predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_pred_nb.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_nb.npy', y_test)

# 5. LR (Logistic Regression)

In [194]:
from sklearn.linear_model import LogisticRegression

In [195]:
# Logistic regression with a fixed seed and a 100-iteration solver cap,
# fitted on the training split.
# A max_iter=1000 variant was tried earlier and is left disabled:
#   LogisticRegression(max_iter=1000, random_state=42)
lr_model = LogisticRegression(
    random_state=42,
    max_iter=100,
)
lr_model.fit(X=X_train, y=y_train)

In [196]:
from joblib import dump, load

# Serialize the fitted logistic-regression model to disk with joblib. dump() is
# the cell's last expression, so the notebook echoes its return value (the written paths).
dump(lr_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/lr_model.joblib')

['/Users/jiaming/Desktop/Lab2/datas/ROC/models/lr_model.joblib']

In [197]:
# Validation-set metrics for logistic regression. y_pred is column 1 of
# predict_proba(); AUC uses it directly, the other metrics use hard labels
# from a 0.5 cut-off.
# NOTE(review): Sp_val is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = lr_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val, MCC_val, Sn_val, Sp_val, F1_val = (
    metric(y_val, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.7201446379293939 0.6504854368932039 0.3086059187663238 0.6106194690265486 0.711340206185567 0.6571428571428571


In [198]:
# Store the logistic-regression validation scores and the validation labels as
# .npy files (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the validation predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_pred_lr.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/val/y_val_lr.npy', y_val)

In [199]:
# Test-set metrics for logistic regression. y_pred is column 1 of
# predict_proba(); AUC uses it directly, the other metrics use hard labels
# from a 0.5 cut-off.
# NOTE(review): Sp_test is computed with precision_score; if "Sp" is meant as
# specificity it should be recall_score(..., pos_label=0) — confirm.
y_pred = lr_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test, MCC_test, Sn_test, Sp_test, F1_test = (
    metric(y_test, (y_pred > 0.5).astype(int))
    for metric in (accuracy_score, matthews_corrcoef, recall_score, precision_score, f1_score)
)
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.7168710918710919 0.6782945736434108 0.3575788635001265 0.6590909090909091 0.696 0.6770428015564202


In [200]:
# Store the logistic-regression test scores and the test labels as .npy files
# (presumably consumed by a separate ROC-plotting step — confirm).
# Relies on y_pred still holding the test predictions from the previous cell.
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_pred_lr.npy', y_pred)
np.save('/Users/jiaming/Desktop/Lab2/datas/ROC/test/y_test_lr.npy', y_test)

In [201]:
# Inspect how the fitted logistic regression distributes its weights:
# coef_ holds the per-feature coefficients, intercept_ the bias term.
weights, intercept = lr_model.coef_, lr_model.intercept_

print(f"Weights (coefficients): {weights}")
print(f"Intercepts: {intercept}")

Weights (coefficients): [[ 5.58853978e-01  2.98264016e-01 -6.39688828e-02  3.02596337e-01
   1.10264339e-01  4.29190740e-02 -1.16489216e-01  5.84512012e-02
  -7.07853145e-02  7.07853145e-02  5.35062059e-02 -3.52294753e-02
  -1.10325800e-02 -2.40048215e-02 -4.42258168e-03  4.42258168e-03
  -6.56049243e-02  6.56049243e-02 -2.41480141e-01  2.41480141e-01
   1.42870511e-04 -1.42870511e-04 -9.11320802e-02  9.11320802e-02]]
Intercepts: [-0.03065455]
