In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import matplotlib.pyplot as plt
import math
from sklearn.metrics import roc_curve, auc, roc_auc_score, confusion_matrix, accuracy_score, f1_score, matthews_corrcoef,recall_score, precision_score
from sklearn.model_selection import KFold, train_test_split
from torch.utils.data import TensorDataset, DataLoader, random_split

In [2]:
loaded_datasets_info = torch.load('/root/autodl-tmp/imgs/US/saved_datasets_US.pth')
loaded_train_dataset = loaded_datasets_info['train_dataset']
loaded_val_dataset = loaded_datasets_info['val_dataset']
loaded_test_dataset = loaded_datasets_info['test_dataset']

  loaded_datasets_info = torch.load('/root/autodl-tmp/imgs/US/saved_datasets_US.pth')


In [3]:
from torch.utils.data import DataLoader

def extract_features_labels_from_subset(subset):
    
    loader = DataLoader(subset, batch_size=len(subset))
    
    for features, labels in loader:
        features = features.numpy().reshape(features.shape[0], -1)
        labels = labels.squeeze(-1).numpy()
        return features, labels

X_train, y_train = extract_features_labels_from_subset(loaded_train_dataset)
X_val, y_val = extract_features_labels_from_subset(loaded_val_dataset)
X_test, y_test = extract_features_labels_from_subset(loaded_test_dataset)

# 1. XGB

In [4]:
import xgboost as xgb
import matplotlib.pyplot as plt

In [5]:
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',  
    learning_rate=0.1,
    max_depth=5,
    n_estimators=30)

xgb_model.fit(X_train, y_train)

In [6]:
# torch.save(xgb_model.state_dict(), f'/Users/jiaming/Desktop/Lab2/datas/ROC/xgb_model.pth') # only used for nn
# xgb_model.save_model('/Users/jiaming/Desktop/Lab2/datas/ROC/models/xgb_model.json') 

In [7]:
y_pred = xgb_model.predict(X_val)
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val = accuracy_score(y_val, (y_pred > 0.5).astype(int))
MCC_val = matthews_corrcoef(y_val, (y_pred > 0.5).astype(int))
Sn_val = recall_score(y_val, (y_pred > 0.5).astype(int))
Sp_val = precision_score(y_val, (y_pred > 0.5).astype(int))
F1_val = f1_score(y_val, (y_pred > 0.5).astype(int))
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.806640625 0.703125 0.40804713337332393 0.65625 0.7241379310344828 0.6885245901639345


In [8]:
np.save('/root/autodl-tmp/ROC/US/ML/XGB/y_val_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/XGB/y_val.npy', y_val)

In [9]:
y_pred = xgb_model.predict(X_test)
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test = accuracy_score(y_test, (y_pred > 0.5).astype(int))
MCC_test = matthews_corrcoef(y_test, (y_pred > 0.5).astype(int))
Sn_test = recall_score(y_test, (y_pred > 0.5).astype(int))
Sp_test = precision_score(y_test, (y_pred > 0.5).astype(int))
F1_test = f1_score(y_test, (y_pred > 0.5).astype(int))
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.8806349206349207 0.8375 0.680336051416609 0.8 0.9 0.8470588235294118


In [10]:
np.save('/root/autodl-tmp/ROC/US/ML/XGB/y_test_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/XGB/y_test.npy', y_test)

# 2. SVM

In [11]:
from sklearn.svm import SVC

In [12]:
svm_model = SVC(C=3.0, probability=True)
svm_model.fit(X_train, y_train)

In [13]:
# from joblib import dump, load

# Assuming svm_model is your trained SVM model
# dump(svm_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/svm_model.joblib')

# To load the model back from the file
# svm_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/svm_model.joblib')

In [14]:
y_pred = svm_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val = accuracy_score(y_val, (y_pred > 0.5).astype(int))
MCC_val = matthews_corrcoef(y_val, (y_pred > 0.5).astype(int))
Sn_val = recall_score(y_val, (y_pred > 0.5).astype(int))
Sp_val = precision_score(y_val, (y_pred > 0.5).astype(int))
F1_val = f1_score(y_val, (y_pred > 0.5).astype(int))
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.939453125 0.890625 0.7847060257179306 0.84375 0.9310344827586207 0.8852459016393444


In [15]:
np.save('/root/autodl-tmp/ROC/US/ML/SVM/y_val_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/SVM/y_val.npy', y_val)

In [16]:
y_pred = svm_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test = accuracy_score(y_test, (y_pred > 0.5).astype(int))
MCC_test = matthews_corrcoef(y_test, (y_pred > 0.5).astype(int))
Sn_test = recall_score(y_test, (y_pred > 0.5).astype(int))
Sp_test = precision_score(y_test, (y_pred > 0.5).astype(int))
F1_test = f1_score(y_test, (y_pred > 0.5).astype(int))
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.9314285714285714 0.85 0.7026035769441493 0.8222222222222222 0.9024390243902439 0.8604651162790697


In [17]:
np.save('/root/autodl-tmp/ROC/US/ML/SVM/y_test_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/SVM/y_test.npy', y_test)

# 3. RF

In [18]:
from sklearn.ensemble import RandomForestClassifier

In [19]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

In [20]:
# rf_model.save_model('/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.json') 
# from joblib import dump, load

# dump(rf_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.joblib')

# To load the model back from the file
# rf_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.joblib')

In [21]:
y_pred = rf_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val = accuracy_score(y_val, (y_pred > 0.5).astype(int))
MCC_val = matthews_corrcoef(y_val, (y_pred > 0.5).astype(int))
Sn_val = recall_score(y_val, (y_pred > 0.5).astype(int))
Sp_val = precision_score(y_val, (y_pred > 0.5).astype(int))
F1_val = f1_score(y_val, (y_pred > 0.5).astype(int))
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.90380859375 0.78125 0.5669467095138409 0.71875 0.8214285714285714 0.7666666666666666


In [22]:
np.save('/root/autodl-tmp/ROC/US/ML/RF/y_val_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/RF/y_val.npy', y_val)

In [23]:
y_pred = rf_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test = accuracy_score(y_test, (y_pred > 0.5).astype(int))
MCC_test = matthews_corrcoef(y_test, (y_pred > 0.5).astype(int))
Sn_test = recall_score(y_test, (y_pred > 0.5).astype(int))
Sp_test = precision_score(y_test, (y_pred > 0.5).astype(int))
F1_test = f1_score(y_test, (y_pred > 0.5).astype(int))
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.9184126984126985 0.7875 0.5698028822981898 0.8 0.8181818181818182 0.8089887640449439


In [24]:
np.save('/root/autodl-tmp/ROC/US/ML/RF/y_test_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/RF/y_test.npy', y_test)

# 4. NB 

In [25]:
from sklearn.naive_bayes import GaussianNB

In [26]:
nb_model = GaussianNB()
# nb_model.fit(X_train[:, 10000:10200], y_train)
nb_model.fit(X_train, y_train)

In [27]:
# from joblib import dump, load
# dump(nb_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/nb_model.joblib')

In [28]:
# y_pred = nb_model.predict_proba(X_val[:, 10000:10200])[:, 1]
y_pred = nb_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val = accuracy_score(y_val, (y_pred > 0.5).astype(int))
MCC_val = matthews_corrcoef(y_val, (y_pred > 0.5).astype(int))
Sn_val = recall_score(y_val, (y_pred > 0.5).astype(int))
Sp_val = precision_score(y_val, (y_pred > 0.5).astype(int))
F1_val = f1_score(y_val, (y_pred > 0.5).astype(int))
print(AUC_val, ACC_val, MCC_val, Sn_val, Sp_val, F1_val)

0.7138671875 0.703125 0.40644850966246954 0.71875 0.696969696969697 0.7076923076923077


In [29]:
np.save('/root/autodl-tmp/ROC/US/ML/NB/y_val_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/NB/y_val.npy', y_val)

In [30]:
# y_pred = nb_model.predict_proba(X_test[:, 10000:10200])[:, 1]
y_pred = nb_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test = accuracy_score(y_test, (y_pred > 0.5).astype(int))
MCC_test = matthews_corrcoef(y_test, (y_pred > 0.5).astype(int))
Sn_test = recall_score(y_test, (y_pred > 0.5).astype(int))
Sp_test = precision_score(y_test, (y_pred > 0.5).astype(int))
F1_test = f1_score(y_test, (y_pred > 0.5).astype(int))
print(AUC_test, ACC_test, MCC_test, Sn_test, Sp_test, F1_test)

0.7095238095238096 0.7 0.4001374631027218 0.6888888888888889 0.7560975609756098 0.7209302325581395


In [31]:
np.save('/root/autodl-tmp/ROC/US/ML/NB/y_test_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/NB/y_test.npy', y_test)

# 5. LR (Logistic Regresssion)

In [32]:
from sklearn.linear_model import LogisticRegression

In [33]:
lr_model = LogisticRegression(max_iter=100, 
                              C=0.001, 
                              penalty='l2', 
                              solver='liblinear', 
                              random_state=42)
lr_model.fit(X_train, y_train)

In [34]:
# from joblib import dump, load
# dump(lr_model, '/Users/jiaming/Desktop/Lab2/datas/ROC/models/lr_model.joblib')

In [35]:
y_pred = lr_model.predict_proba(X_val)[:, 1]
AUC_val = roc_auc_score(y_val, y_pred)
ACC_val = accuracy_score(y_val, (y_pred > 0.5).astype(int))
MCC_val = matthews_corrcoef(y_val, (y_pred > 0.5).astype(int))
Sn_val = recall_score(y_val, (y_pred > 0.5).astype(int))
Sp_val = precision_score(y_val, (y_pred > 0.5).astype(int))
F1_val = f1_score(y_val, (y_pred > 0.5).astype(int))
print(AUC_val, ACC_val,MCC_val,Sn_val,Sp_val, F1_val)

0.8544921875 0.8125 0.6454972243679028 0.6875 0.9166666666666666 0.7857142857142857


In [36]:
np.save('/root/autodl-tmp/ROC/US/ML/LR/y_val_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/LR/y_val.npy', y_val)

In [37]:
y_pred = lr_model.predict_proba(X_test)[:, 1]
AUC_test = roc_auc_score(y_test, y_pred)
ACC_test = accuracy_score(y_test, (y_pred > 0.5).astype(int))
MCC_test = matthews_corrcoef(y_test, (y_pred > 0.5).astype(int))
Sn_test = recall_score(y_test, (y_pred > 0.5).astype(int))
Sp_test = precision_score(y_test, (y_pred > 0.5).astype(int))
F1_test = f1_score(y_test, (y_pred > 0.5).astype(int))
print(AUC_test, ACC_test, MCC_test,Sn_test,Sp_test, F1_test)

0.7993650793650793 0.725 0.4505484820762931 0.7111111111111111 0.7804878048780488 0.7441860465116279


In [38]:
np.save('/root/autodl-tmp/ROC/US/ML/LR/y_test_pred.npy', y_pred)
np.save('/root/autodl-tmp/ROC/US/ML/LR/y_test.npy', y_test)

In [39]:
# check the weight distribution 
# weights = lr_model.coef_
# intercept = lr_model.intercept_
# 
# print(f"Weights (coefficients): {weights}")
# print(f"Intercepts: {intercept}")