In [None]:
import torch
import torchvision
import torchmetrics
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scienceplots
import pathlib
import tensorflow as tf
import time
import psutil
from pathlib import Path
from tqdm.auto import tqdm

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import class_weight
from sklearn.metrics import roc_curve, auc

In [None]:
# Root folder of the Biosignals project data.
# The machine-specific default below can be overridden without editing the notebook by
# setting the BIOSIGNALS_DIR environment variable before the kernel is started.
drivepath = Path(os.environ.get(
    'BIOSIGNALS_DIR',
    r'C:/Users/tjfwo/OneDrive/바탕 화면/GIT_Repository\study\Biosignals',
))
datapath = drivepath / 'mitbih_data_split'
datapath.mkdir(parents = True, exist_ok = True)

# header=None: the CSV files are read without a header row, so columns get integer labels.
df_train = pd.read_csv(datapath / 'mitbih_train.csv', header = None)
df_test = pd.read_csv(datapath / 'mitbih_test.csv', header = None)

In [None]:
# Column 187 holds the class label; store it as an integer in both frames.
for _frame in (df_train, df_test):
    _frame[187] = _frame[187].astype(int)

# Number of samples per class, train split first, then test split.
label_count_train = df_train[187].value_counts()
print(label_count_train)

label_count_test = df_test[187].value_counts()
print(label_count_test)

In [None]:
# Donut chart of the class distribution in the training split.
# Slice names and colours are looked up from the integer labels in the value_counts() index
# instead of being hard-coded in an assumed frequency order, so a slice can never be
# mislabelled if the class ranking changes.
class_names = {0: 'N', 1: 'S', 2: 'V', 3: 'F', 4: 'Q'}
class_colors = {'N': 'red', 'Q': 'green', 'V': 'blue', 'S': 'skyblue', 'F': 'orange'}
pie_labels = [class_names.get(label, str(label)) for label in label_count_train.index]
pie_colors = [class_colors.get(name, 'gray') for name in pie_labels]

plt.figure(figsize=(10,5))
# White disc drawn over the centre turns the pie into a donut.
my_circle=plt.Circle( (0,0), 0.7, color='white')
plt.pie(label_count_train, labels=pie_labels, colors=pie_colors, autopct='%1.1f%%')
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

In [None]:
# Donut chart of the class distribution in the test split.
# Slice names and colours are looked up from the integer labels in the value_counts() index
# instead of being hard-coded in an assumed frequency order, so a slice can never be
# mislabelled if the class ranking differs from the training split.
class_names = {0: 'N', 1: 'S', 2: 'V', 3: 'F', 4: 'Q'}
class_colors = {'N': 'red', 'Q': 'green', 'V': 'blue', 'S': 'skyblue', 'F': 'orange'}
pie_labels = [class_names.get(label, str(label)) for label in label_count_test.index]
pie_colors = [class_colors.get(name, 'gray') for name in pie_labels]

plt.figure(figsize=(10,5))
# White disc drawn over the centre turns the pie into a donut.
my_circle=plt.Circle( (0,0), 0.7, color='white')
plt.pie(label_count_test, labels=pie_labels, colors=pie_colors, autopct='%1.1f%%')
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

In [None]:
# Class labels live in column 187.
target_train = df_train[187]
target_test = df_test[187]

# Keep the labels one-dimensional, shape (n_samples,). A column vector (n_samples, 1)
# makes scikit-learn estimators such as SVC / LogisticRegression emit a
# DataConversionWarning; torch.tensor(...).flatten() further down accepts either shape.
y_train = target_train.values.ravel()
y_test = target_test.values.ravel()

# Features: every column except the last one. The result is already a 2-D
# (n_samples, n_features) array, so no further reshape is required.
X_train = df_train.iloc[:, :-1].values
X_test = df_test.iloc[:, :-1].values

In [None]:
# Empty results table: one row per model, filled in by the cells below.
# Metric columns are generated per split (Train first, then Test).
all_model = pd.DataFrame(
    columns=(
        ['Model']
        + [
            f'{metric} ({split})'
            for split in ('Train', 'Test')
            for metric in ('Accuracy', 'Precision', 'Recall', 'F1 Score')
        ]
        + ['Training Time', 'Testing Time']
    )
)
all_model

In [None]:
# define the SVM model
svm = SVC(C=1.0, kernel='rbf', gamma='scale')

# fit the model on the training data
# Uses the NumPy arrays: the torch tensors referenced here before are only created in the
# later NN cells, so this cell raised NameError on a fresh kernel. ravel() hands SVC the
# 1-D label vector it expects, whatever the shape y_train currently has.
start_time = time.time()
svm.fit(X_train, y_train.ravel())
training_time = time.time() - start_time
# Train-set predictions are made outside the timed span.
y_pred_svm_train = svm.predict(X_train)

# make predictions on the test data
start_time = time.time()
y_pred_svm_test = svm.predict(X_test)
testing_time = time.time() - start_time

In [None]:
#Confusion matrix function
class plot_score():
    """Plotting and scoring helpers shared by all classifiers in this notebook.

    Every helper is a static method, so it can be called on the class
    (``plot_score.score_test(...)``) as well as on an instance.
    """

    @staticmethod
    def plot_confusion_matrix(y_true, y_pred):
        """Draw the confusion matrix of ``y_pred`` against ``y_true``.

        Args:
            y_true: true integer class labels (array-like).
            y_pred: predicted integer class labels (array-like, same length).

        Returns:
            None. The figure is shown with ``plt.show()``.
        """
        classes=['N', 'S', 'V', 'F', 'Q']
        ticks = np.arange(len(classes))
        # Pin the label set so the matrix always has one row/column per class name,
        # even when a class is absent from both y_true and y_pred.
        cm = confusion_matrix(y_true, y_pred, labels=ticks)

        fig, ax = plt.subplots()
        im = ax.imshow(cm, interpolation='nearest', cmap = plt.cm.Greens)
        fig.colorbar(im, ax=ax)

        ax.set_xticks(ticks)
        ax.set_yticks(ticks)
        ax.set_xticklabels(classes)
        ax.set_yticklabels(classes)
        ax.set_aspect('equal')
        ax.set_xlabel('Predicted label', fontweight='bold', color='black', fontsize=10)
        ax.set_ylabel('True label', fontweight='bold', color='black', fontsize=10)
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor",
                 fontweight='bold', color='black', fontsize=10)
        plt.setp(ax.get_yticklabels(), fontweight='bold', color='black', fontsize=10)

        # Cell counts: white text on dark cells, black text on light cells.
        threshold = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j, i, format(cm[i, j], 'd'),
                        ha="center", va="center", fontweight='bold',
                        color="white" if cm[i, j] > threshold else "black")
        fig.tight_layout()
        plt.show()

    @staticmethod
    def _score(y_true, y_pred, split):
        """Compute, print and return accuracy and macro precision/recall/F1 in percent."""
        accuracy = accuracy_score(y_true, y_pred)*100
        precision = precision_score(y_true, y_pred, average='macro')*100
        recall = recall_score(y_true, y_pred, average='macro')*100
        f1 = f1_score(y_true, y_pred, average='macro')*100
        print("Accuracy ({}): {:.4f}".format(split, accuracy))
        print("Precision ({}): {:.4f}".format(split, precision))
        print("Recall ({}): {:.4f}".format(split, recall))
        print("F1 score ({}): {:.4f}".format(split, f1))
        return accuracy, precision, recall, f1

    @staticmethod
    def score_train(y_train, y_pred_train):
        """Print and return ``(accuracy, precision, recall, f1)`` in percent, labelled "Train".

        Precision, recall and F1 are macro-averaged over the classes.
        """
        return plot_score._score(y_train, y_pred_train, 'Train')

    @staticmethod
    def score_test(y_test, y_pred):
        """Print and return ``(accuracy, precision, recall, f1)`` in percent, labelled "Test".

        Precision, recall and F1 are macro-averaged over the classes.
        """
        return plot_score._score(y_test, y_pred, 'Test')

#plot_and_score = plot_score()
#plot_confusion_matrix_svm_train = plot_and_score.plot_confusion_matrix()

In [None]:
# SVM: confusion matrices for the train and test splits, then the summary metrics.
for _y_true, _y_pred in ((y_train, y_pred_svm_train), (y_test, y_pred_svm_test)):
    plot_score.plot_confusion_matrix(_y_true, _y_pred)

(accuracy_train, precision_train,
 recall_train, f1_train) = plot_score.score_train(y_train, y_pred_svm_train)
(accuracy_test, precision_test,
 recall_test, f1_test) = plot_score.score_test(y_test, y_pred_svm_test)

In [None]:
# Store the SVM metrics and timings as row 0 of the results table.
model_name = 'SVM'
all_model.loc[0] = [
    model_name,
    accuracy_train, precision_train, recall_train, f1_train,
    accuracy_test, precision_test, recall_test, f1_test,
    training_time, testing_time,
]
all_model

In [None]:
# Logistic-regression baseline (L-BFGS solver, fixed random_state).
lr = LogisticRegression(
    solver='lbfgs',
    fit_intercept=True,
    random_state=0,
)

# Time the fit alone; the train-set predictions are made outside the timed span.
start_time = time.time()
lr.fit(X_train, y_train)
training_time = time.time() - start_time

y_pred_lr_train = lr.predict(X_train)

# Time the test-set prediction.
start_time = time.time()
y_pred_lr_test = lr.predict(X_test)
testing_time = time.time() - start_time

In [None]:
# Logistic regression: confusion matrices for both splits, then the summary metrics.
for _y_true, _y_pred in ((y_train, y_pred_lr_train), (y_test, y_pred_lr_test)):
    plot_score.plot_confusion_matrix(_y_true, _y_pred)

(accuracy_train, precision_train,
 recall_train, f1_train) = plot_score.score_train(y_train, y_pred_lr_train)
(accuracy_test, precision_test,
 recall_test, f1_test) = plot_score.score_test(y_test, y_pred_lr_test)

In [None]:
# Store the logistic-regression metrics and timings as row 1 of the results table.
model_name = 'Logistic Regression'
all_model.loc[1] = [
    model_name,
    accuracy_train, precision_train, recall_train, f1_train,
    accuracy_test, precision_test, recall_test, f1_test,
    training_time, testing_time,
]
all_model

In [None]:
# Decision-tree baseline: Gini criterion, best splitter, depth capped at 10, fixed random_state.
dt = DecisionTreeClassifier(
    max_depth=10,
    criterion='gini',
    splitter='best',
    random_state=0,
)

# Time the fit alone; the train-set predictions are made outside the timed span.
start_time = time.time()
dt.fit(X_train, y_train)
training_time = time.time() - start_time

y_pred_dt_train = dt.predict(X_train)

# Time the test-set prediction.
start_time = time.time()
y_pred_dt_test = dt.predict(X_test)
testing_time = time.time() - start_time

In [None]:
# Decision tree: confusion matrices for both splits, then the summary metrics.
for _y_true, _y_pred in ((y_train, y_pred_dt_train), (y_test, y_pred_dt_test)):
    plot_score.plot_confusion_matrix(_y_true, _y_pred)

(accuracy_train, precision_train,
 recall_train, f1_train) = plot_score.score_train(y_train, y_pred_dt_train)
(accuracy_test, precision_test,
 recall_test, f1_test) = plot_score.score_test(y_test, y_pred_dt_test)

In [None]:
# Store the decision-tree metrics and timings as row 2 of the results table.
model_name = 'Decision Tree'
all_model.loc[2] = [
    model_name,
    accuracy_train, precision_train, recall_train, f1_train,
    accuracy_test, precision_test, recall_test, f1_test,
    training_time, testing_time,
]
all_model

- **TODO**: 앞의 Classification model들(SVM, Logistic Regression, Decision Tree)의 하이퍼파라미터를 어떻게 설정할지 정리 필요.
- **TODO**: NN, MLP, CNN 모델을 완성하고 학습해야 함.

In [None]:
# NN_Linear
# Two linear layers stacked directly, with no activation in between.
hidden = nn.Linear(in_features=187, out_features=300, bias=True)
output = nn.Linear(in_features=300, out_features=5, bias=True)
model_nn = nn.Sequential(hidden, output)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_nn.parameters(), lr=0.01)

# Features as float32, labels as a flat int64 vector (what CrossEntropyLoss takes as targets).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).flatten()

history = []

# Ten full-batch gradient steps; the loss of each step is logged.
for step in range(10):
    optimizer.zero_grad()

    y_pred_nn_train = model_nn(X_train_tensor)
    loss = loss_fn(y_pred_nn_train, y_train_tensor)

    loss.backward()
    optimizer.step()

    history.append(dict(step=step, loss=loss.item()))

In [None]:
# Show the per-step training losses recorded by the training loop above.
history

In [None]:
# Test split as tensors: float32 features, flat int64 labels.
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).flatten()

# Single forward pass over the whole test split, without gradient tracking.
model_nn.eval()
with torch.no_grad():
    y_pred_nn_test = model_nn(X_test_tensor)
    loss = loss_fn(y_pred_nn_test, y_test_tensor)

# `step` is whatever value the preceding training loop left behind.
history_test = [{"step": step, "loss": loss.item()}]

In [None]:
# Sanity check: shape of the labels next to the shape of the arg-max class predictions.
(
    y_train.shape,
    y_pred_nn_train.detach().numpy().argmax(axis=1).shape,
)

In [None]:
# Linear NN: confusion matrices (arg-max over the logits) for both splits, then the metrics.
for _labels, _logits in ((y_train_tensor, y_pred_nn_train), (y_test_tensor, y_pred_nn_test)):
    plot_score.plot_confusion_matrix(_labels, _logits.argmax(dim=1))

(accuracy_train, precision_train,
 recall_train, f1_train) = plot_score.score_train(
    y_train, y_pred_nn_train.detach().numpy().argmax(axis=1))
(accuracy_test, precision_test,
 recall_test, f1_test) = plot_score.score_test(
    y_test, y_pred_nn_test.detach().numpy().argmax(axis=1))

In [None]:
# NN_NonLinear
# Same two linear layers as the linear network, with a ReLU between them.
hidden = nn.Linear(in_features=187, out_features=300, bias=True)
activation = nn.ReLU()
output = nn.Linear(in_features=300, out_features=5, bias=True)
model_non = nn.Sequential(hidden, activation, output)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_non.parameters(), lr=0.01)

# Features as float32, labels as a flat int64 vector.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).flatten()

history = []

# Ten full-batch gradient steps; the loss of each step is logged.
for step in range(10):
    optimizer.zero_grad()

    y_pred_non_train = model_non(X_train_tensor)
    loss = loss_fn(y_pred_non_train, y_train_tensor)

    loss.backward()
    optimizer.step()

    history.append(dict(step=step, loss=loss.item()))

In [None]:
# Test split as tensors: float32 features, flat int64 labels.
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).flatten()

# Single forward pass over the whole test split, without gradient tracking.
model_non.eval()
with torch.no_grad():
    y_pred_non_test = model_non(X_test_tensor)
    loss = loss_fn(y_pred_non_test, y_test_tensor)

# `step` is whatever value the preceding training loop left behind.
history_test = [{"step": step, "loss": loss.item()}]

In [None]:
# Non-linear NN: confusion matrices (arg-max over the logits) for both splits, then the metrics.
for _labels, _logits in ((y_train_tensor, y_pred_non_train), (y_test_tensor, y_pred_non_test)):
    plot_score.plot_confusion_matrix(_labels, _logits.argmax(dim=1))

(accuracy_train, precision_train,
 recall_train, f1_train) = plot_score.score_train(
    y_train, y_pred_non_train.detach().numpy().argmax(axis=1))
(accuracy_test, precision_test,
 recall_test, f1_test) = plot_score.score_test(
    y_test, y_pred_non_test.detach().numpy().argmax(axis=1))

In [None]:
#MLP
model_mlp = nn.Sequential(
    nn.Linear(187,300),
    nn.ReLU(),
    nn.Linear(300,5),
)

# reduction='sum': batch losses are summed and divided by the dataset size after each
# epoch, which gives a per-sample average that does not depend on the last batch's size.
loss_fn = nn.CrossEntropyLoss(reduction='sum')
optimizer = optim.SGD(model_mlp.parameters(), lr = 0.001)
num_epoch = 30
batch_size = 100

# Stratified 50/50 split of the training tensors into a fit part and a validation part.
X_traintrain_tensor, X_trainval_tensor, y_traintrain_tensor, y_trainval_tensor = train_test_split(
    X_train_tensor,
    y_train_tensor,
    test_size=0.5,
    random_state=0,
    shuffle=True,
    stratify=y_train_tensor,
)

train_dataset = TensorDataset(X_traintrain_tensor, y_traintrain_tensor)
val_dataset = TensorDataset(X_trainval_tensor, y_trainval_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

history = []

for epoch in tqdm(range(num_epoch)):

    train_loss = 0.0
    eval_loss = 0.0

    model_mlp.train()
    for batch in train_loader:
        x, y_true = batch
        y_logits_train = model_mlp(x)

        loss = loss_fn(y_logits_train, y_true)
        # Accumulate a plain Python float. Adding the loss tensor itself would chain the
        # autograd history of every batch into `train_loss` for the whole epoch.
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss /= len(train_dataset)

    model_mlp.eval()
    with torch.no_grad():
        for batch in val_loader:
            x, y_true = batch
            y_logits_val = model_mlp(x)

            loss = loss_fn(y_logits_val, y_true)
            eval_loss += loss.item()

    eval_loss /= len(val_dataset)

    history.append({
        'epoch': epoch,
        'train_loss': train_loss,
        'eval_loss': eval_loss,
    })

In [None]:
# Training vs. validation loss per epoch.
(
    pd.DataFrame(history)
    .plot(x='epoch', y=['train_loss', 'eval_loss'])
)

In [None]:
# Per-sample test loss of the MLP: summed batch losses divided by the number of test samples.
test_loss = 0.0

with torch.no_grad():
    for batch in test_loader:
        x, y_true = batch[0], batch[1]
        y_logits_test = model_mlp(x)
        loss = loss_fn(y_logits_test, y_true)
        test_loss = test_loss + loss

test_loss = test_loss / len(test_dataset)

print(test_loss.item())

In [None]:
# MLP logits for the fit part, the validation part and the test split (no gradient tracking).
with torch.no_grad():
    y_pred_mlp_train, y_pred_mlp_val, y_pred_mlp_test = [
        model_mlp(_features)
        for _features in (X_traintrain_tensor, X_trainval_tensor, X_test_tensor)
    ]

In [None]:
# MLP: confusion matrices for the fit / validation / test parts, then the summary metrics.
for _labels, _logits in (
    (y_traintrain_tensor, y_pred_mlp_train),
    (y_trainval_tensor, y_pred_mlp_val),
    (y_test_tensor, y_pred_mlp_test),
):
    plot_score.plot_confusion_matrix(_labels, _logits.argmax(dim=1))

(accuracy_train, precision_train,
 recall_train, f1_train) = plot_score.score_train(
    y_traintrain_tensor, y_pred_mlp_train.detach().numpy().argmax(axis=1))
# The validation part is scored with score_train, so its printout is labelled "Train".
(accuracy_val, precision_val,
 recall_val, f1_val) = plot_score.score_train(
    y_trainval_tensor, y_pred_mlp_val.detach().numpy().argmax(axis=1))
(accuracy_test, precision_test,
 recall_test, f1_test) = plot_score.score_test(
    y_test_tensor, y_pred_mlp_test.detach().numpy().argmax(axis=1))

In [None]:
#CNN
class CNN(nn.Module):
    """1-D convolutional classifier for fixed-length single-channel signals.

    Three Conv1d+ReLU stages (the last two followed by max-pooling) feed a
    two-layer fully connected head that outputs one logit per class.

    Args:
        input_length: number of samples per input signal (default 187).
        num_classes: number of output logits (default 5).
    """

    def __init__(self, input_length=187, num_classes=5):
        super(CNN, self).__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv1d(1, 16, 5),
            nn.ReLU(),
            nn.Conv1d(16, 32, 5),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),
            nn.Conv1d(32, 64, 5),
            nn.ReLU(),
            nn.MaxPool1d(2, 2)
        )

        # Measure the flattened feature size with a dummy forward pass instead of
        # hard-coding it. For a length-187 input the conv stack yields 64 x 42 = 2688
        # features, not the 87*64 that was assumed before.
        with torch.no_grad():
            n_features = self.conv_layer(torch.zeros(1, 1, input_length)).numel()

        self.fc_layer = nn.Sequential(
            nn.Linear(n_features, 100),
            nn.ReLU(),
            nn.Linear(100, num_classes)
        )

    def forward(self,x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: tensor of shape (batch, 1, length) or (batch, length); a missing
               channel axis is inserted automatically.
        """
        if x.dim() == 2:
            # Conv1d needs an explicit channel axis: (batch, length) -> (batch, 1, length).
            x = x.unsqueeze(1)
        out = self.conv_layer(x)
        # Flatten everything except the batch axis so the batch size is always preserved.
        out = out.flatten(start_dim=1)
        out = self.fc_layer(out)

        return out

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

model_cnn = CNN().to(device)

# Cross-entropy loss with plain SGD on the CNN parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model_cnn.parameters(), lr=0.01)

num_epochs = 30

In [None]:
# Train the CNN on the mini-batches of `train_loader`.
# Uses the objects created in the CNN setup cell (`model_cnn`, `loss_fn`, `optimizer`,
# `device`); the previous version referred to undefined `model` / `train_data`, called the
# stale `loss` tensor as if it were the loss function, and required CUDA unconditionally.
model_cnn.train()

for epoch in range(num_epochs):

    # Number of mini-batches per epoch (includes the final, possibly smaller, batch).
    total_batch = len(train_loader)

    for i, (batch_signals, batch_labels) in enumerate(train_loader):

        # Move the batch to the same device as the model (CPU fallback included).
        X = batch_signals.to(device)
        Y = batch_labels.to(device)
        if X.dim() == 2:
            # Conv1d needs an explicit channel axis: (batch, length) -> (batch, 1, length).
            X = X.unsqueeze(1)

        pre = model_cnn(X)
        cost = loss_fn(pre, Y)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Progress line every 300 mini-batches.
        if (i+1) % 300 == 0:
            print('Epoch [%d/%d], Iter [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, total_batch, cost.item()))

## Explain

In [None]:
# Decision-boundary plot over the first two feature columns, one figure per model.
# NOTE(review): this cell looks carried over from a 2-feature example (the axis labels
# mention sepal measurements) — confirm it belongs in this notebook.
# NOTE(review): `models`, `val_x` and `val_y` are not defined in any cell visible above, and
# `x` was last bound to a mini-batch tensor in the MLP cells, so this presumably fails on a
# fresh kernel — TODO confirm / replace with the intended inputs.
# Grid step used for the mesh below.
h = .02
# Plot range: min/max of columns 0 and 1 of `x`, padded by 1 on each side.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

for model in models:
    # Predict on every grid point (two input columns) and reshape back onto the grid.
    pre = model.predict(np.c_[xx.ravel(), yy.ravel()])
    pre = pre.reshape(xx.shape)
    plt.contourf(xx, yy, pre, cmap=plt.cm.coolwarm, alpha=0.8)

    # Overlay the `val_x` points, coloured by `val_y`.
    plt.scatter(val_x[:, 0], val_x[:, 1], c=val_y, cmap=plt.cm.coolwarm)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    # Empty tuples remove the tick marks on both axes.
    plt.xticks(())
    plt.yticks(())
    # The model object itself is passed as the title.
    plt.title(model)
    plt.show()