In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline


import math
import time
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from IPython.display import display, clear_output

from sklearn.metrics import confusion_matrix, roc_auc_score, f1_score, recall_score, precision_score, accuracy_score
import sklearn.metrics as metrics
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize, scale, StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from collections import OrderedDict, namedtuple
from itertools import product


# Initial setup: widen NumPy's printed line width, make sure autograd is
# enabled, and seed NumPy's global random generator.
np.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)
np.random.seed(1234)
# NOTE(review): torch's own RNG is left unseeded (the call below is disabled),
# so weight initialisation, dropout masks and DataLoader shuffling are not
# reproducible between runs.
#torch.manual_seed(0)

In [None]:
class data_reader(Dataset):
    """Dataset of fixed-length, labelled signal windows read from text files.

    ``CSVpath`` points to a CSV with the columns ``"damaged data"`` and
    ``"undamaged data"``; every non-empty cell is the path of a whitespace
    separated text file that is read with ``np.loadtxt``.  All damaged files
    are concatenated (label 1), all undamaged files are concatenated
    (label 0) and truncated to at most as many rows as the damaged data.
    Both groups are cut into windows of ``batch`` consecutive rows, the
    windows are shuffled with a fixed seed, and each window gets the label
    of its last row.

    Args:
        transform: optional callable applied to each ``(inputs, target)`` pair.
        CSVpath: path of the CSV listing the data files.
        col: column indices passed to ``np.loadtxt(usecols=...)``.
        skiprows: number of leading rows skipped in every data file.
        max_rows_undamaged: maximum rows read from each undamaged file.
        max_rows_damaged: maximum rows read from each damaged file.
        batch: window length in rows (must be positive).
        resample_factor: ``1`` keeps rows 0, 2, 4, ...; ``2`` keeps rows
            1, 3, 5, ...; any other value keeps every row.

    Attributes:
        x: array of shape ``(n_windows, 1, batch, n_columns)``.
        y: float array of shape ``(n_windows,)`` holding 0.0 / 1.0 labels.

    Raises:
        ValueError: if ``batch`` is not a positive integer.
    """

    def __init__(self, transform=None, CSVpath="CSV Files/CSV-A-1.csv", col=[1, 2, 6], skiprows=100, max_rows_undamaged=4096,
                 max_rows_damaged=131072, batch=256, resample_factor=None):
        if batch <= 0:
            raise ValueError("batch must be a positive integer")

        # File lists: one path per non-empty cell of each column.
        df = pd.read_csv(CSVpath)
        damaged = df["damaged data"].replace('', np.nan).dropna()
        undamaged = df["undamaged data"].replace('', np.nan).dropna()

        # Damaged signals (label 1).  Rows that do not fill a complete window
        # are dropped so that the reshape below can never fail and no window
        # mixes the two classes.
        self.x_d = self._load_group(damaged, skiprows, col, max_rows_damaged, resample_factor)
        self.d_batch = self.x_d.shape[0] // batch
        self.x_d = self.x_d[:self.d_batch * batch]
        self.d_rows = self.x_d.shape[0]
        self.y_d = np.ones((self.d_rows, 1), dtype=int)

        # Undamaged signals (label 0), capped at the amount of damaged data.
        self.x_ud = self._load_group(undamaged, skiprows, col, max_rows_undamaged, resample_factor)
        self.x_ud = self.x_ud[:self.d_rows]
        self.ud_batch = self.x_ud.shape[0] // batch
        self.x_ud = self.x_ud[:self.ud_batch * batch]
        self.ud_rows = self.x_ud.shape[0]
        self.y_ud = np.zeros((self.ud_rows, 1), dtype=int)

        # Stack features and labels side by side, cut into windows, shuffle.
        n_features = self.x_d.shape[1]
        n_windows = self.ud_batch + self.d_batch
        self.x = np.vstack((self.x_ud, self.x_d))
        self.y = np.vstack((self.y_ud, self.y_d))
        self.xy = np.hstack((self.x, self.y))
        self.xy = self.xy.reshape((n_windows, 1, batch, n_features + 1))
        self.xy = shuffle(self.xy, random_state=42)

        self.x = np.ascontiguousarray(self.xy[..., :n_features])
        # One label per window: the label stored in the window's last row.
        self.y = self.xy[:, 0, batch - 1, n_features].copy()

        self.transform = transform
        self.n_samples = self.x.shape[0]

    @staticmethod
    def _resample(data, resample_factor):
        """Keep every second row: offset 1 for factor 2, offset 0 for factor 1."""
        if resample_factor == 2:
            return data[1::2]
        if resample_factor == 1:
            return data[::2]
        return data

    @staticmethod
    def _load_group(paths, skiprows, usecols, max_rows, resample_factor):
        """Load, resample and concatenate every file in ``paths`` (rows stacked)."""
        chunks = []
        for idx, path in enumerate(paths):
            print(idx, path)
            # ndmin=2 keeps a single-row file two-dimensional.
            data = np.loadtxt(path, skiprows=skiprows, usecols=usecols, max_rows=max_rows, ndmin=2)
            chunks.append(data_reader._resample(data, resample_factor))
        if not chunks:
            return np.empty((0, np.size(usecols)))
        # One concatenate instead of repeated np.append (which copies each time).
        return np.concatenate(chunks, axis=0)

    def __getitem__(self, index):
        sample = self.x[index], self.y[index]

        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        return self.n_samples


In [None]:
class ToTensor:
    """Transform converting an ``(inputs, targets)`` sample into torch tensors.

    ``inputs`` must be a NumPy array (it is wrapped with ``torch.from_numpy``);
    ``targets`` is cast to ``int`` and returned as a 0-d ``torch.long`` tensor.
    """

    def __call__(self, sample):
        array, label = sample
        features = torch.from_numpy(array)
        target = torch.tensor(int(label), dtype=torch.long)
        return features, target

In [None]:
class snr_noise:
    """Transform adding zero-mean Gaussian noise at a target signal-to-noise ratio.

    The noise power is the mean power of the whole input array divided by
    ``10 ** (target_snr_db / 10)``; noise is drawn from NumPy's global RNG.

    Args:
        target_snr_db: desired SNR in dB (default 5, the previously
            hard-coded value).
    """

    def __init__(self, target_snr_db=5):
        self.target_snr_db = target_snr_db

    def __call__(self, sample):
        """Return ``(inputs + noise, targets)``; ``targets`` is passed through."""
        inputs, targets = sample

        sig_avg_watts = np.mean(inputs ** 2)
        # Working in linear power avoids log10(0) warnings for zero samples or
        # an all-zero signal (which simply receives zero noise).
        noise_avg_watts = sig_avg_watts / (10 ** (self.target_snr_db / 10))
        noise_volts = np.random.normal(0, np.sqrt(noise_avg_watts), np.shape(inputs))
        return inputs + noise_volts, targets

In [None]:
# Default transform pipeline: only converts each (inputs, target) sample
# into torch tensors via ToTensor.
tfms = transforms.Compose([
    ToTensor()
])

In [None]:
# Noisy transform pipeline: snr_noise perturbs the NumPy inputs first,
# then ToTensor converts the sample into torch tensors.
snr_noise_tfms = transforms.Compose([
    snr_noise(),
    ToTensor()
])

In [None]:
def namestr(obj, namespace):
    """Return the names in ``namespace`` bound to ``obj`` itself.

    Matching is by identity (``is``), not equality; names are returned in the
    namespace's iteration order.
    """
    matches = []
    for key in namespace:
        if namespace[key] is obj:
            matches.append(key)
    return matches

In [None]:
def predict_class(model, dataloader):
    """Predict the binary class of every sample yielded by ``dataloader``.

    The model output is thresholded at zero: positive scores map to class 1,
    zero and negative scores to class 0.  Inputs are moved to the device the
    model's parameters live on, so the function no longer depends on the
    notebook-global ``run``.  The model's train/eval mode is not changed;
    callers should put it in eval mode first.

    Args:
        model: a torch module returning one score per sample.
        dataloader: iterable of ``(inputs, targets)`` tensor batches.

    Returns:
        ``(result, y)``: flat int array of predicted classes and the
        concatenated targets as a NumPy array.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    predictions = []
    labels = []
    with torch.no_grad():
        for inputs, targets in tqdm(dataloader):
            scores = model(inputs.to(device))
            # sign/2 + 0.5 maps {-1, 0, +1} -> {0, 0.5, 1}; astype(int) floors 0.5 to 0.
            preds = scores.sign() / 2 + 0.5
            predictions.append(preds.cpu().numpy())
            labels.append(targets.cpu().numpy())

    result = np.concatenate(predictions).reshape(-1).astype(int)
    y = np.concatenate(labels)
    return result, y

In [None]:
def predict_class_names(model, dataloader, class_names=['undamaged', 'damaged']):
    """Predict a class name for every sample and print the damage percentages.

    The model is moved to the notebook-global ``device`` (set by the training
    or loading cell) and each batch is sent to that same device.

    Inputs:
        - model: a pytorch model returning one score per sample
        - dataloader: a torch.utils.data.DataLoader object
        - class_names: class names indexed by predicted class (0, 1)

    Output:
        - pred_class_names: predicted class name for each input (python list)
        - y: the concatenated true labels as a NumPy array
    """
    predictions = []
    labels_seen = []
    model = model.to(device)
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader):
            # Previously routed through an undefined helper (`to_var`).
            inputs = inputs.to(device)
            scores = model(inputs)
            # Positive score -> 1, otherwise 0 (0.5 is floored by astype(int)).
            preds = scores.sign() / 2 + 0.5
            predictions.append(preds.cpu().numpy())
            labels_seen.append(labels.cpu().numpy())

    result = np.concatenate(predictions).reshape(-1).astype(int)
    y = np.concatenate(labels_seen)

    pred_class_names = [class_names[i] for i in result]
    print('Damage Possibility: ', 100*sum(result)/len(result), ' %', 'actual damage:', 100*sum(y)/len(y), ' %')
    return  pred_class_names, y

In [None]:
def predict_class_DP_AVG(model, dataloader, number=5):
    """Run inference ``number`` times and report damage percentage and timing.

    The model is moved to the notebook-global ``device`` and every batch is
    sent there.  For each pass the percentage of samples predicted as class 1
    (score > 0) is recorded.

    Args:
        model: a torch module returning one score per sample.
        dataloader: iterable of ``(inputs, targets)`` tensor batches.
        number: how many full passes over ``dataloader`` to run.

    Returns:
        The tuple ``(Results, 'Average DP:', mean of Results,
        'Average Time: ', seconds per pass)`` where ``Results`` is the list
        of per-pass damage percentages.
    """
    model = model.to(device)
    Results = []
    since = time.time()
    with torch.no_grad():
        for _ in range(number):
            batch_preds = []
            for inputs, _targets in tqdm(dataloader):
                # Previously routed through an undefined helper (`to_var`).
                scores = model(inputs.to(device))
                preds = scores.sign() / 2 + 0.5
                batch_preds.append(preds.cpu().numpy())

            result = np.concatenate(batch_preds).reshape(-1).astype(int)
            Results.append(100*sum(result)/len(result))
    times = time.time() - since
    return Results , 'Average DP:', np.average(Results), 'Average Time: ', times/number

In [None]:
def predict_CPU(model, dataloader, number=5):
    """Run inference on the CPU ``number`` times; report damage percentage and timing.

    ``Module.cpu()`` moves the caller's model in place, so the model is sent
    back to the device it came from once timing is finished; otherwise later
    GPU cells would fail with a device mismatch.

    Args:
        model: a torch module returning one score per sample.
        dataloader: iterable of ``(inputs, targets)`` CPU tensor batches.
        number: how many full passes over ``dataloader`` to run.

    Returns:
        The tuple ``(Results, 'Average DP:', mean of Results,
        'Average Time: ', seconds per pass)`` where ``Results`` is the list
        of per-pass percentages of samples predicted as class 1.
    """
    first_param = next(model.parameters(), None)
    original_device = first_param.device if first_param is not None else None

    model = model.cpu()
    Results = []
    try:
        since = time.time()
        with torch.no_grad():
            for _ in range(number):
                batch_preds = []
                for inputs, _targets in tqdm(dataloader):
                    scores = model(inputs)
                    preds = scores.sign() / 2 + 0.5
                    batch_preds.append(preds.cpu().numpy())

                result = np.concatenate(batch_preds).reshape(-1).astype(int)
                Results.append(100*sum(result)/len(result))
        times = time.time() - since
    finally:
        # Restore the model's original placement (not part of the timing).
        if original_device is not None:
            model.to(original_device)
    return Results , 'Average DP:', np.average(Results), 'Average Time: ', times/number

In [None]:
class HingeLoss(torch.nn.Module):
    """Mean hinge loss: ``mean(max(0, 1 - target * inputs))``.

    ``target`` and ``inputs`` are multiplied element-wise with broadcasting;
    the training code in this notebook passes targets encoded as -1 / +1.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs, target):
        margins = 1 - target * inputs
        return torch.clamp(margins, min=0).mean()

In [None]:
class RunBuilder():
    """Expands a mapping of hyper-parameter lists into every combination."""

    @staticmethod
    def get_runs(params):
        """Return one ``Run`` namedtuple per element of the Cartesian product.

        ``params`` maps a parameter name to the list of values to try; the
        namedtuple fields follow the mapping's key order.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]

In [None]:
# Dataset configuration for network 1 with no resampling (resample_factor=None).
# NOTE(review): the following cell rebinds net_id, train_ds and valid_ds, so
# only one of the two configuration cells is effective per session.
# NOTE(review): this cell does not define `resampling_factor`, which later
# cells use when building file names - running only this configuration raises
# NameError there.
net_id = 1
column=[29,30,25]
Signal_Lenth=256

# Rows read from each undamaged file for the training set:
max_rows_undamaged_t=17874  # for triple-damaged cases


train_ds = data_reader(transform=tfms, CSVpath=f"CSV Files/CSV-A-{net_id}.csv",
                       col=column, skiprows=11, max_rows_undamaged=max_rows_undamaged_t, max_rows_damaged=196608,
                       batch=Signal_Lenth, resample_factor=None)

# Rows read from each undamaged file for the validation set:
max_rows_undamaged_v=938   # for single-damaged cases

# skiprows=196619 equals 11 + 196608; presumably this skips the header plus
# the rows used for training - TODO confirm against the data files.
valid_ds = data_reader(transform=tfms, CSVpath=f"CSV Files/CSV-B-{net_id}.csv",
                       col=column, skiprows=196619, max_rows_undamaged=max_rows_undamaged_v, max_rows_damaged=32768,
                       batch=Signal_Lenth, resample_factor=None)

In [None]:
# Dataset configuration for network 30 using half of the rows.
# resample_factor: 1 keeps the odd lines (1st, 3rd, ...) and 2 keeps the even
# lines (2nd, 4th, ...), so the training and validation sets below take
# interleaved rows from the files they read.

net_id = 30
column=[29,30,25]
Signal_Lenth=256
# Label used only inside output file names by later cells.
resampling_factor = 'half'

# Rows read from each undamaged file for the training set:
max_rows_undamaged_t=7490   # for single-damaged cases

train_ds = data_reader(transform=tfms, CSVpath=f"CSV Files/CSV-A-{net_id}.csv",
                       col=column, skiprows=11, max_rows_undamaged=max_rows_undamaged_t, max_rows_damaged=262144,
                       batch=Signal_Lenth, resample_factor=1)

# Rows read from each undamaged file for the validation set:
max_rows_undamaged_v=1874   # for single-damaged cases

valid_ds = data_reader(transform=tfms, CSVpath=f"CSV Files/CSV-B-{net_id}.csv",
                       col=column, skiprows=11, max_rows_undamaged=max_rows_undamaged_v, max_rows_damaged=65536,
                       batch=Signal_Lenth, resample_factor=2)

In [None]:
# Number of windows in the training and validation datasets.
len(train_ds), len(valid_ds)

In [None]:
# Sanity check: print the first training sample and plot its first column.
first_data = train_ds[0]
print(first_data)
features, labels = first_data
print(features.shape, labels.shape)
print(labels)
print(type(features), type(labels))
# features[0, :, 0] selects every row of the window for column index 0.
plt.plot(features[0, :, 0])
plt.xlabel('sample step')
plt.ylabel('amplitude')
plt.show()


In [None]:
# Sanity check: print the first validation sample and plot its first column.
first_data = valid_ds[0]
print(first_data)
features, labels = first_data
print(features.shape, labels.shape)
print(labels)
print(type(features), type(labels))
# features[0, :, 0] selects every row of the window for column index 0.
plt.plot(features[0, :, 0])
plt.xlabel('sample step')  # label typo fixed; now matches the training plot
plt.ylabel('amplitude')
plt.show()

In [None]:
class CNN_SVM(nn.Module):
    """Two-block CNN feature extractor followed by a single linear (SVM-style) output.

    Expects input of shape ``(N, 1, signal_length, n_channels)`` and returns
    one raw score per sample, shape ``(N, 1)``.  Both convolutions preserve
    the spatial size; the two max-pools shrink only the signal axis, by
    factors 2 and 4.

    Args:
        signal_length: number of rows per window (default 256, the size the
            linear layer was previously hard-coded for).
        n_channels: number of signal columns per window (default 3).

    Raises:
        ValueError: if ``signal_length`` is too short to survive the pooling
            or ``n_channels`` is not positive.
    """

    def __init__(self, signal_length=256, n_channels=3):
        super().__init__()

        # Length of the signal axis after MaxPool (2, 1) then (4, 1); both floor.
        pooled_length = (signal_length // 2) // 4
        if pooled_length < 1 or n_channels < 1:
            raise ValueError("signal_length must be >= 8 and n_channels >= 1")

        self.bn = nn.BatchNorm2d(1)

        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=(9,3), padding=(4,1)),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.MaxPool2d((2, 1))
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(8, 16, kernel_size=(3,3), padding=(1,1)),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d((4, 1))
        )

        self.dropout = nn.Dropout2d(p=0.5)

        # 16 feature maps of size pooled_length x n_channels (16*32*3 by default).
        self.svm = nn.Linear(16 * pooled_length * n_channels, 1)

    def forward(self, x):
        x = self.bn(x)
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.dropout(out)
        out = out.view(out.size(0), -1)  # flatten everything but the batch axis
        out = self.svm(out)
        return out

In [None]:
# Hyper-parameter grid: every key maps to the list of values to try, and
# RunBuilder.get_runs expands the lists into their Cartesian product.
params = OrderedDict([
    ("lr", [0.01]),
    ("device", ["cuda"]),
    ("batch_size", [8]),
    ("epoch_number", [30]),
    ("shuffle", [True]),
])

In [None]:
# Build the (unshuffled) validation loader. The loop also leaves `run` bound
# to the last configuration; later cells read `run` at module level.
for run in RunBuilder.get_runs(params):    
    valid_dl = DataLoader(dataset=valid_ds, batch_size=run.batch_size, shuffle=False)

In [None]:
criterion = HingeLoss()

# Per-epoch history (shared across all runs of the grid).
accuracy_list = []
accuracy_list_v = []
loss_list = []
loss_list_v = []


def _evaluate(network, dataloader, criterion, device):
    """Return ``(mean batch hinge loss, accuracy)`` of ``network`` on ``dataloader``.

    Runs in eval mode under ``torch.no_grad()`` so dropout is disabled and
    BatchNorm running statistics are not updated by the evaluated data.
    """
    network.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = network(inputs)
            # Hinge loss expects targets in {-1, +1}; the labels are {0, 1}.
            signed_labels = 2 * (labels.float() - 0.5)
            total_loss += criterion(outputs.t(), signed_labels).item()
            predictions = outputs.sign() / 2 + 0.5
            correct += (predictions.view(-1).long() == labels).sum().item()
            total += inputs.shape[0]
    mean_loss = total_loss / len(dataloader) if len(dataloader) else 0.0
    accuracy = correct / total if total else 0.0
    return mean_loss, accuracy


# `run` here is the value left over from the cell that built `valid_dl`.
os.makedirs('Loss', exist_ok=True)
with open(f'Loss/losses-svm-{net_id}-b{run.batch_size}-signal_lenth-{Signal_Lenth}-sample_rate-{resampling_factor}.txt', 'w') as fa:

    for run in RunBuilder.get_runs(params):

        device = torch.device(run.device)
        model = CNN_SVM().double()
        network = model.to(device)
        loader = DataLoader(train_ds, batch_size=run.batch_size, shuffle=run.shuffle)
        optimizer = optim.Adam(network.parameters(), lr=run.lr, weight_decay=1e-5)

        scheduler2 = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[4,20,25], gamma=0.1, verbose=True)

        # makedirs also creates the parent 'wts' directory when it is missing.
        wts_dir = f'wts/svm-{net_id}-b{run.batch_size}-signal_lenth-{Signal_Lenth}-sample_rate-{resampling_factor}'
        os.makedirs(wts_dir, exist_ok=True)

        since = time.time()
        # Clone the tensors: state_dict() returns references that keep
        # changing as training continues.
        best_model_wts = {k: v.detach().clone() for k, v in network.state_dict().items()}
        best_acc = 0.0
        epoch_loss = 0
        epoch_loss_v = 0
        epoch_acc = 0
        epoch_acc_v = 0

        for epoch in range(run.epoch_number):
            # Back to training mode (evaluation below switches to eval mode).
            network.train()
            total_loss = 0

            for batch in loader:

                inputs = batch[0].to(device)
                labels = batch[1].to(device)
                labels = 2 * (labels.float() - 0.5)  # {0, 1} -> {-1, +1}
                outputs = network(inputs)

                loss = criterion(outputs.t(), labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            epoch_loss = (total_loss/len(loader))

            print('Epoch: [% d/% d], Loss: %.4f'
                  % (epoch + 1, run.epoch_number, epoch_loss))

            scheduler2.step()

            # Validation metrics: loss is now computed against the validation
            # labels and accumulated once per batch.
            epoch_loss_v, epoch_acc_v = _evaluate(network, valid_dl, criterion, device)

            # Training accuracy, measured in eval mode on the training data.
            _, epoch_acc = _evaluate(network, loader, criterion, device)

            # Keep (and save) a real copy of the best weights so far.
            if epoch_acc_v > best_acc:
                best_acc = epoch_acc_v
                best_model_wts = {k: v.detach().clone() for k, v in network.state_dict().items()}
                # The accuracy is formatted as a plain number; a tensor repr
                # such as "tensor(..., device='cuda:0')" is not a safe file name.
                torch.save(best_model_wts, f"./{wts_dir}/epoch-{epoch}-acc-{best_acc:.4f}.pth")

            print('Train Accuracy: %f  \t Valid Accuracy: %f \t%%' % (100 * epoch_acc, 100 * epoch_acc_v))

            # Histories now hold plain Python floats.
            accuracy_list.append(epoch_acc)
            accuracy_list_v.append(epoch_acc_v)
            loss_list.append(epoch_loss)
            loss_list_v.append(epoch_loss_v)

            fa.write("\n")
            fa.write('Train Accuracy: %f  \t Valid Accuracy: %f \t%%' % (100 * epoch_acc, 100 * epoch_acc_v))
            fa.write("\t Epoch Loss train: %f\t Epoch Loss valid: %f\t" % (epoch_loss, epoch_loss_v))
        times = time.time() - since
        print('time: ', times)

In [None]:
# Loading a saved model: rebuild the network and optimizer, then restore
# their state and the training history from the checkpoint file.
for run in RunBuilder.get_runs(params):
    
    device = torch.device(run.device)
    model = CNN_SVM().double()
    network = model.to(device)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    losses = []
    Accuracy_tracker = []

# The statements below use `run`, `network` and `optimizer` from the last loop
# iteration above.
# NOTE(review): the checkpoint stores a pickled loss object under 'criterion',
# so torch.load has to unpickle arbitrary Python objects - only load files
# from a trusted source.
# NOTE(review): the path uses a literal backslash separator (Windows style).
checkpoint = torch.load(f'Saved_Model\\cnn_svm{net_id}-b{run.batch_size}-signal_lenth-{Signal_Lenth}-sample_rate-{resampling_factor}.pt')
network.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch_number = checkpoint['epoch']
loss_list = checkpoint['losses']
loss_list_v = checkpoint['losses_v']
accuracy_list = checkpoint['Acc']
accuracy_list_v = checkpoint['Acc_v']
criterion = checkpoint['criterion'] 
# Inference mode; switch to network.train() instead to continue training.
#network.train()
network.eval()

In [None]:
# Training vs. validation hinge loss per epoch.
plt.figure(figsize=(12,4))
plt.plot(loss_list, '-o')
plt.plot(loss_list_v, '-o')
plt.legend(['Train', 'Valid'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Hinge')

In [None]:
# Training vs. validation accuracy per epoch.
plt.figure(figsize=(12,4))
#plt.ylim([0, 1])
# One tick per epoch; the tick count comes from the current `run`, not from
# the length of the plotted lists.
X_axis = range(run.epoch_number)
plt.xticks(X_axis)
plt.plot(accuracy_list, '-o')
plt.plot(accuracy_list_v, '-o')
plt.legend(['Train', 'Valid'])
plt.xlabel('Epoch')
plt.ylabel('Acc')
plt.title('Accuracy')

In [None]:
# Switch to inference mode and build an unshuffled loader over the training
# set for the evaluation cells below.
network.eval()
train_dl = DataLoader(dataset=train_ds, batch_size=run.batch_size, shuffle=False)

In [None]:
# Classification report for the training and validation loaders, in that order.
#load = train_dl
#load = valid_dl

load = [train_dl, valid_dl]
# NOTE(review): the five lists below are never filled (the appends are
# commented out further down).
acc=[]
pre=[]
re=[]
f=[]
au=[]

print(f'Network No. {net_id} \t batch_size: {run.batch_size} \t signal_lenth: {Signal_Lenth}')

for l in load:
    
    y_pred, y_true = predict_class(network, l)
    # Prints the global variable name(s) bound to this loader, to label the report.
    print(namestr(l,globals()))
    #print('\n', 'Exprience No.', i+1,':')
    
    report_list=[]
    
    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_true=y_true,y_pred=y_pred)
    #acc.append(accuracy)
    report_list.append(accuracy)
    print('Accuracy: %f' % accuracy)

    # precision tp / (tp + fp)
    precision = precision_score(y_pred=y_pred, y_true=y_true)
    #pre.append(precision)
    report_list.append(precision)
    print('Precision: %f' % precision)

    # recall: tp / (tp + fn)
    recall = recall_score(y_pred=y_pred, y_true=y_true)
    #re.append(recall)
    report_list.append(recall)
    print('Recall: %f' % recall)

    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_pred=y_pred, y_true=y_true)
    #f.append(f1)
    report_list.append(f1)
    print('F1 score: %f' % f1)

    # ROC AUC
    # NOTE(review): y_pred holds hard 0/1 predictions rather than decision
    # scores, so this AUC is based on a single operating point.
    auc = roc_auc_score(y_true=y_true, y_score=y_pred)
    #au.append(auc)
    report_list.append(auc)
    print('ROC AUC: %f' % auc, '\n')
    # Same five values again, one per line without labels.
    for rep in report_list:
        print(rep)
    print('\n')
# After the loop, y_true / y_pred hold the results for the last loader (valid_dl).

In [None]:
# Confusion matrix (raw counts) for the split evaluated last by the metrics
# cell above. labels=[0, 1] forces a 2x2 matrix even when only one class
# occurs in the data.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
# `plot_confusion_matrix` is not defined anywhere in this notebook, so the
# plot is drawn with scikit-learn's ConfusionMatrixDisplay instead.
fig, ax = plt.subplots(figsize=(5, 5))
metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['undamaged', 'damaged']).plot(ax=ax)
plt.show()

In [None]:
# Saving the model: network and optimizer state plus the training history.
# NOTE(review): 'criterion' stores the loss object itself, so the file is only
# loadable where that class can be unpickled.
torch.save({
    'epoch' : run.epoch_number,
    'losses' : loss_list,
    'losses_v' : loss_list_v,
    'Acc' : accuracy_list,
    'Acc_v': accuracy_list_v,
    'model_state_dict': network.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'criterion' : criterion,
},f'Saved_Model\\cnn_svm{net_id}-b{run.batch_size}-signal_lenth-{Signal_Lenth}-sample_rate-{resampling_factor}.pt')

In [None]:
class test_reader(Dataset):
    """Dataset of fixed-length windows from ONE data file, for per-case testing.

    ``CSVpath`` points to a CSV with the columns ``"damaged data"`` and
    ``"undamaged data"`` whose non-empty cells are paths of text files read
    with ``np.loadtxt``.  ``state`` selects the column and ``case`` the entry
    in it (looked up by the pandas index label that remains after empty cells
    are dropped).  The file is cut into windows of ``batch`` rows; trailing
    rows that do not fill a window are discarded.

    Args:
        transform: optional callable applied to each ``(inputs, target)`` pair.
        CSVpath: path of the CSV listing the data files.
        col: column indices passed to ``np.loadtxt(usecols=...)``.
        skiprows: number of leading rows skipped in the data file.
        max_rows_undamaged: maximum rows read when ``state == 0``.
        max_rows_damaged: maximum rows read otherwise.
        batch: window length in rows (must be positive).
        state: ``0`` for an undamaged case (label 0), anything else for a
            damaged case (label 1).
        case: which file of the selected column to load.

    Attributes:
        x: array of shape ``(n_windows, 1, batch, n_columns)``.
        y: float array of shape ``(n_windows,)`` holding the label.

    Raises:
        ValueError: if ``batch`` is not a positive integer.
    """

    def __init__(self, transform=None, CSVpath="CSV Files/CSV-B-1.csv", col=[1, 2, 6], skiprows=100, max_rows_undamaged=4096,
                 max_rows_damaged=131072, batch=256, state=0, case=0):
        if batch <= 0:
            raise ValueError("batch must be a positive integer")

        # File lists: one path per non-empty cell of each column.
        df = pd.read_csv(CSVpath)
        damaged = df["damaged data"].replace('', np.nan).dropna()
        undamaged = df["undamaged data"].replace('', np.nan).dropna()

        n_cols = np.size(col)
        self.x_ud = np.empty((0, n_cols))
        self.x_d = np.empty((0, n_cols))

        if state == 0:
            # undamaged
            path = undamaged[case]
            print(case, path)
            signal = np.loadtxt(path, skiprows=skiprows, usecols=col, max_rows=max_rows_undamaged, ndmin=2)
            self.ud_batch = signal.shape[0] // batch
            # Drop the incomplete trailing window so the reshape cannot fail.
            self.x_ud = signal[:self.ud_batch * batch]
            self.ud_rows = self.x_ud.shape[0]
            self.y_ud = np.zeros((self.ud_rows, 1), dtype=int)
            self.x = self.x_ud
            self.y = self.y_ud
            self.batch = self.ud_batch
        else:
            # damaged
            path = damaged[case]
            print(case, path)
            signal = np.loadtxt(path, skiprows=skiprows, usecols=col, max_rows=max_rows_damaged, ndmin=2)
            self.d_batch = signal.shape[0] // batch
            self.x_d = signal[:self.d_batch * batch]
            self.d_rows = self.x_d.shape[0]
            self.y_d = np.ones((self.d_rows, 1), dtype=int)
            self.x = self.x_d
            self.y = self.y_d
            self.batch = self.d_batch

        # Stack features and labels, cut into windows of `batch` rows (the
        # window length was previously hard-coded to 256), then shuffle.
        n_features = self.x.shape[1]
        self.xy = np.hstack((self.x, self.y))
        self.xy = self.xy.reshape((self.batch, 1, batch, n_features + 1))
        self.xy = shuffle(self.xy, random_state=42)

        self.x = np.ascontiguousarray(self.xy[..., :n_features])
        # One label per window, taken from the window's last row.
        self.y = self.xy[:, 0, batch - 1, n_features].copy()

        self.transform = transform
        self.n_samples = self.x.shape[0]

    def __getitem__(self, index):
        sample = self.x[index], self.y[index]

        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        return self.n_samples


In [None]:
# Evaluate the network on each individual test case.
# state: 0 for undamaged cases and 1 for damaged

for i in range(35):
    # Forward slash instead of "\C": the backslash formed an invalid escape
    # sequence and only worked as a path separator on Windows.
    test_ds = test_reader(transform=tfms, CSVpath="CSV Files/CSV-B-30.csv", col=[29,30,25], skiprows=196619,
                            max_rows_undamaged=62720,
                            max_rows_damaged=62720, batch=256, state=0, case=i)
    test_dl = DataLoader(test_ds, batch_size=8, shuffle=False)

    predict_class_names(network, test_dl, ['Undamage', 'Damage'])


In [None]:
# Predicted class names (and printed damage percentages) for the validation set.
predict_class_names(network, valid_dl, ['Undamage', 'Damage'])

In [None]:
# Average damage percentage and time per pass over 30 passes on the configured
# device; test_dl is the loader left over from the last test-loop iteration.
predict_class_DP_AVG(network, test_dl, number=30)

In [None]:
# Same measurement with the model moved to the CPU.
predict_CPU(network, test_dl, number=30)

In [None]:
# Sanity check: print the first sample of the last test dataset and plot its
# first column.
first_data = test_ds[0]
print(first_data)
features, labels = first_data
print(features.shape, labels.shape)
print(labels)
print(type(features), type(labels))
# features[0, :, 0] selects every row of the window for column index 0.
plt.plot(features[0, :, 0])
plt.show()

In [None]:
# Saving the model again: same checkpoint contents as the earlier save cell.
# NOTE(review): this file name omits the "-sample_rate-..." suffix, so the
# loading cell (which includes that suffix in its path) will not pick up this
# file.
torch.save({
    'epoch' : run.epoch_number,
    'losses' : loss_list,
    'losses_v' : loss_list_v,
    'Acc' : accuracy_list,
    'Acc_v': accuracy_list_v,
    'model_state_dict': network.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'criterion' : criterion,
},f'Saved_Model\\cnn_svm{net_id}-b{run.batch_size}-signal_lenth-{Signal_Lenth}.pt')