In [56]:
import numpy as np
import pickle
import random
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader,Dataset,SequentialSampler,BatchSampler
from torchsummary import summary



In [57]:
file_path1 = './samples/pos_class_train.pkl'
file_path2 = './samples/neg_class_train.pkl'
file_path3='./samples/testing_augmented.pkl'

with open(file_path1, 'rb') as f:
    # Load the contents of the pickle file
    pos_class_train = pickle.load(f)

with open(file_path2, 'rb') as f:
    # Load the contents of the pickle file
    neg_class_train = pickle.load(f)


In [58]:
class CustomOversampledDataset(Dataset):
    def __init__(self, class_a, class_b):
        # Assume class_a and class_b are lists of tuples (features, label)
        if len(class_a) > len(class_b):
            self.majority_class = class_a
            self.minority_class = class_b
        else:
            self.majority_class = class_b
            self.minority_class = class_a

        self.oversample_minority()

    def oversample_minority(self):
        # Calculate how many times to repeat the minority class
        repeat_times = len(self.majority_class) // len(self.minority_class)
        self.data = self.majority_class + self.minority_class * repeat_times
        # Add remainder if necessary
        remainder = len(self.majority_class) % len(self.minority_class)
        if remainder:
            self.data += self.minority_class[:remainder]
        
        # Shuffle to mix the samples using torch.randperm
        perm = torch.randperm(len(self.data))
        self.data = [self.data[i] for i in perm]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features, label = self.data[idx]
        return torch.tensor(features, dtype=torch.float32), torch.tensor(label, dtype=torch.long)


In [59]:
class NonRepeatingBatchSampler(BatchSampler):
    def __init__(self, data_source, batch_size, drop_last=True):
        super().__init__(SequentialSampler(data_source), batch_size, drop_last)
        self.data_source = data_source

    def __iter__(self):
        batch = []
        seen_ids = set()
        for idx in torch.randperm(len(self.data_source)):
            sample_id = id(self.data_source.data[idx])
            if sample_id not in seen_ids or len(seen_ids) == len(self.data_source): # Check if all samples have been seen
                batch.append(idx)
                seen_ids.add(sample_id)
                if len(batch) == self.batch_size:
                    yield batch
                    batch = []
                    seen_ids = set()
        if batch and not self.drop_last:
            yield batch


In [60]:
dataset = CustomOversampledDataset(pos_class_train, neg_class_train)
sampler = NonRepeatingBatchSampler(dataset, batch_size=32)
train_loader = DataLoader(dataset, batch_sampler=sampler)


In [61]:
class CombinedCustomDataset(Dataset):
    def __init__(self, file_path):
        with open(file_path, 'rb') as f:
            self.data = pickle.load(f)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features, label = self.data[idx]
        features = torch.FloatTensor(features)
        label = torch.tensor(label, dtype=torch.float32)
        return features, label
test_dataset = CombinedCustomDataset(file_path3)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [62]:
class Train_Model(nn.Module):
    def __init__(self, in_channel=2, input_length=6250):
        super(Train_Model,self).__init__()
        self.c1=nn.Conv1d(in_channel,out_channels=5,kernel_size=10,stride=1)
        self.p1=nn.MaxPool1d(kernel_size=2,stride=2)
        self.c2=nn.Conv1d(in_channels=5,out_channels=10,kernel_size=10,stride=1)
        self.p2=nn.MaxPool1d(kernel_size=2,stride=2)
        self.c3 = nn.Conv1d(10, 10, kernel_size=10, stride=1)
        self.p3 =nn.MaxPool1d(kernel_size=2,stride=2)
        self.c4 = nn.Conv1d(10, 15, kernel_size=5, stride=1)
        self.p4 =nn.MaxPool1d(kernel_size=2,stride=2)
        self.flatten=nn.Flatten()


        self.fc1=nn.Linear(5760,64)
        self.fc2=nn.Linear(64,20)
        self.output=nn.Linear(20,1)

    def forward(self,x):
        x=self.c1(x)
        x = self.p1(x)
        x=nn.LeakyReLU()(x)
        x = self.c2(x)
        x = nn.LeakyReLU()(x)
        x = self.p2(x)
        x = self.c3(x)
        x = nn.LeakyReLU()(x)
        x = self.p3(x)
        x = self.c4(x)
        x = nn.LeakyReLU()(x)
        x = self.p4(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = nn.LeakyReLU()(x)
        x = self.fc2(x)
        x = nn.LeakyReLU()(x)
        x = torch.sigmoid(self.output(x))
        return x

In [63]:
model=Train_Model(in_channel=2,input_length=6250)
criterian=nn.BCELoss()
optimizers=torch.optim.Adam(model.parameters(),lr=0.0001)
summary(model, (2, 6250))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1              [-1, 5, 6241]             105
         MaxPool1d-2              [-1, 5, 3120]               0
            Conv1d-3             [-1, 10, 3111]             510
         MaxPool1d-4             [-1, 10, 1555]               0
            Conv1d-5             [-1, 10, 1546]           1,010
         MaxPool1d-6              [-1, 10, 773]               0
            Conv1d-7              [-1, 15, 769]             765
         MaxPool1d-8              [-1, 15, 384]               0
           Flatten-9                 [-1, 5760]               0
           Linear-10                   [-1, 64]         368,704
           Linear-11                   [-1, 20]           1,300
           Linear-12                    [-1, 1]              21
Total params: 372,415
Trainable params: 372,415
Non-trainable params: 0
-------------------------------

In [64]:
model.train()
epochs=70
losses=[]
for epoch in range(epochs):
    for inputs,outputs in train_loader:
        outputs=outputs.float()
        y_pred=model.forward(inputs).squeeze()
        loss=criterian(y_pred,outputs)
        losses.append(loss)
        optimizers.zero_grad()
        loss.backward()
        optimizers.step()

    print('epoch {}/{}:  Loss: {}'.format(epoch,epochs,loss.item()))

epoch 0/70:  Loss: 0.6958727836608887
epoch 1/70:  Loss: 0.6359607577323914
epoch 2/70:  Loss: 0.5279501676559448
epoch 3/70:  Loss: 0.5587435960769653
epoch 4/70:  Loss: 0.5982241630554199
epoch 5/70:  Loss: 0.5619750618934631
epoch 6/70:  Loss: 0.5821767449378967
epoch 7/70:  Loss: 0.49157285690307617
epoch 8/70:  Loss: 0.44785910844802856
epoch 9/70:  Loss: 0.43653279542922974
epoch 10/70:  Loss: 0.3752642273902893
epoch 11/70:  Loss: 0.4658612608909607
epoch 12/70:  Loss: 0.42323076725006104
epoch 13/70:  Loss: 0.5785524845123291
epoch 14/70:  Loss: 0.41239941120147705
epoch 15/70:  Loss: 0.2316867560148239
epoch 16/70:  Loss: 0.39910703897476196
epoch 17/70:  Loss: 0.39036881923675537
epoch 18/70:  Loss: 0.49795085191726685
epoch 19/70:  Loss: 0.4272502064704895
epoch 20/70:  Loss: 0.5759829878807068
epoch 21/70:  Loss: 0.3793007433414459
epoch 22/70:  Loss: 0.41361236572265625
epoch 23/70:  Loss: 0.4744502604007721
epoch 24/70:  Loss: 0.3662857413291931
epoch 25/70:  Loss: 0.4179

In [66]:
model.eval()
total_accuracy=0

with torch.no_grad():
    for inputs, targets in test_loader:
        test_preds = model(inputs).squeeze()  # Squeeze to ensure dimension match
        test_loss = criterian(test_preds, targets)
        
        # Calculate accuracy
        predicted = test_preds.round()  # Assuming threshold of 0.5
        accuracy = (predicted == targets).float().mean().item()
        total_accuracy += accuracy

    avg_accuracy = total_accuracy / len(test_loader)
    print(f'Test Loss: {test_loss.item()}, Average Accuracy: {avg_accuracy}')



Test Loss: 0.29196977615356445, Average Accuracy: 0.7043269230769231
