In [61]:
import numpy as np
import pickle
import random
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader,Dataset,SequentialSampler,BatchSampler
import tensorflow as tf


In [62]:
file_path1 = './samples/pos_class_train.pkl'
file_path2 = './samples/neg_class_train.pkl'
file_path3='./samples/testing_data.pkl'

with open(file_path1, 'rb') as f:
    # Load the contents of the pickle file
    pos_class_train = pickle.load(f)

with open(file_path2, 'rb') as f:
    # Load the contents of the pickle file
    neg_class_train = pickle.load(f)


In [63]:
class CustomOversampledDataset(Dataset):
    def __init__(self, class_a, class_b):
        # Assume class_a and class_b are lists of tuples (features, label)
        if len(class_a) > len(class_b):
            self.majority_class = class_a
            self.minority_class = class_b
        else:
            self.majority_class = class_b
            self.minority_class = class_a

        self.oversample_minority()

    def oversample_minority(self):
        # Calculate how many times to repeat the minority class
        repeat_times = len(self.majority_class) // len(self.minority_class)
        self.data = self.majority_class + self.minority_class * repeat_times
        # Add remainder if necessary
        remainder = len(self.majority_class) % len(self.minority_class)
        if remainder:
            self.data += self.minority_class[:remainder]
        
        # Shuffle to mix the samples using torch.randperm
        perm = torch.randperm(len(self.data))
        self.data = [self.data[i] for i in perm]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features, label = self.data[idx]
        return torch.tensor(features, dtype=torch.float32), torch.tensor(label, dtype=torch.long)


In [64]:
class NonRepeatingBatchSampler(BatchSampler):
    def __init__(self, data_source, batch_size, drop_last=True):
        super().__init__(SequentialSampler(data_source), batch_size, drop_last)
        self.data_source = data_source

    def __iter__(self):
        batch = []
        seen_ids = set()
        for idx in torch.randperm(len(self.data_source)):
            sample_id = id(self.data_source.data[idx])
            if sample_id not in seen_ids or len(seen_ids) == len(self.data_source): # Check if all samples have been seen
                batch.append(idx)
                seen_ids.add(sample_id)
                if len(batch) == self.batch_size:
                    yield batch
                    batch = []
                    seen_ids = set()
        if batch and not self.drop_last:
            yield batch


In [65]:
dataset = CustomOversampledDataset(pos_class_train, neg_class_train)
sampler = NonRepeatingBatchSampler(dataset, batch_size=32)
train_loader = DataLoader(dataset, batch_sampler=sampler)


In [66]:
class CombinedCustomDataset(Dataset):
    def __init__(self, file_path):
        with open(file_path, 'rb') as f:
            self.data = pickle.load(f)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features, label = self.data[idx]
        features = torch.FloatTensor(features)
        label = torch.tensor(label, dtype=torch.float32)
        return features, label
test_dataset = CombinedCustomDataset(file_path3)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [67]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

model = NeuralNetwork(800)
criterian = nn.BCELoss()
optimizers = optim.Adam(model.parameters(), lr=0.001)



In [68]:
model.train()
epochs=70
losses=[]
for epoch in range(epochs):
    for inputs,outputs in train_loader:
        y_pred=model.forward(inputs).squeeze()
        outputs=outputs.float()
        loss=criterian(y_pred,outputs)
        losses.append(loss)
        optimizers.zero_grad()
        loss.backward()
        optimizers.step()

    print('epoch {}/{}:  Loss: {}'.format(epoch,epochs,loss.item()))

epoch 0/70:  Loss: 0.5962590575218201
epoch 1/70:  Loss: 0.5711008906364441
epoch 2/70:  Loss: 0.7440171837806702
epoch 3/70:  Loss: 0.5756639242172241
epoch 4/70:  Loss: 0.1681673228740692
epoch 5/70:  Loss: 0.4336845278739929
epoch 6/70:  Loss: 0.23520123958587646
epoch 7/70:  Loss: 0.36252841353416443
epoch 8/70:  Loss: 0.18301606178283691
epoch 9/70:  Loss: 0.16134323179721832
epoch 10/70:  Loss: 0.14620748162269592
epoch 11/70:  Loss: 0.261696994304657
epoch 12/70:  Loss: 0.33758658170700073
epoch 13/70:  Loss: 0.41977033019065857
epoch 14/70:  Loss: 0.07452325522899628
epoch 15/70:  Loss: 0.22560684382915497
epoch 16/70:  Loss: 0.13375405967235565
epoch 17/70:  Loss: 0.09791721403598785
epoch 18/70:  Loss: 0.24616611003875732
epoch 19/70:  Loss: 0.02682715654373169
epoch 20/70:  Loss: 0.06648076325654984
epoch 21/70:  Loss: 0.04572266340255737
epoch 22/70:  Loss: 0.06912669539451599
epoch 23/70:  Loss: 0.02961690165102482
epoch 24/70:  Loss: 0.01137927733361721
epoch 25/70:  Loss

In [69]:
model.eval()
total_accuracy=0

with torch.no_grad():
    for inputs, targets in test_loader:
        test_preds = model(inputs).squeeze() 
        test_loss = criterian(test_preds, targets)
        predicted = test_preds.round() 
        accuracy = (predicted == targets).float().mean().item()
        total_accuracy += accuracy

    avg_accuracy = total_accuracy / len(test_loader)
    print(f'Test Loss: {test_loss.item()}, Average Accuracy: {avg_accuracy}')



Test Loss: 0.08726920932531357, Average Accuracy: 0.65625
