In [1]:
import pickle
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd
%matplotlib inline 



In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

In [3]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
def save_data(fp, data):
    """Pickle ``data`` into the file at path ``fp``.

    The file is opened in binary write mode, so existing content is replaced.
    Returns None.
    """
    with open(fp, mode="wb") as sink:
        pickle.dump(data, sink)

def load_data(fp):
    """Read one pickled object from the file at ``fp`` and return it as a NumPy array.

    Parameters
    ----------
    fp : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    numpy.ndarray
        ``np.array(obj)`` of the unpickled object. If the object is a ragged
        nested sequence (e.g. records pairing a feature vector with a scalar
        label), an object-dtype array is returned instead.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only use this on files from a trusted source.
    with open(fp, "rb") as fo:
        data = pickle.load(fo)
    try:
        return np.array(data)
    except ValueError:
        # NumPy >= 1.24 raises instead of silently building an object array from
        # ragged input; request the object dtype explicitly to keep the old result.
        return np.array(data, dtype=object)

In [5]:
def stack_folder_files(fp):
    """Load every non-hidden file in directory ``fp`` and stack their records.

    Entries are visited in sorted name order so the result does not depend on
    the order returned by the filesystem. Names starting with '.' are skipped.

    Parameters
    ----------
    fp : str
        Directory containing the pickled files. A trailing path separator is
        optional.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the concatenated records of all loaded files
        (an empty array when the directory has no eligible files).
    """
    records = []
    for name in sorted(os.listdir(fp)):
        if name.startswith('.'):
            continue
        # os.path.join gives the same path as fp + name when fp ends with a
        # separator, and still works when it does not.
        records.extend(load_data(os.path.join(fp, name)))

    return np.array(records)

In [6]:
# Directory containing the pickled input files that stack_folder_files reads.
fp = "./data/"

In [None]:
# Load and concatenate the records from every non-hidden file under `fp`.
data = stack_folder_files(fp)

In [8]:
# Number of records loaded (length along the first axis of `data`).
len(data)

7898

In [9]:
# Split each record into its feature vector (element 0) and its class index
# (element 1). The labels are kept as plain indices here; one-hot encoding is
# done on `y` as a whole afterwards, so no per-record encoding is built.
x = [record[0] for record in data]
y = [record[1] for record in data]

In [10]:
# one-hot encoding 0-> [1,0], 1->[0,1]
y_one_hot = np.eye(2)[y]
y_one_hot

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [11]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible. Both arrays are cast to float32 before splitting.
features = np.array(x).astype(np.float32)
targets = np.array(y_one_hot).astype(np.float32)
X_train, X_test, Y_train, Y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

# Wrap the NumPy splits as tensors; individual batches are moved to `device`
# inside the training and evaluation loops.
X_train_tensor = torch.tensor(X_train)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float)
X_test_tensor = torch.tensor(X_test)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float)

# Batched loaders of 64 samples; only the training loader shuffles.
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [12]:
class BinaryClassifier(nn.Module):
    """Three-layer fully connected network: 26 inputs -> 64 -> 32 -> 2 outputs.

    ``forward`` returns raw, unnormalised scores (logits), one per class. No
    softmax/sigmoid is applied here because the notebook trains this model with
    ``nn.BCEWithLogitsLoss``, which applies the sigmoid itself; normalising the
    outputs first would squash them into [0, 1] before the loss's own sigmoid
    and distort the loss and its gradients. ``argmax`` over the logits selects
    the same class as ``argmax`` over softmax probabilities, so prediction code
    is unaffected. Apply ``F.softmax(logits, dim=1)`` explicitly if
    probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(26, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        """Map a batch of 26-feature rows to a ``[batch_size, 2]`` tensor of logits."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Final layer is left linear: the loss function expects logits.
        return self.fc3(x)

In [13]:
# Build the network on the selected device, then set up the loss and optimiser.
model = BinaryClassifier()
model = model.to(device)

# Binary cross-entropy that applies a sigmoid to its inputs internally.
criterion = nn.BCEWithLogitsLoss()

# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
epochs = 50

for epoch in range(epochs):
    model.train()     # switch the module to training mode
    total_loss = 0.0  # running sum of per-batch losses for this epoch

    for batch_x, batch_y in train_loader:
        # Place the batch on the same device as the model; targets are floats.
        batch_x, batch_y = batch_x.to(device), batch_y.to(device).float()

        # Forward pass: model outputs and one-hot targets are both [batch_size, 2].
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        # Clear stale gradients, backpropagate, then take one optimiser step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # The reported value is the sum of batch losses, not their mean.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")


In [None]:
# Measure classification accuracy over the test loader.
model.eval()
correct, total = 0, 0

with torch.no_grad():  # gradients are not needed for scoring
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device).float()  # one-hot targets stay float

        outputs = model(batch_x)
        predicted = outputs.argmax(dim=1)      # highest-scoring class per row
        true_labels = batch_y.argmax(dim=1)    # one-hot -> class index

        correct += (predicted == true_labels).sum().item()
        total += batch_y.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")


In [None]:
# Persist only the model's state_dict (its parameter tensors), not the module object.
weights = model.state_dict()
torch.save(weights, 'binary_classifier_0515.pth')
print("Model saved to binary_classifier_0515.pth")

In [15]:
# Reload a trained classifier from disk.
# NOTE(review): this reads 'binary_classifier.pth', while the save cell writes
# 'binary_classifier_0515.pth' — confirm which checkpoint is meant to be evaluated.

# Recreate the model architecture; the checkpoint holds only the state_dict.
model = BinaryClassifier().to(device)

# Load the saved weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a CUDA machine also loads on a
# CPU-only one instead of failing during deserialisation.
model.load_state_dict(torch.load('binary_classifier.pth', map_location=device))

# Set to evaluation mode
model.eval()
print("Model loaded from binary_classifier.pth")

Model loaded from binary_classifier.pth


In [16]:
# Re-run the accuracy measurement, this time for the model currently bound to
# `model` (the reloaded weights when the cells are run in order).
model.eval()
correct, total = 0, 0

with torch.no_grad():  # gradients are not needed for scoring
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device).float()  # one-hot targets stay float

        outputs = model(batch_x)
        predicted = outputs.argmax(dim=1)      # highest-scoring class per row
        true_labels = batch_y.argmax(dim=1)    # one-hot -> class index

        correct += (predicted == true_labels).sum().item()
        total += batch_y.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 96.14%
