In [24]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from neuron import MLP
import torch
from sklearn.preprocessing import StandardScaler

DATA_DIR = Path("../../data")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
trn_data_pos = pd.read_csv(DATA_DIR/'michal'/'pos_trn.csv')
trn_data_pos['is_positive'] = 1
trn_data_neg = pd.read_csv(DATA_DIR/'michal'/'neg_trn.csv')
trn_data_neg['is_positive'] = 0
trn_data = pd.concat([trn_data_pos, trn_data_neg], axis=0)
trn_data.fillna(-1, inplace=True)
shuffled_data = trn_data.sample(frac=1, random_state=42).reset_index(drop=True)
VAL_SPLIT = 0.2
split_idx = int(VAL_SPLIT * shuffled_data.shape[0])
val_data = shuffled_data.iloc[:split_idx]
trn_data = shuffled_data.iloc[split_idx:]



In [26]:
X = trn_data.drop(columns=['is_positive'])
y = trn_data['is_positive']
X_val = val_data.drop(columns=['is_positive'])
y_val = val_data['is_positive']


In [27]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_val = scaler.transform(X_val)
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y.values, dtype=torch.float32).reshape(-1, 1)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val.values, dtype=torch.float32).reshape(-1, 1)


In [28]:
train_dataset = TensorDataset(X, y)
val_dataset = TensorDataset(X_val, y_val)
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

In [29]:
def train_mlp(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            # Zero the gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                
        print(f'Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}, '
              f'Val Loss: {val_loss/len(val_loader):.4f}')

In [39]:
model = MLP(input_size=X.shape[1], hidden_sizes=[32,32], output_size=1)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

# Training the model
print("Starting training...")
train_mlp(model, train_loader, val_loader, criterion, optimizer, epochs=20)

# Evaluate model on validation set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in val_loader:
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
        
print(f'Validation Accuracy: {100 * correct / total:.2f}%')

Starting training...
Epoch 1, Train Loss: 0.7452, Val Loss: 0.6935
Epoch 2, Train Loss: 0.6945, Val Loss: 0.6932
Epoch 3, Train Loss: 0.6936, Val Loss: 0.6932
Epoch 4, Train Loss: 0.6934, Val Loss: 0.6931
Epoch 5, Train Loss: 0.6933, Val Loss: 0.6931
Epoch 6, Train Loss: 0.6933, Val Loss: 0.6931
Epoch 7, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 8, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 9, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 10, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 11, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 12, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 13, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 14, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 15, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 16, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 17, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 18, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 19, Train Loss: 0.6932, Val Loss: 0.6931
Epoch 20, Train Loss: 0.6932, Val Loss: 0.6931
Validation Accuracy: 91.21%


In [40]:
pos_tst = pd.read_csv(DATA_DIR/'michal'/'pos_tst.csv')
pos_tst['is_positive'] = 1
neg_tst = pd.read_csv(DATA_DIR/'michal'/'neg_tst.csv')
neg_tst['is_positive'] = 0
tst_data = pd.concat([pos_tst, neg_tst], axis=0)
tst_data.fillna(-1, inplace=True)
tst_data = tst_data.sample(frac=1, random_state=42).reset_index(drop=True)
X_tst = tst_data.drop(columns=['is_positive'])
y_tst = tst_data['is_positive']
X_tst = scaler.transform(X_tst)
X_tst = torch.tensor(X_tst, dtype=torch.float32)
y_tst = torch.tensor(y_tst.values, dtype=torch.float32).reshape(-1, 1)
tst_dataset = TensorDataset(X_tst, y_tst)
tst_loader = DataLoader(tst_dataset, batch_size=batch_size, shuffle=True)

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in tst_loader:
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')



Test Accuracy: 91.49%
