## 1. GPU / PyTorch check

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))

Using device: cuda
GPU Name: NVIDIA GeForce RTX 4060


# 2. Dummy dataset

In [28]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 100 samples, 5 features, binary classification
X, y = make_classification(n_samples=100, n_features=5, n_informative=3, n_classes=2, random_state=42)

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1,1).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.float32).view(-1,1).to(device)

print("Training set:", X_train.shape, y_train.shape)
print("Validation set:", X_val.shape, y_val.shape)

Training set: torch.Size([80, 5]) torch.Size([80, 1])
Validation set: torch.Size([20, 5]) torch.Size([20, 1])


# 3. Multi-Layer Perceptron

In [29]:
class DetailedMLP(nn.Module):
    def __init__(self, input_dim=5):
        super(DetailedMLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 8)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(8, 4)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(4, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x1 = self.fc1(x)
        x2 = self.relu1(x1)
        x3 = self.fc2(x2)
        x4 = self.relu2(x3)
        x5 = self.fc3(x4)
        out = self.sigmoid(x5)
        # Print each layer output
        print("fc1 output:\n", x1)
        print("ReLU1 output:\n", x2)
        print("fc2 output:\n", x3)
        print("ReLU2 output:\n", x4)
        print("fc3 output:\n", x5)
        print("Sigmoid output:\n", out)
        return out

model = DetailedMLP().to(device)
print(model)

DetailedMLP(
  (fc1): Linear(in_features=5, out_features=8, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=8, out_features=4, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=4, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


4. Loss & Optimizer

In [30]:
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# 5. Training Loop (detailed)

In [31]:
epochs = 3
batch_size = 16

for epoch in range(epochs):
    print(f"\n===== Epoch {epoch+1} =====")
    permutation = torch.randperm(X_train.size()[0])
    
    for i in range(0, X_train.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = X_train[indices], y_train[indices]
        
        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        print("Batch loss:", loss.item())
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        print("Gradients fc1 weight:\n", model.fc1.weight.grad)
        print("Gradients fc1 bias:\n", model.fc1.bias.grad)
        
        # Update weights
        optimizer.step()
        print("Updated fc1 weight:\n", model.fc1.weight)
        print("Updated fc1 bias:\n", model.fc1.bias)
    
    # Validation
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
        val_preds = (val_outputs>0.5).float()
        val_acc = (val_preds==y_val).float().mean()
    
    print(f"Epoch [{epoch+1}] | Validation Loss: {val_loss.item():.4f} | Validation Accuracy: {val_acc.item():.4f}")


===== Epoch 1 =====
fc1 output:
 tensor([[-0.3534, -0.2411,  0.3836, -0.6776, -0.0321,  0.4785,  0.5267, -0.3242],
        [ 0.0693, -0.2641, -0.5934,  0.3415,  0.0923,  0.3846, -0.1582, -0.1668],
        [ 0.4872,  0.2868, -1.7375,  0.9741,  0.8500,  0.6081, -0.4129,  0.3965],
        [ 0.1258,  2.8589, -0.4696, -2.5899,  2.0468,  0.7861,  1.4085,  1.1533],
        [-0.5012, -0.8995,  0.8936, -0.5662, -0.7650,  0.1850,  0.2956, -0.8238],
        [-0.0072, -2.1354, -0.5620,  1.9914, -1.1019,  0.1453, -1.0330, -0.9801],
        [ 0.4569,  0.5247, -1.4355,  0.4913,  0.7232,  0.4183, -0.3506,  0.3231],
        [-0.2562, -0.7034,  0.4738, -0.2892, -0.7241,  0.0195, -0.0587, -0.7432],
        [ 0.3184, -0.3829, -0.6021,  0.5640, -0.6147, -0.3009, -0.9211, -0.5515],
        [ 0.1495,  0.4004, -0.3929, -0.3987,  0.1187,  0.1004, -0.1082, -0.1088],
        [ 0.2471,  0.1667, -0.6715,  0.0895,  0.0487,  0.0855, -0.3692, -0.1413],
        [-0.3729, -0.9423,  0.6563, -0.2737, -0.8267,  0.0852,  