In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# 1. Load the data
train_data = pd.read_csv('data_train.csv', header=None)
test_data = pd.read_csv('data_test.csv', header=None)

# Check the shapes
print(f"Training data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")

# For training data, assume first column is the label
X_train_full = train_data.iloc[:, 1:].values  # Features (all columns except first)
y_train_full = train_data.iloc[:, 0].values   # Labels (first column)

# For test data, all columns are features
X_test = test_data.values

# 2. Preprocess the data
# Standardize features
scaler = StandardScaler()
X_train_full = scaler.fit_transform(X_train_full)
X_test = scaler.transform(X_test)

# Split training data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)
X_val_tensor = torch.FloatTensor(X_val)
y_val_tensor = torch.FloatTensor(y_val).unsqueeze(1)
X_test_tensor = torch.FloatTensor(X_test)

# Create datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 3. Define the MLP model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLP, self).__init__()
        
        layers = []
        prev_size = input_size
        
        # Create hidden layers with tanh activation
        for h_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, h_size))
            layers.append(nn.Tanh())
            prev_size = h_size
        
        # Output layer with sigmoid for binary classification
        layers.append(nn.Linear(prev_size, output_size))
        layers.append(nn.Sigmoid())
        
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)

# Define hyperparameters
input_size = X_train.shape[1]  # Number of features
hidden_sizes = [20, 10]  # Two hidden layers with 20 and 10 neurons
output_size = 1  # Binary classification

# Initialize the model
model = MLP(input_size, hidden_sizes, output_size)
print(model)

# 4. Define loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 5. Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=100):
    train_losses = []
    val_losses = []
    val_accuracies = []
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        
        for inputs, labels in train_loader:
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
        
        epoch_train_loss = running_loss / len(train_loader.dataset)
        train_losses.append(epoch_train_loss)
        
        # Validation phase
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                running_loss += loss.item() * inputs.size(0)
                
                # Calculate accuracy
                predicted = (outputs >= 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        epoch_val_loss = running_loss / len(val_loader.dataset)
        epoch_val_acc = correct / total
        
        val_losses.append(epoch_val_loss)
        val_accuracies.append(epoch_val_acc)
        
        # Print progress
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, '
                  f'Train Loss: {epoch_train_loss:.4f}, '
                  f'Val Loss: {epoch_val_loss:.4f}, '
                  f'Val Accuracy: {epoch_val_acc:.4f}')
    
    return train_losses, val_losses, val_accuracies

# 6. Train the model
train_losses, val_losses, val_accuracies = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=100
)

# 7. Plot training and validation metrics
plt.figure(figsize=(12, 5))

# Plot losses
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()

# Plot validation accuracy
plt.subplot(1, 2, 2)
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Validation Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig('training_plots.png')
plt.close()

# 8. Make predictions on the test set
model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor)
    predicted_classes = (test_predictions >= 0.5).float().squeeze().numpy()

# 9. Save test predictions to a file
test_predictions_df = pd.DataFrame(predicted_classes)
test_predictions_df.to_csv('test_predictions.csv', index=False, header=False)

print("Predictions for test data:")
print(predicted_classes)
print(f"Total samples: {len(predicted_classes)}")
print(f"Class 0: {np.sum(predicted_classes == 0)}")
print(f"Class 1: {np.sum(predicted_classes == 1)}")

# 10. Model Architecture Summary
print("\nModel Architecture:")
print(f"Input features: {input_size}")
print(f"Hidden layers: {hidden_sizes}")
print(f"Output: {output_size}")
print(f"Activation function: Tanh (hidden layers), Sigmoid (output layer)")
print(f"Loss function: Binary Cross Entropy")
print(f"Optimizer: Adam (learning rate = 0.001)")
print(f"Best validation accuracy: {max(val_accuracies):.4f}")

Training data shape: (301, 34)
Test data shape: (50, 33)
MLP(
  (model): Sequential(
    (0): Linear(in_features=33, out_features=20, bias=True)
    (1): Tanh()
    (2): Linear(in_features=20, out_features=10, bias=True)
    (3): Tanh()
    (4): Linear(in_features=10, out_features=1, bias=True)
    (5): Sigmoid()
  )
)
Epoch 10/100, Train Loss: 0.4181, Val Loss: 0.4791, Val Accuracy: 0.8197
Epoch 20/100, Train Loss: 0.2064, Val Loss: 0.3432, Val Accuracy: 0.8689
Epoch 30/100, Train Loss: 0.1139, Val Loss: 0.3332, Val Accuracy: 0.8525
Epoch 40/100, Train Loss: 0.0699, Val Loss: 0.3664, Val Accuracy: 0.8197
Epoch 50/100, Train Loss: 0.0441, Val Loss: 0.4290, Val Accuracy: 0.8033
Epoch 60/100, Train Loss: 0.0294, Val Loss: 0.4761, Val Accuracy: 0.8197
Epoch 70/100, Train Loss: 0.0208, Val Loss: 0.5166, Val Accuracy: 0.8197
Epoch 80/100, Train Loss: 0.0154, Val Loss: 0.5538, Val Accuracy: 0.8197
Epoch 90/100, Train Loss: 0.0119, Val Loss: 0.5844, Val Accuracy: 0.8197
Epoch 100/100, Train L

Explanation:-
1. Data Preparation
First, I loaded the training data (data_train.csv) and test data (data_test.csv). In the
training dataset, the first column represents the class labels, and the remaining 33 columns are
features. The test dataset only contains the 33 features and no labels.
To ensure the model could train effectively, I standardized all the feature values
using StandardScaler, which scales the data to have zero mean and unit variance. I then
split the training data into training and validation sets, using an 80/20 split.

2. Building the MLP Model
Next, I created a feedforward neural network with the following architecture:
• Input Layer: 33 neurons (one for each feature)
• Two Hidden Layers:
o First hidden layer with 20 neurons
o Second hidden layer with 10 neurons
o Both hidden layers use the tanh activation function
• Output Layer: 1 neuron with a sigmoid activation function for binary classification
I used PyTorch’s nn.Sequential to stack the layers and activations together in a clean and
modular way.

3. Training the Model
For training, I used the Binary Cross Entropy Loss (BCELoss) since it's a binary
classification task. I optimized the model using the Adam optimizer with a learning rate of
0.001.
I trained the model for 100 epochs, using a batch size of 32. After each epoch, I evaluated the
model’s performance on the validation set by calculating both the validation loss and
accuracy.
To track the training process, I saved plots of the training and validation loss, as well as the
validation accuracy over time.

4. Making Predictions
After training the model, I used it to predict the class labels for the test data. Since the output
layer uses a sigmoid activation function, the model outputs a value between 0 and 1. I
classified each test sample as:
• Class 1 if the output was ≥ 0.5
• Class 0 if the output was < 0.5
I saved the predicted labels to a file called test_predictions.csv.