In [37]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchmetrics import Accuracy
import torch.nn.init as init


In [38]:
class WaterDataset(Dataset):
    """Map-style dataset that serves rows of a CSV file as (features, label).

    The file is read once at construction time and held in memory as a NumPy
    array. For every row, all columns except the last are the features and the
    last column is the label.
    """

    def __init__(self, csv_path):
        """Read ``csv_path`` with pandas and keep its values in ``self.data``.

        Args:
            csv_path: Path of the CSV file, handed straight to ``pd.read_csv``.
        """
        super().__init__()
        # DataFrame -> NumPy array in one step; the frame itself is not kept
        self.data = pd.read_csv(csv_path).to_numpy()

    def __len__(self):
        """Return the number of rows (samples) stored in ``self.data``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the row(s) selected by ``idx``.

        Returns:
            A 2-tuple: every column but the last, and the last column.
        """
        return self.data[idx, :-1], self.data[idx, -1]
    

In [39]:
# Create an instance of the WaterDataset from the training CSV
dataset_train = WaterDataset("water_train.csv")

# Create a DataLoader based on dataset_train.
# drop_last=True discards a trailing single-sample batch (which appears when
# the dataset length is odd): nn.BatchNorm1d in training mode cannot compute
# batch statistics from one sample and raises a ValueError on such a batch.
dataloader_train = DataLoader(
    dataset_train,
    batch_size=2,
    shuffle=True,
    drop_last=True,
)

# Get a batch of features and labels to sanity-check shapes and dtypes
features, labels = next(iter(dataloader_train))
print(features, labels)

tensor([[0.4605, 0.5546, 0.5128, 0.4579, 0.5644, 0.1930, 0.4096, 0.4953, 0.5461],
        [0.4597, 0.3738, 0.5402, 0.6358, 0.4077, 0.6017, 0.3531, 0.6505, 0.5484]],
       dtype=torch.float64) tensor([1., 1.], dtype=torch.float64)


In [40]:
# Earlier version of Net, kept commented out for reference: plain ReLU network
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         # Define the three linear layers
#         self.fc1 = nn.Linear(9, 16)
#         self.fc2 = nn.Linear(16, 8)
#         self.fc3 = nn.Linear(8, 1)
#         
#     def forward(self, x):
#         # Pass x through linear layers adding activations
#         x = nn.functional.relu(self.fc1(x))
#         x = nn.functional.relu(self.fc2(x))
#         x = nn.functional.sigmoid(self.fc3(x))
#         return x
    
    
# Earlier version of Net, kept commented out for reference: He initialization + ELU
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.fc1 = nn.Linear(9, 16)
#         self.fc2 = nn.Linear(16, 8)
#         self.fc3 = nn.Linear(8, 1)
#         
#         # Apply He initialization
#         init.kaiming_uniform_(self.fc1.weight)
#         init.kaiming_uniform_(self.fc2.weight)
#         init.kaiming_uniform_(self.fc3.weight, nonlinearity="sigmoid")
# 
#     def forward(self, x):
#         # Update ReLU activation to ELU
#         x = nn.functional.elu(self.fc1(x))
#         x = nn.functional.elu(self.fc2(x))
#         x = nn.functional.sigmoid(self.fc3(x))
#         return x
    
class Net(nn.Module):
    """Feed-forward binary classifier: 9 inputs -> 16 -> 8 -> 1 output.

    Each of the two hidden linear layers is followed by batch normalization
    and an ELU activation; the single output unit is passed through a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Fully connected layers
        self.fc1 = nn.Linear(9, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 1)
        # One batch normalization layer per hidden layer, sized to its output
        self.bn1 = nn.BatchNorm1d(16)
        self.bn2 = nn.BatchNorm1d(8)

        # Kaiming-uniform (He) initialization of the linear weights.
        # "leaky_relu" is kaiming_uniform_'s default nonlinearity, written out
        # here so all three layers can share one loop.
        for layer, nonlinearity in (
            (self.fc1, "leaky_relu"),
            (self.fc2, "leaky_relu"),
            (self.fc3, "sigmoid"),
        ):
            init.kaiming_uniform_(layer.weight, nonlinearity=nonlinearity)

    def forward(self, x):
        """Run ``x`` through both hidden stages and the sigmoid output layer.

        Each hidden stage is linear -> batch norm -> ELU.
        """
        hidden = F.elu(self.bn1(self.fc1(x)))
        hidden = F.elu(self.bn2(self.fc2(hidden)))
        return F.sigmoid(self.fc3(hidden))

In [41]:
# Training hyperparameters, named so they can be tuned in one place
SEED = 42
NUM_EPOCHS = 1000
LEARNING_RATE = 0.01

# Seed PyTorch's global RNG so that weight initialization and DataLoader
# shuffling -- and therefore the trained model -- are repeatable when the
# notebook is restarted and run from the top.
torch.manual_seed(SEED)

# Net ends in a sigmoid, so its outputs are probabilities: use plain BCELoss
criterion = nn.BCELoss()

net = Net()
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE)

# A freshly built module is already in training mode; stated explicitly
# because the BatchNorm1d layers behave differently after net.eval().
net.train()

for epoch in range(NUM_EPOCHS):
    for features, labels in dataloader_train:
        # Batches arrive as float64 (see the printed sample batch); cast to
        # float32 before feeding the network and the loss.
        features = features.float()
        labels = labels.float()

        optimizer.zero_grad()
        outputs = net(features)
        # Reshape the label vector to a column so it matches the outputs
        loss = criterion(outputs, labels.view(-1, 1))
        loss.backward()
        optimizer.step()

In [42]:
# Create an instance of the WaterDataset from the test CSV
dataset_test = WaterDataset("water_test.csv")

# Create a DataLoader based on dataset_test.
# Accuracy does not depend on sample order, so shuffling is turned off; this
# also makes the preview batch printed below the same on every run.
dataloader_test = DataLoader(
    dataset_test,
    batch_size=2,
    shuffle=False,
)

# Get a batch of features and labels
features_test, labels_test = next(iter(dataloader_test))
print(features_test, labels_test)

# Set up binary accuracy metric
acc = Accuracy(task="binary")

# eval() switches the BatchNorm1d layers to their running statistics;
# no_grad() skips gradient tracking, which is not needed for inference.
net.eval()
with torch.no_grad():
    for features, labels in dataloader_test:
        # Cast to float32 to match the network, as in training
        features = features.float()
        labels = labels.float()
        # Get predicted probabilities for test data batch
        outputs = net(features)
        # Threshold the probabilities at 0.5 to obtain hard 0/1 predictions
        preds = (outputs >= 0.5).float()
        # update() only accumulates the metric state; calling the metric
        # object would additionally compute a per-batch value that is unused.
        acc.update(preds, labels.view(-1, 1))

# Compute total test accuracy over all accumulated batches
test_accuracy = acc.compute()
print(f"Test accuracy: {test_accuracy}")

tensor([[0.4728, 0.3403, 0.5453, 0.4390, 0.6652, 0.4054, 0.3811, 0.5353, 0.7462],
        [0.6476, 0.5668, 0.2065, 0.4976, 0.5987, 0.5910, 0.6480, 0.3384, 0.4647]],
       dtype=torch.float64) tensor([0., 0.], dtype=torch.float64)
Test accuracy: 0.5904572606086731
