In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
from torchvision.transforms import Normalize

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

import random

Loading the data retrieved from Kaggle

In [4]:
df = pd.read_csv("water_potability.csv")
df = df.dropna()

X = df[['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']].values
y = df['Potability'].values



# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32).reshape(-1, 1)

# Define the model
model = nn.Sequential(
    nn.Linear(4, 12),
    nn.ReLU(),
    nn.Linear(12, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
    nn.Sigmoid()
)
print(model)

# Train the model
loss_fn = nn.BCELoss()  # binary cross-entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epochs = 100
batch_size = 10

for epoch in range(n_epochs):
    # Training loop
    for i in range(0, len(X_train), batch_size):
        Xbatch = X_train[i:i + batch_size]
        y_pred = model(Xbatch)
        ybatch = y_train[i:i + batch_size]
        loss = loss_fn(y_pred, ybatch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # Validation loop
    model.eval()
    with torch.no_grad():
        y_val_pred = model(X_val)
        val_loss = loss_fn(y_val_pred, y_val)
    
    # Print training and validation loss for each epoch
    print(f'Epoch {epoch + 1}/{n_epochs}, Training Loss: {loss.item()}, Validation Loss: {val_loss.item()}')

# Compute accuracy (no_grad is optional)
with torch.no_grad():
    y_pred = model(X_train)
accuracy = (y_pred.round() == y_train).float().mean()
print(f"Training Accuracy {accuracy}")

with torch.no_grad():
    y_val_pred = model(X_val)
accuracy_val = (y_val_pred.round() == y_val).float().mean()
print(f"Validation Accuracy {accuracy_val}")


Sequential(
  (0): Linear(in_features=4, out_features=12, bias=True)
  (1): ReLU()
  (2): Linear(in_features=12, out_features=8, bias=True)
  (3): ReLU()
  (4): Linear(in_features=8, out_features=1, bias=True)
  (5): Sigmoid()
)
Epoch 1/100, Training Loss: 0.7502984404563904, Validation Loss: 0.7455360889434814
Epoch 2/100, Training Loss: 0.7363768815994263, Validation Loss: 0.7176124453544617
Epoch 3/100, Training Loss: 0.7148601412773132, Validation Loss: 0.7052766680717468
Epoch 4/100, Training Loss: 0.6883490085601807, Validation Loss: 0.697323203086853
Epoch 5/100, Training Loss: 0.6660699844360352, Validation Loss: 0.6940444707870483
Epoch 6/100, Training Loss: 0.6476608514785767, Validation Loss: 0.6925652623176575
Epoch 7/100, Training Loss: 0.6253433227539062, Validation Loss: 0.6937960386276245
Epoch 8/100, Training Loss: 0.618306577205658, Validation Loss: 0.6942415833473206
Epoch 9/100, Training Loss: 0.6134709715843201, Validation Loss: 0.6945483684539795
Epoch 10/100, Tra