In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# Define the Neural Network Model with Dropout to avoid overfitting
class SimpleNN(nn.Module):
    """Fully connected regression network with three hidden layers.

    Layer widths are ``in_features -> 64 -> 128 -> 64 -> 1`` with a ReLU after
    each hidden layer. Dropout is applied after the first and second hidden
    layers only; the third hidden layer feeds the output layer directly.

    Args:
        in_features: Number of input features per sample (default 9).
        dropout_p: Probability passed to ``nn.Dropout`` (default 0.3).
    """

    def __init__(self, in_features: int = 9, dropout_p: float = 0.3):
        super().__init__()

        # The attribute names below are the state_dict keys, and the layers are
        # created in this order so that seeded initialisation stays the same.
        self.fc1 = nn.Linear(in_features, 64)  # First hidden layer
        self.fc2 = nn.Linear(64, 128)  # Second hidden layer
        self.fc3 = nn.Linear(128, 64)  # Third hidden layer
        self.output = nn.Linear(64, 1)  # Single regression output

        # Activation function shared by all hidden layers
        self.relu = nn.ReLU()

        # One dropout module reused after the first two hidden layers
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 1)`` predictions."""
        x = self.relu(self.fc1(x))  # First hidden layer + activation
        x = self.dropout(x)  # Dropout after first hidden layer
        x = self.relu(self.fc2(x))  # Second hidden layer + activation
        x = self.dropout(x)  # Dropout after second hidden layer
        x = self.relu(self.fc3(x))  # Third hidden layer + activation (no dropout)
        x = self.output(x)  # Linear output, no activation
        return x

def train_model(model, train_loader, criterion, optimizer, epochs):
    """Run the optimisation loop and print the mean batch loss per epoch.

    Args:
        model: Module to optimise; put into training mode at the start of
            every epoch.
        train_loader: Iterable of ``(inputs, targets)`` batches that also
            supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimiser bound to the model's parameters.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        None. Progress is reported on stdout only.
    """
    for epoch_number in range(1, epochs + 1):
        model.train()  # Training mode
        loss_total = 0.0

        for inputs, targets in train_loader:
            inputs = inputs.float()
            targets = targets.float()

            # Forward pass
            batch_loss = criterion(model(inputs), targets)

            # Backward pass: clear stale gradients, then backpropagate
            optimizer.zero_grad()
            batch_loss.backward()

            # Cap the global gradient norm at 1 before the weight update
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

            loss_total += batch_loss.item()

        # Average over the number of batches (not samples) in this epoch
        mean_loss = loss_total / len(train_loader)
        print(f"Epoch [{epoch_number}/{epochs}], Loss: {mean_loss}")

def evaluate_model(model, test_loader, scaler, lat_test, lon_test):
    """Print the prediction, true label and coordinates for every test sample.

    Args:
        model: Trained module; switched to evaluation mode here.
        test_loader: Iterable of ``(inputs, targets)`` batches. It must yield
            samples in the same order as ``lat_test`` / ``lon_test`` (i.e. an
            unshuffled loader), because coordinates are looked up by position.
        scaler: Object whose ``inverse_transform`` maps scaled labels back to
            their original units; applied to both predictions and targets.
        lat_test: Indexable sequence of latitudes, one per test sample.
        lon_test: Indexable sequence of longitudes, one per test sample.

    Returns:
        None. Results are written to stdout.
    """
    model.eval()  # Set the model to evaluation mode

    # Number of samples consumed by earlier batches. A running count is needed
    # because the last batch may be shorter than the others, so multiplying
    # the batch index by the current batch length gives the wrong position.
    offset = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.float(), target.float()
            predicted = model(data)

            # Unnormalize the predictions and targets (moved to CPU for NumPy)
            predicted_unscaled = scaler.inverse_transform(predicted.cpu().numpy())
            target_unscaled = scaler.inverse_transform(target.cpu().numpy())

            # Display test cases: latitude, longitude, predicted, and actual label
            for j, (pred_row, target_row) in enumerate(zip(predicted_unscaled, target_unscaled)):
                sample_idx = offset + j
                print(f"Lat: {lat_test[sample_idx]}, Long: {lon_test[sample_idx]}")
                print(f"Predicted Label: {pred_row[0]:.4f}, Actual Label: {target_row[0]:.4f}\n")

            offset += len(predicted)

def main():
    """Load 'TrainingData.csv', train a SimpleNN regressor and print test predictions.

    Steps: read the CSV, select the label / feature / coordinate columns,
    split 80/20 into train and test, standardise features and labels using
    statistics from the training rows only, train for 10 epochs, then print
    per-sample predictions for the test rows.
    """
    # Read CSV data from 'TrainingData.csv'
    data = pd.read_csv('TrainingData.csv')

    # Keep positional rows 1..9963 of the parsed frame (the first parsed row is dropped).
    # NOTE(review): read_csv normally consumes the header line already, so if the
    # intent was "spreadsheet rows 2 to 9964" this slice may be off by one — confirm.
    data = data.iloc[1:9964]

    # Extracting labels, features, and lat/long (for test case printing)
    labels = data.iloc[:, 0].values.reshape(-1, 1)  # Labels (first column), as a column vector
    features = data.iloc[:, 3:12].values  # Features (columns 4 to 12)
    latitudes = data['Latitude'].values
    longitudes = data['Longitude'].values

    # Train-Test Split (80% train, 20% test). Splitting BEFORE scaling keeps the
    # test rows out of the scaler statistics; the training coordinates are unused.
    (X_train, X_test, y_train, y_test,
     _lat_train, lat_test, _lon_train, lon_test) = train_test_split(
        features, labels, latitudes, longitudes, test_size=0.2, random_state=42)

    # Standardize the features: fit on the training rows, apply to both splits
    feature_scaler = StandardScaler()
    X_train = feature_scaler.fit_transform(X_train)
    X_test = feature_scaler.transform(X_test)

    # Standardize the labels the same way; evaluate_model uses this scaler to
    # map predictions back to the original label units
    label_scaler = StandardScaler()
    y_train = label_scaler.fit_transform(y_train)
    y_test = label_scaler.transform(y_test)

    # Convert to torch tensors (labels stay shaped (n, 1) to match the model output)
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
    y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

    # Create DataLoader for batches; the test loader must stay unshuffled so its
    # order matches lat_test / lon_test
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Create a model instance
    model = SimpleNN()

    # Adam optimizer with lower learning rate and weight decay
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

    # Mean Squared Error loss
    criterion = nn.MSELoss()

    # Number of training epochs
    epochs = 10

    # Train the model and print loss after each epoch
    train_model(model, train_loader, criterion, optimizer, epochs)

    # Evaluate the model and print test cases
    evaluate_model(model, test_loader, label_scaler, lat_test, lon_test)

# Run the full load/train/evaluate pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()


Epoch [1/10], Loss: 0.7763952226638794
Epoch [2/10], Loss: 0.2096254352927208
Epoch [3/10], Loss: 0.07884595558047294
Epoch [4/10], Loss: 0.07255990152060986
Epoch [5/10], Loss: 0.06450063760578632
Epoch [6/10], Loss: 0.06017607666552067
Epoch [7/10], Loss: 0.05735053467750549
Epoch [8/10], Loss: 0.05312823927402496
Epoch [9/10], Loss: 0.0521669330894947
Epoch [10/10], Loss: 0.048508037477731705
Lat: -23.6217, Long: -70.3776
Predicted Label: 0.8808, Actual Label: 0.8870

Lat: -3.1673, Long: -41.5521
Predicted Label: 0.7183, Actual Label: 0.7195

Lat: -10.9001, Long: -42.5801
Predicted Label: 0.7986, Actual Label: 0.8142

Lat: 46.0234, Long: 124.7347
Predicted Label: 0.4635, Actual Label: 0.4585

Lat: 43.0243, Long: -108.385
Predicted Label: 0.7393, Actual Label: 0.7278

Lat: 34.7292, Long: 112.1364
Predicted Label: 0.5886, Actual Label: 0.5980

Lat: 34.6813, Long: -118.3141
Predicted Label: 0.5485, Actual Label: 0.5548

Lat: 29.4136, Long: 112.1653
Predicted Label: 0.5313, Actual Label