#**Import Modules**

In [71]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# **Load Data**

In [72]:
flow_depth_df = pd.read_csv("Flow_depth_v3.csv", skiprows=1)
flow_rate_df = pd.read_csv("Flow_rate_v3.csv", skiprows=1)
node_df = pd.read_csv("WW01_node.csv")

# **Data Preprocessing**

In [73]:
flow_depth_df.rename(columns={'IDs:': 'Time'}, inplace=True)
flow_rate_df.rename(columns={'IDs:': 'Time'}, inplace=True)
node_df.columns = node_df.columns.str.strip()
node_df.rename(columns={'Node ID': 'NodeID'}, inplace=True)
flow_depth_df.rename(columns={'Date/Time': 'Time'}, inplace=True)
flow_rate_df.rename(columns={'Date/Time': 'Time'}, inplace=True)
flow_depth_df['Time'] = flow_depth_df['Time'].astype(str)
flow_rate_df['Time'] = flow_rate_df['Time'].astype(str)
node_df['NodeID'] = node_df['NodeID'].astype(str)

for df in [flow_depth_df, flow_rate_df]:
    for col in df.columns[1:]:  # Skip the 'Time' column
        df[col] = pd.to_numeric(df[col], errors='coerce')

merged_df = flow_depth_df.merge(flow_rate_df, on='Time', suffixes=('_depth', '_rate'))
relevant_node_features = ['NodeID', 'X-Coordinate', 'Y-Coordinate', 'Invert Elev. (ft)']
node_df = node_df[relevant_node_features]

if 'NodeID' in merged_df.columns and 'NodeID' in node_df.columns:
    merged_df = merged_df.merge(node_df, on='NodeID', how='left')
else:
    print("Error: 'NodeID' column missing in either merged_df or node_df")

merged_df['Time'] = pd.to_datetime(merged_df['Time'], errors='coerce')
merged_df['Hour'] = merged_df['Time'].dt.hour
merged_df['Day'] = merged_df['Time'].dt.day
merged_df['Month'] = merged_df['Time'].dt.month
merged_df.fillna(merged_df.median(), inplace=True)

features = [col for col in merged_df.columns if col not in ['Time', 'NodeID']]
X = merged_df[features].values
X_min = X.min(axis=0)
X_max = X.max(axis=0)
X = (X - X_min) / (X_max - X_min + 1e-8)
data_cleaned = pd.DataFrame(X, columns=features)

Error: 'NodeID' column missing in either merged_df or node_df


  merged_df['Time'] = pd.to_datetime(merged_df['Time'], errors='coerce')


# **Splitting Data**

In [74]:
threshold = 0.5

y = (merged_df['Depth'] > threshold).astype(int).values

X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

torch.manual_seed(42)

split_ratio = 0.8

split_idx = int(len(X) * split_ratio)

indices = torch.randperm(len(X))

# Split the data
train_indices = indices[:split_idx]
test_indices = indices[split_idx:]
X_train, X_test = X_tensor[train_indices], X_tensor[test_indices]
y_train, y_test = y_tensor[train_indices], y_tensor[test_indices]

# Shapes to confirm
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: torch.Size([14165, 49]), y_train shape: torch.Size([14165, 1])
X_test shape: torch.Size([3542, 49]), y_test shape: torch.Size([3542, 1])


# **Neural Network (Baseline)**

In [75]:
class SimpleNN(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super(SimpleNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 2)
        )

    def forward(self, x):
        return self.net(x)

# Initialize the model
input_dim = X_train.shape[1]
nn_model = SimpleNN(input_dim)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nn_model.parameters(), lr=0.01)

# Training loop
epochs = 200
for epoch in range(epochs):
    nn_model.train()
    optimizer.zero_grad()
    outputs = nn_model(X_train)
    loss = criterion(outputs, y_train.long().view(-1))
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

def calculate_f1_score(y_true, y_pred):
    y_true = y_true.cpu().numpy()
    y_pred = y_pred.cpu().numpy()

    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))

    precision = tp / (tp + fp + 1e-8)
    recall = tp / (tp + fn + 1e-8)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-8)

    return f1

# Evaluation
nn_model.eval()
with torch.no_grad():
    outputs = nn_model(X_test)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_test.long().view(-1)).sum().item() / y_test.size(0)
    f1 = calculate_f1_score(y_test.long().view(-1), predicted)
    print(f"Neural Network Accuracy: {accuracy:.4f}")
    print(f"Neural Network F1 Score: {f1:.4f}")

Epoch [10/200], Loss: 0.3370
Epoch [20/200], Loss: 0.1858
Epoch [30/200], Loss: 0.1097
Epoch [40/200], Loss: 0.0751
Epoch [50/200], Loss: 0.0537
Epoch [60/200], Loss: 0.0403
Epoch [70/200], Loss: 0.0317
Epoch [80/200], Loss: 0.0260
Epoch [90/200], Loss: 0.0222
Epoch [100/200], Loss: 0.0196
Epoch [110/200], Loss: 0.0176
Epoch [120/200], Loss: 0.0160
Epoch [130/200], Loss: 0.0148
Epoch [140/200], Loss: 0.0139
Epoch [150/200], Loss: 0.0131
Epoch [160/200], Loss: 0.0124
Epoch [170/200], Loss: 0.0118
Epoch [180/200], Loss: 0.0114
Epoch [190/200], Loss: 0.0109
Epoch [200/200], Loss: 0.0106
Neural Network Accuracy: 0.9972
Neural Network F1 Score: 0.9845


# **Train Neural Network Model**

In [76]:
import itertools
# Define the parameter grid
hidden_dims = [32, 64, 128]
learning_rates = [0.001, 0.01, 0.1]
epochs_list = [50, 100, 200]

# Function to train the model
def train_nn_model(hidden_dim, lr, epochs):
    model = SimpleNN(input_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train.long().view(-1))
        loss.backward()
        optimizer.step()
    return model

# Grid search
best_f1 = 0
best_params = None
for hidden_dim, lr, epochs in itertools.product(hidden_dims, learning_rates, epochs_list):
    model = train_nn_model(hidden_dim, lr, epochs)
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs.data, 1)
        f1 = calculate_f1_score(y_test.long().view(-1), predicted)
        if f1 > best_f1:
            best_f1 = f1
            best_params = {'hidden_dim': hidden_dim, 'lr': lr, 'epochs': epochs}

print(f"Best F1 Score for NN: {best_f1:.4f}")
print(f"Best Parameters for NN: {best_params}")

Best F1 Score for NN: 0.9892
Best Parameters for NN: {'hidden_dim': 64, 'lr': 0.1, 'epochs': 50}


# **Random Forest**

In [None]:
class RandomForest(nn.Module):
    def __init__(self, input_dim, n_estimators=100):
        super(RandomForest, self).__init__()
        self.trees = nn.ModuleList([nn.Linear(input_dim, 2) for _ in range(n_estimators)])

    def forward(self, x):
        outputs = [tree(x) for tree in self.trees]
        outputs = torch.stack(outputs, dim=0)
        outputs = torch.mean(outputs, dim=0)
        return outputs

# Define the parameter grid
n_estimators_list = [50, 100, 200]
learning_rates = [0.001, 0.01, 0.1]
epochs_list = [50, 100, 200]

# Function to train the model
def train_rf_model(n_estimators, lr, epochs):
    model = RandomForest(input_dim, n_estimators)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train.long().view(-1))
        loss.backward()
        optimizer.step()
    return model

# Grid search
best_f1 = 0
best_accuracy = 0
best_params = None
for n_estimators, lr, epochs in itertools.product(n_estimators_list, learning_rates, epochs_list):
    model = train_rf_model(n_estimators, lr, epochs)
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == y_test.long().view(-1)).sum().item() / y_test.size(0)
        f1 = calculate_f1_score(y_test.long().view(-1), predicted)
        if f1 > best_f1:
            best_f1 = f1
            best_accuracy = accuracy
            best_params = {'n_estimators': n_estimators, 'lr': lr, 'epochs': epochs}

print(f"Best F1 Score for RF: {best_f1:.4f}")
print(f"Best Accuracy for RF: {best_accuracy:.4f}")
print(f"Best Parameters for RF: {best_params}")


#**Gradient Boosting Tree**

In [None]:
class GradientBoostingTree(nn.Module):
    def __init__(self, input_dim, n_estimators=100, learning_rate=0.1):
        super(GradientBoostingTree, self).__init__()
        self.trees = nn.ModuleList([nn.Linear(input_dim, 2) for _ in range(n_estimators)])
        self.learning_rate = learning_rate

    def forward(self, x):
        outputs = torch.zeros(x.size(0), 2)  # Initialize outputs with the correct shape
        for tree in self.trees:
            outputs += self.learning_rate * tree(x)
        return outputs

# Define the parameter grid
n_estimators_list = [50, 100, 200]
learning_rates = [0.001, 0.01, 0.1]
epochs_list = [50, 100, 200]

# Function to train the model
def train_gbt_model(n_estimators, lr, epochs):
    model = GradientBoostingTree(input_dim, n_estimators, lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train.long().view(-1))
        loss.backward()
        optimizer.step()
    return model

# Grid search
best_f1 = 0
best_accuracy = 0
best_params = None
for n_estimators, lr, epochs in itertools.product(n_estimators_list, learning_rates, epochs_list):
    model = train_gbt_model(n_estimators, lr, epochs)
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == y_test.long().view(-1)).sum().item() / y_test.size(0)
        f1 = calculate_f1_score(y_test.long().view(-1), predicted)
        if f1 > best_f1:
            best_f1 = f1
            best_accuracy = accuracy
            best_params = {'n_estimators': n_estimators, 'lr': lr, 'epochs': epochs}

print(f"Best F1 Score for GBT: {best_f1:.4f}")
print(f"Best Accuracy for GBT: {best_accuracy:.4f}")
print(f"Best Parameters for GBT: {best_params}")

# **Linear Regression**

In [None]:
# Define a simple linear regression model using PyTorch
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        return self.linear(x)

# Function to calculate F1 score using PyTorch
def calculate_f1_score(y_true, y_pred):
    tp = (y_true * y_pred).sum().to(torch.float32)
    tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)
    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)

    precision = tp / (tp + fp + 1e-8)
    recall = tp / (tp + fn + 1e-8)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-8)
    return f1.item()

# Assuming X_train, y_train, X_test, y_test, and criterion are already defined
input_dim = X_train.shape[1]
learning_rates = [0.001, 0.01, 0.1]
epochs_list = [50, 100, 200]
criterion = nn.MSELoss()  # Use MSELoss for regression
# Function to train the model
def train_lr_model(lr, epochs):
    model = LinearRegressionModel(input_dim)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train.view(-1, 1).float())
        loss.backward()
        optimizer.step()
    return model

# Grid search
best_f1 = 0
best_accuracy = 0
best_params = None
for lr, epochs in itertools.product(learning_rates, epochs_list):
    model = train_lr_model(lr, epochs)
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        predicted = outputs.round().view(-1)
        accuracy = (predicted == y_test.view(-1)).sum().item() / y_test.size(0)
        f1 = calculate_f1_score(y_test.view(-1), predicted)
        if f1 > best_f1:
            best_f1 = f1
            best_accuracy = accuracy
            best_params = {'lr': lr, 'epochs': epochs}

print(f"Best F1 Score for LR: {best_f1:.4f}")
print(f"Best Accuracy for LR: {best_accuracy:.4f}")
print(f"Best Parameters for LR: {best_params}")


# **Support Vector Machine**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the SVM-style linear model
class SVM(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(SVM, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.fc(x)

# Custom F1-score using PyTorch only
def calculate_f1_score(y_true, y_pred, num_classes):
    f1 = 0.0
    for cls in range(num_classes):
        tp = ((y_pred == cls) & (y_true == cls)).sum().item()
        fp = ((y_pred == cls) & (y_true != cls)).sum().item()
        fn = ((y_pred != cls) & (y_true == cls)).sum().item()

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1_class = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        f1 += f1_class

    return f1 / num_classes  # macro average

# Custom accuracy using PyTorch only
def calculate_accuracy(y_true, y_pred):
    correct = (y_true == y_pred).sum().item()
    total = y_true.size(0)
    return correct / total

# Hyperparameters
learning_rates = [0.001, 0.01, 0.1]
epochs = 100
criterion = nn.CrossEntropyLoss()

# Setup input/output sizes
input_dim = X_train.shape[1]
num_classes = len(torch.unique(y_train))

# Flatten target tensors to 1D
y_train = y_train.view(-1).long()
y_test = y_test.view(-1).long()

# Training function
def train_svm_model(lr):
    model = SVM(input_dim, num_classes)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
    return model

# Grid search
best_f1 = 0
best_accuracy = 0
best_params = None

for lr in learning_rates:
    model = train_svm_model(lr)
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs, 1)
        f1 = calculate_f1_score(y_test, predicted, num_classes)
        accuracy = calculate_accuracy(y_test, predicted)

        if f1 > best_f1:
            best_f1 = f1
            best_accuracy = accuracy
            best_params = {'lr': lr}

# Results
print(f"Best F1 Score for SVM: {best_f1:.4f}")
print(f"Best Accuracy for SVM: {best_accuracy:.4f}")
print(f"Best Parameters for SVM: {best_params}")


# **Ensemble evaluating**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score

class WeightedSoftVotingEnsembleModel(nn.Module):
    def __init__(self, models, X_val, y_val):
        super(WeightedSoftVotingEnsembleModel, self).__init__()
        self.models = models

        # Compute dynamic weights based on F1 scores
        self.weights = self.compute_dynamic_weights(X_val, y_val)

    def compute_dynamic_weights(self, X_val, y_val):
        """Computes model weights dynamically based on F1 scores."""
        f1_scores = []
        with torch.no_grad():
            for model in self.models:
                outputs = model(X_val)
                if outputs.shape[1] == 1:
                    probs = torch.sigmoid(outputs)
                    preds = (probs > 0.5).long().squeeze()
                else:
                    preds = torch.argmax(outputs, dim=1)

                f1 = f1_score(y_val.cpu(), preds.cpu(), average='weighted')
                f1_scores.append(f1)

        # Convert F1 scores into weights
        f1_scores = torch.tensor(f1_scores)
        normalized_weights = f1_scores / f1_scores.sum()  # Normalize so they sum to 1

        return normalized_weights.view(-1, 1, 1)  # Reshape for broadcasting

    def forward(self, x):
        outputs = []
        for model in self.models:
            output = model(x)
            if output.shape[1] == 1:
                probs = torch.cat([1 - torch.sigmoid(output), torch.sigmoid(output)], dim=1)
            else:
                probs = torch.softmax(output, dim=1)
            outputs.append(probs)

        outputs = torch.stack(outputs, dim=0)  # shape: [num_models, batch, num_classes]
        weighted_avg = torch.sum(outputs * self.weights.to(outputs.device), dim=0) / self.weights.sum()
        return weighted_avg

# Compute weights dynamically and instantiate the ensemble
weighted_ensemble = WeightedSoftVotingEnsembleModel(
    [nn_model, rf_model, gbt_model, lr_model, svm_model], X_test, y_test
)

# Evaluate the ensemble
weighted_ensemble.eval()
with torch.no_grad():
    outputs = weighted_ensemble(X_test)
    predictions = torch.argmax(outputs, dim=1)

    f1 = f1_score(y_test.cpu(), predictions.cpu(), average='weighted')
    accuracy = (predictions == y_test.long().view(-1)).sum().item() / y_test.size(0)

print(f"Dynamic Weighted Voting Accuracy: {accuracy:.4f}")
print(f"Dynamic Weighted Voting F1 Score: {f1:.4f}")
