In [351]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

In [352]:
# check if running on GPU or CPU
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [353]:
water = pd.read_csv('water.csv')

In [354]:
X = water.drop(columns=['Speed', 'Name', 'Unnamed: 0'])
y = water['Speed']

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

y = y.values

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [355]:


X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 10
train_loader = DataLoader(train_data, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_data, batch_size = 10, shuffle=False)

In [356]:
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Net, self).__init__()
        # first linear layer: input to hidden
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        # Activation function: Relu is common and helps avoid vanishing gradients
        self.relu = nn.ReLU()
        # second linear layer: hidden to output
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # apply first linear transofmration and ReLu activation
        x = self.relu(self.fc1(x))
        # output layer
        x = self.fc2(x)
        return x

In [357]:
num_epochs = 100
learning_rate = 0.01

input_dim = X_train_tensor.shape[1]
hidden_dim = 20
output_dim = 1

In [358]:
model = Net(input_dim, hidden_dim, output_dim)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

Net(
  (fc1): Linear(in_features=43, out_features=20, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=20, out_features=1, bias=True)
)


In [359]:
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_x)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * batch_x.size(0)

    avg_train_loss = epoch_loss / len(train_data)

    model.eval()
    test_loss = 0.0

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            test_loss += loss.item() * batch_x.size(0)
    avg_test_loss = test_loss / len(test_data)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}")

Epoch 10/100 - Train Loss: 818.5316, Test Loss: 475.6337
Epoch 20/100 - Train Loss: 519.4778, Test Loss: 474.4109
Epoch 30/100 - Train Loss: 483.1325, Test Loss: 447.4847
Epoch 40/100 - Train Loss: 454.8439, Test Loss: 433.5175
Epoch 50/100 - Train Loss: 435.1143, Test Loss: 425.7854
Epoch 60/100 - Train Loss: 423.0749, Test Loss: 411.0230
Epoch 70/100 - Train Loss: 410.8877, Test Loss: 414.1554
Epoch 80/100 - Train Loss: 401.0240, Test Loss: 403.7340
Epoch 90/100 - Train Loss: 394.7698, Test Loss: 397.2360
Epoch 100/100 - Train Loss: 387.2708, Test Loss: 389.2713


In [None]:
model.eval()

y_preds = []
y_actuals = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        y_preds.append(outputs)
        y_actuals.append(batch_y)

y_preds = torch.cat(y_preds).numpy().squeeze()
y_actuals = torch.cat(y_actuals).numpy().squeeze()

# Create a scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(y_actuals, y_preds, alpha=0.6)
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs Actual Values")
# Optionally, plot the perfect prediction line (y=x) for reference:
plt.plot([y_actuals.min(), y_actuals.max()], [y_actuals.min(), y_actuals.max()], 'r--')
plt.show()

AttributeError: 'Tensor' object has no attribute 'np'