In [1]:
import os, sys

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

from data.data_helpers import get_dataframe



In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
df = get_dataframe()
df.head()



Skipping existing file /Users/alfiehofmann/cis5200-project/data/era5_tehachapi_2018_H1.nc
Skipping existing file /Users/alfiehofmann/cis5200-project/data/era5_tehachapi_2018_H2.nc
Skipping existing file /Users/alfiehofmann/cis5200-project/data/era5_tehachapi_2019_H1.nc
Skipping existing file /Users/alfiehofmann/cis5200-project/data/era5_tehachapi_2019_H2.nc
Skipping existing file /Users/alfiehofmann/cis5200-project/data/era5_tehachapi_2020_H1.nc
Skipping existing file /Users/alfiehofmann/cis5200-project/data/era5_tehachapi_2020_H2.nc


Unnamed: 0,datetime,u10,v10,t2m,sp,hour_sin,hour_cos,month_sin,month_cos,doy_sin,doy_cos,wind_speed,target_next_hour
0,2018-01-01 00:00:00,-0.808231,-0.069685,291.890625,89925.25,0.0,1.0,0.5,0.866025,0.017213,0.999852,0.811229,1.524293
1,2018-01-01 01:00:00,-1.340298,-0.725997,290.692627,89972.25,0.258819,0.965926,0.5,0.866025,0.017213,0.999852,1.524293,1.245654
2,2018-01-01 02:00:00,-0.462882,-1.156458,288.544922,90036.1875,0.5,0.866025,0.5,0.866025,0.017213,0.999852,1.245654,1.053555
3,2018-01-01 03:00:00,0.300817,-1.009697,285.121826,90091.3125,0.707107,0.707107,0.5,0.866025,0.017213,0.999852,1.053555,1.122294
4,2018-01-01 04:00:00,0.736362,-0.846944,283.71582,90125.125,0.866025,0.5,0.5,0.866025,0.017213,0.999852,1.122294,1.363163


In [4]:
feature_cols = [c for c in df.columns if c not in ["datetime", "target_next_hour"]]
X = df[feature_cols]
y = df["target_next_hour"]

In [5]:
n_samples = len(X)
train_size = int(n_samples * 0.8)

X_train_df = X.iloc[:train_size]
X_test_df  = X.iloc[train_size:]
y_train_sr = y.iloc[:train_size]
y_test_sr  = y.iloc[train_size:]

In [6]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_df.values)
X_test_scaled  = scaler.transform(X_test_df.values)

# Convert to tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_sr.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_sr.values, dtype=torch.float32).view(-1, 1)

# Dataloaders
batch_size = 128
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

In [7]:
input_dim = X_train_tensor.shape[1]

class WindNet(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

model = WindNet(input_dim)

In [8]:
def asymmetric_mse_loss(y_pred, y_true, alpha=2.0):
    """
    Penalize under-predictions (y_pred < y_true) more than over-predictions.
    alpha > 1 => under-predictions weighted more heavily.
    """
    diff = y_pred - y_true                  # error
    under_mask = (diff < 0).float()         # 1 where underpredict
    weights = 1.0 + (alpha - 1.0) * under_mask
    return torch.mean(weights * diff**2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 30
alpha = 2.0   # how much more we penalize underprediction

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        preds = model(batch_X)
        loss = asymmetric_mse_loss(preds, batch_y, alpha=alpha)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * batch_X.size(0)

    avg_loss = total_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs} - train asym MSE: {avg_loss:.4f}")

Epoch 1/30 - train asym MSE: 6.0061
Epoch 2/30 - train asym MSE: 0.5038
Epoch 3/30 - train asym MSE: 0.3687
Epoch 4/30 - train asym MSE: 0.3406
Epoch 5/30 - train asym MSE: 0.3187
Epoch 6/30 - train asym MSE: 0.3054
Epoch 7/30 - train asym MSE: 0.3129
Epoch 8/30 - train asym MSE: 0.2893
Epoch 9/30 - train asym MSE: 0.3079
Epoch 10/30 - train asym MSE: 0.3635
Epoch 11/30 - train asym MSE: 0.3039
Epoch 12/30 - train asym MSE: 0.3526
Epoch 13/30 - train asym MSE: 0.3772
Epoch 14/30 - train asym MSE: 0.3459
Epoch 15/30 - train asym MSE: 0.3890
Epoch 16/30 - train asym MSE: 0.4576
Epoch 17/30 - train asym MSE: 0.4286
Epoch 18/30 - train asym MSE: 0.5507
Epoch 19/30 - train asym MSE: 0.5542
Epoch 20/30 - train asym MSE: 0.6438
Epoch 21/30 - train asym MSE: 0.6053
Epoch 22/30 - train asym MSE: 0.5959
Epoch 23/30 - train asym MSE: 0.4874
Epoch 24/30 - train asym MSE: 0.4364
Epoch 25/30 - train asym MSE: 0.3588
Epoch 26/30 - train asym MSE: 0.3348
Epoch 27/30 - train asym MSE: 0.2976
Epoch 28/3

In [9]:
# Evaluation
model.eval()
with torch.no_grad():
    X_test_device = X_test_tensor.to(device)
    preds_test = model(X_test_device).cpu().numpy().flatten()

y_test_np = y_test_sr.values

rmse = np.sqrt(mean_squared_error(y_test_np, preds_test))
mae = mean_absolute_error(y_test_np, preds_test)
r2 = r2_score(y_test_np, preds_test)

print("\nNeural Network with Asymmetric Loss - Test Performance")
print("------------------------------------------------------")
print(f"RMSE: {rmse:.4f}")
print(f"MAE:  {mae:.4f}")
print(f"R²:   {r2:.4f}")


Neural Network with Asymmetric Loss - Test Performance
------------------------------------------------------
RMSE: 0.7991
MAE:  0.7114
R²:   0.6392
