In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hyperparameters


In [2]:
COLUMNS_TO_KEEP = [
    "direct_rad:W",
    "clear_sky_rad:W",
    "diffuse_rad:W",
    "direct_rad_1h:J",
    "is_in_shadow:idx",
    "clear_sky_energy_1h:J",
    "diffuse_rad_1h:J",
    "is_day:idx",
    "sun_elevation:d",
    # "ceiling_height_agl:m",
    # "effective_cloud_cover:p",
    # "visibility:m",
    # "date_calc",
    "pv_measurement",
]
LEARNING_RATE = 0.001
NUM_EPOCHS = 100
BATCH_SIZE = 10
NUM_FEATURES = len(COLUMNS_TO_KEEP) - 1  # -1 because pv_measurement is the target
FEATURE_SIZE = 4  # 7 days of hourly data
WEIGHT_DECAY = 0.01

# Neural net


In [3]:
class SolarPredictionNet(nn.Module):
    def __init__(self, input_size, num_channels):
        super(SolarPredictionNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv1d(num_channels, 32, kernel_size=1, stride=1, padding=0)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=1, stride=1, padding=0)
        # self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)

        # Compute the output size after convolutions and pooling
        out_size = input_size // 4  # Divided by 4 because of two pooling layers

        # Fully connected layers
        self.fc1 = nn.Linear(out_size * 64, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        # x = self.pool(x)
        x = self.relu(self.conv2(x))
        # x = self.pool(x)

        # Flatten
        x = x.view(x.size(0), -1)

        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

# Load dataset


In [4]:
# Load data from Parquet files
df_data = pd.read_parquet("data/A/X_train_observed.parquet")
df_target = pd.read_parquet("data/A/train_targets.parquet")

# Merge the datasets
df_merged = pd.merge(
    df_data, df_target, left_on="date_forecast", right_on="time", how="inner"
)
df_merged = df_merged[COLUMNS_TO_KEEP]

# Separate the features and targets
y = df_merged["pv_measurement"]
X = df_merged.drop(
    "pv_measurement", axis=1
)  # Replace 'target_column_name' with the name of your target column


# Create a custom dataset
class SolarDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X.values, y.values, test_size=0.2, random_state=42
)



# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(
    2
)  # [batch_size, channels, 1]
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).unsqueeze(2)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)

# Create datasets for training and validation
train_dataset = SolarDataset(X_train_tensor, y_train_tensor)
val_dataset = SolarDataset(X_val_tensor, y_val_tensor)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# ... [Neural Network and Training code from previous messages]

# Training Loop


In [5]:
def train_model(model, train_loader, val_loader):
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    for epoch in range(NUM_EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        # Evaluate the model on the validation set
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data, target in val_loader:
                output = model(data)
                loss = criterion(output, target)
                val_loss += loss.item()

        # Average validation loss
        val_loss /= len(val_loader)

        print(
            f"Epoch {epoch + 1}/{NUM_EPOCHS}, Training Loss: {loss.item()}, Validation Loss: {val_loss}"
        )

    print("Training complete!")

In [6]:
model = SolarPredictionNet(FEATURE_SIZE, NUM_FEATURES)
train_model(model, train_loader, val_loader)

Epoch 1/100, Training Loss: 71.20064544677734, Validation Loss: 298.2879333929582
Epoch 2/100, Training Loss: 72.36201477050781, Validation Loss: 291.6570111605455
Epoch 3/100, Training Loss: 67.1571044921875, Validation Loss: 282.69557085984485
Epoch 4/100, Training Loss: 42.9172477722168, Validation Loss: 287.6387961183734
Epoch 5/100, Training Loss: 83.09991455078125, Validation Loss: 292.15552679858223
Epoch 6/100, Training Loss: 61.99330139160156, Validation Loss: 282.3620213370532
Epoch 7/100, Training Loss: 74.4435043334961, Validation Loss: 289.86020496959236
Epoch 8/100, Training Loss: 59.852500915527344, Validation Loss: 284.2400024948698
Epoch 9/100, Training Loss: 75.8277816772461, Validation Loss: 280.56453491862777
Epoch 10/100, Training Loss: 59.72639465332031, Validation Loss: 281.4918266595012
Epoch 11/100, Training Loss: 75.80879974365234, Validation Loss: 287.5402664985721
Epoch 12/100, Training Loss: 67.5129623413086, Validation Loss: 279.84710356121514
Epoch 13/100