In [1]:
import os
import torch
import pandas as pd
import numpy as np
import xarray as xr
import json
from dataset import Dataset
from torch.utils.data import DataLoader, IterableDataset
from torchinfo import summary
import torch.nn as nn
import torch.optim as optim
from ocf_blosc2 import Blosc2
import torch.nn.functional as F
from datetime import datetime, time, timedelta


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

#### **Download data**

The next block downloads the data from huggingface. This is only required if working on Google Colab OR data has not yet been downloaded locally. Expect this cell to run for up to 30 minutes.

##### **Load pv, hrv and indices**

In [4]:
pv_inc = pd.read_csv("pv_inc_full_first_method.csv").set_index(["timestamp", "ss_id"])
pv_df = pv_inc.reset_index()
pv_df["timestamp"] = pd.to_datetime(pv_df["timestamp"], utc=True)
pv_df = pv_df.set_index(["timestamp","ss_id"])
pv_inc = pv_df.drop(columns={"power", "angle_of_incidence_radians",})
pv = pv_df.drop(columns={"power_normalized", "angle_of_incidence_radians_normalized", "angle_of_incidence_radians", "angle_of_incidence_radians_normalized"})
pv.index = pv.index.set_levels([pv.index.levels[0].tz_localize(None), pv.index.levels[1]])
pv_inc.index = pv_inc.index.set_levels([pv_inc.index.levels[0].tz_localize(None), pv_inc.index.levels[1]])

# The parquet data here is similar to a dataframe. The "power" is the column with the other data types being indexes. The data is shaped with each timestamp being its own 
# subframe with the sites having their corresponding power (% of site capacity).  
hrv = []
for year in [2020, 2021]:
    for month in [6, 7, 8]:
        hrv.append(xr.open_dataset(f"data/satellite-hrv/{year}/{month}.zarr.zip", engine="zarr", chunks="auto"))

hrv = xr.concat(hrv, dim="time")

# Images are stored as vectors. The vectors are stored as an array of vectors. The arrays have a timestamp. Since there is only one channel (hrv)
# the array is a 1D set of vectors with the dimension being time. Read this to help you understand how this is being stored 
# https://tutorial.xarray.dev/fundamentals/01_datastructures.html
with open("indices.json") as f:
    site_locations = {
        data_source: {
            int(site): (int(location[0]), int(location[1]))
            for site, location in locations.items() #if site == '2607'#added this to run only 1 site location to understand how it works
        }
        for data_source, locations in json.load(f).items()
    }

  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


In [4]:
class ChallengeDataset(IterableDataset):
    def __init__(self, pv, hrv, site_locations, start_date = "2020-7-1", end_date = "2020-7-30", sites=None):
        self.pv = pv
        self.hrv = hrv
        self._site_locations = site_locations
        self._sites = sites if sites else list(site_locations["hrv"].keys())#This gets the individual site ids which are stored as the dict's keys
        self.start_date = list(map(int, start_date.split("-")))
        self.end_date= list(map(int, end_date.split("-")))

    def _get_image_times(self):#This function starts at the minimum date in the set and iterates up to the highest date, this is done as the data set is large and due to the nature of the parquette and xarray
        min_date = datetime(self.start_date[0], self.start_date[1], self.start_date[2])
        max_date = datetime(self.end_date[0], self.end_date[1], self.end_date[2])
        
        start_time = time(8)
        end_time = time(17)

        date = min_date 
        while date <= max_date: 
            current_time = datetime.combine(date, start_time)
            while current_time.time() < end_time:
                if current_time:
                    yield current_time

                current_time += timedelta(minutes=60)

            date += timedelta(days=1)

    def __iter__(self):
        for time in self._get_image_times():
            
             # generate time ids for predictions to be analysedm after training
            time_ids = pd.date_range(start=time + timedelta(hours=1),
                                      end=time + timedelta(hours=1)+timedelta(minutes=55),
                                      freq='5min')
            time_ids = time_ids.strftime('%Y-%m-%dT%H:%M:%S').tolist()  

            # 1 hour leading up to the predicton time        
            first_hour = slice(str(time), str(time + timedelta(minutes=55)))

            # PV power output in first hour
            pv_features = pv.xs(first_hour, drop_level=False)

            pv_targets = pv.xs(
                slice(  # type: ignore
                    str(time + timedelta(hours=1)),
                    str(time + timedelta(hours=1, minutes=55)),
                ),
                drop_level=False,
            )

           # hrv satellite images on first hour timestamps setting them up as an input feature
            hrv_data = self.hrv["data"].sel(time=first_hour).to_numpy()

            for site in self._sites:
                site_id = site

                try:
                    site_features = pv_features.xs(site, level=1).to_numpy().squeeze(-1) # gets the pixel based location of the pv site and then uses this to make predictions based on the individual sites
                    site_targets = pv_targets.xs(site, level=1).to_numpy().squeeze(-1)
                    assert site_features.shape == (12,) and site_targets.shape == (12,)
                 
                    x, y = self._site_locations["hrv"][site]
                    hrv_features = hrv_data[:, y - 4  : y + 4 ,
                                             x - 4  : x + 4 , 0]
                    assert hrv_features.shape == (12, 8, 8)

                except:
                    continue

                yield time_ids, site_id, site_features, hrv_features, site_targets

In [5]:
class ChallengeDataset_inc(IterableDataset):
    def __init__(self, pv_inc, hrv, site_locations, start_date="2020-7-1", end_date="2020-7-30", sites=None):
        self.pv_inc = pv_inc
        self.hrv = hrv
        self._site_locations = site_locations
        self._sites = sites if sites else list(site_locations["hrv"].keys())
        self.start_date = list(map(int, start_date.split("-")))
        self.end_date = list(map(int, end_date.split("-")))

    def _get_image_times(self):
        min_date = datetime(self.start_date[0], self.start_date[1], self.start_date[2])
        max_date = datetime(self.end_date[0], self.end_date[1], self.end_date[2])
        start_time = time(8)
        end_time = time(17)
        date = min_date
        while date <= max_date:
            current_time = datetime.combine(date, start_time)
            while current_time.time() < end_time:
                yield current_time
                current_time += timedelta(minutes=60)
            date += timedelta(days=1)

    def __iter__(self):
        for time in self._get_image_times():
            time_ids = pd.date_range(start=time + timedelta(hours=1),
                                     end=time + timedelta(hours=5) - timedelta(minutes=5),
                                     freq='5min').strftime('%Y-%m-%dT%H:%M:%S').tolist()
            first_hour = slice(str(time), str(time + timedelta(minutes=55)))
            pv_features = self.pv_inc.xs(first_hour, drop_level=False)[["power_normalized", "angle_of_incidence_radians_normalized"]]
            pv_targets = self.pv_inc.xs(slice(str(time + timedelta(hours=1)), 
                                     str(time + timedelta(hours=1, minutes=55))),
                                     drop_level=False)["power_normalized"]
            hrv_data = self.hrv['data'].sel(time=first_hour).to_numpy()

            for site in self._sites:
                site_id = site
                try:
                    site_features = pv_features.xs(site, level=1).to_numpy()
                    site_targets = pv_targets.xs(site, level=1).to_numpy()
                    assert site_features.shape == (12,2) and site_targets.shape == (12,)
                    x, y = self._site_locations["hrv"][site]
                    hrv_features = hrv_data[:, y - 1 : y + 1, x - 1 : x + 1, 0]
                    assert hrv_features.shape == (12, 2, 2)
                    yield time_ids, site_id, site_features, hrv_features, site_targets
                except:
                    continue

#### **Create train, validation and test datasets**

# **Model Training**

In [6]:
layers = [4, 4, 4, 4]

def conv_block(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv_block(in_channels, out_channels, stride=stride)
        self.conv2 = conv_block(out_channels, out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out = out + identity
        return F.relu(out, inplace=False)

class ResNet_light_inc(nn.Module):
    def __init__(self, block, layers):
        super(ResNet_light_inc, self).__init__()
        self.in_channels = 12
        self.initial = conv_block(12, 12, kernel_size=1, stride=1, padding=0)
        self.maxpool = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)
        self.layer1 = self._make_layer(block, 12, layers[0])
        self.layer2 = self._make_layer(block, 24, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 48, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 96, layers[3], stride=1)
        self.avgpool = nn.AdaptiveMaxPool2d((1, 1))
        self.fc = nn.Linear(96  + 24, 12)#change back to + 2

    def _make_layer(self, block, out_channels, num_blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, pv_inc, hrv):
        x = self.initial(hrv)
        
        #x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
       # print(f"X Shape {x.shape}")
       
        power = pv_inc[:, :, 0]
        #print(f"Power: {pv_inc[:, :, 0]}")
        
        angle = pv_inc[:, :, 1]
        #print(f"{angle.shape}")

        #pv_inc = torch.flatten(pv_inc, start_dim=1)
        #print("Shape of x:", x.shape)
        #print("Shape of pv_inc:", pv_inc.shape)
        #print("Shape of power:", power.shape)
        #print("Shape of angle:", angle.shape)
        
        combined = torch.cat((x, power, angle), dim=1)
        if self.fc.in_features != combined.shape[1]:
            self.fc = nn.Linear(combined.shape[1], 12).to(combined.device)
        out = self.fc(combined)
        
        #print(f"Out shape {out.shape}")
        return out

model_light_res_inc_feature = ResNet_light_inc(BasicBlock, layers).to(device)

In [71]:
summary(model_light_res_inc_feature, input_size=[(2,12), (12, 2, 2)]) 

Initial HRV Shape: torch.Size([2, 12, 2, 2])
Flattened HRV Features Shape: torch.Size([2, 96])
Combined shape torch.Size([2, 120])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1             [-1, 12, 2, 2]             156
       BatchNorm2d-2             [-1, 12, 2, 2]              24
              ReLU-3             [-1, 12, 2, 2]               0
         MaxPool2d-4             [-1, 12, 2, 2]               0
            Conv2d-5             [-1, 12, 2, 2]             156
       BatchNorm2d-6             [-1, 12, 2, 2]              24
              ReLU-7             [-1, 12, 2, 2]               0
            Conv2d-8             [-1, 12, 2, 2]             156
       BatchNorm2d-9             [-1, 12, 2, 2]              24
             ReLU-10             [-1, 12, 2, 2]               0
       BasicBlock-11             [-1, 12, 2, 2]               0
           Conv2d-12             [-1

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [5]:
layers = [4, 4, 4, 4,] #For a deeper resnet with 16 total conv layers

def conv_block(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True))

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv_block(in_channels, out_channels, stride=stride)
        self.conv2 = conv_block(out_channels, out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out = out + identity
        return F.relu(out, inplace=False)

class ResNet_light_deep_crop1_60m(nn.Module):
    
    def __init__(self, block, layers):
        
        super(ResNet_light_deep_crop1_60m, self).__init__()
        self.in_channels = 12 #reduce the stride
        self.initial = nn.Identity()
        #self.maxpool = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)
        self.layer1 = self._make_layer(block, 12, layers[0])
        self.layer2 = self._make_layer(block, 24, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 48, layers[2], stride=1)
        self.layer4 = self._make_layer(block, 96, layers[3], stride=1)
        self.avgpool = nn.AdaptiveMaxPool2d((1, 1))
        # Adjust this linear layer based on the concatenated size of HRV and PV features
        self.fc = nn.Linear(96  + 12, 12)  

    def _make_layer(self, block, out_channels, num_blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, pv, hrv ):
        #print("Initial HRV shape:", hrv.shape)  
        #print("Initial PV shape:", pv.shape) 
        #print(f"{pv[0]}")
        x = self.initial(hrv)
        #x = self.maxpool(x)
        #print("Shape after initial conv and maxpool:", x.shape)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        #print("Shape after ResNet_light blocks:", x.shape)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        pv = torch.flatten(pv, start_dim=1)


        if pv.dim() > 2:
            pv = torch.flatten(pv, start_dim=1)

        combined = torch.cat((x, pv), dim=1)
        
        

        if self.fc.in_features != combined.shape[1]:
            self.fc = nn.Linear(combined.shape[1], 12).to(combined.device)

        out = self.fc(combined)
        return out
model_pv = ResNet_light_deep_crop1_60m(BasicBlock, layers).to(device)

What does the flatterned layer</br>
Send shape of resnet

In [81]:
print(pv_inc.shape)


(34402607, 2)


In [7]:
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self, predicted, actual):
        return torch.sqrt(self.mse(predicted, actual))


def model_validation(model, criterion, validation_dataloader):
    model.eval() # This is used to set the model to evaluation mode
    with torch.no_grad(): # This is used to stop the model from storing gradients
        losses = []
        for pv_features, hrv_features, pv_targets in validation_dataloader:
            pv_features, hrv_features, pv_targets = pv_features.to(device, dtype=torch.float), hrv_features.to(device, dtype=torch.float), pv_targets.to(device, dtype=torch.float)
            predictions = model(pv_features, hrv_features)
            loss = criterion(predictions, pv_targets)
            losses.append(loss.item())

    model.train() # This is used to set the model back to training mode
    
    return sum(losses) / len(losses)

def model_validation_indv(model, criterion, validation_dataloader):
    model.eval()  # Set the model to evaluation mode
    individual_losses = []  # List to store each individual loss

    with torch.no_grad():  # Disable gradient computation
        for pv_features, hrv_features, pv_targets in validation_dataloader:
            pv_features, hrv_features, pv_targets = pv_features.to(device, dtype=torch.float), hrv_features.to(device, dtype=torch.float), pv_targets.to(device, dtype=torch.float)

            predictions = model(pv_features, hrv_features)  # Get model predictions
            
            # Calculate loss for each individual in the batch
            individual_batch_losses = criterion(predictions, pv_targets, reduction='none')  # This should return a tensor of losses for each item in the batch
            
            individual_losses.extend(individual_batch_losses.tolist())  # Convert tensor to list and append to the list of losses
            
    model.train()  # Set the model back to training mode
    return individual_losses
criterion = RMSELoss()

In [8]:
model = model_light_res_inc_feature
optimiser = optim.Adam(model.parameters(), lr=1e-3)

In [7]:
model= model_pv 

optimiser = optim.Adam(model.parameters(), lr=1e-3)

In [9]:
BATCH_SIZE=64
train_dataset = ChallengeDataset_inc(pv_inc, hrv, site_locations=site_locations,
                                 start_date="2020-6-1", end_date="2020-8-31")  # controls which data is loaded in
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, pin_memory=True)#try with shuffle

validation_dataset = ChallengeDataset_inc(pv_inc, hrv, site_locations=site_locations,
                                      start_date="2021-7-01", end_date="2021-7-31") 
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, pin_memory=True) 

In [None]:
#from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

# Example using StepLR
##scheduler = StepLR(optimizer, step_size=100, gamma=0.1)

# Example using ReduceLROnPlateau
#scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.1, verbose=True)

Currently running 2x2 buffer


In [10]:
# model 
EPOCHS = 10
training_losses = []
validation_losses = []
epoch_train_losses = []
epoch_validation_losses = []
for epoch in range(EPOCHS):
    model.train()

    running_loss = 0.0 ##sets the starting loss at zero
    count = 0 #is used to keep track of the number of batches passed through the training model
  
    for i, (time_ids, site_id, pv_features, hrv_features, pv_targets) in enumerate(train_dataloader): 
        
        optimiser.zero_grad()
        predictions = model(
            pv_features.to(device, dtype=torch.float),
            hrv_features.to(device, dtype=torch.float),
        )
        loss = criterion(predictions, pv_targets.to(device, dtype=torch.float))
        loss.backward()
        optimiser.step()

        running_loss += loss.item() * pv_targets.size(0)
        count += pv_targets.size(0)

        if i % 200 == 199:
            
            batch_loss = running_loss / count
            training_losses.append(batch_loss)

            print(f"Epoch {epoch + 1}, {i + 1}: {batch_loss}")
            
            # print(f"     Training Loss: {batch_loss}")

            # validation_loss = model_validation(model, criterion, validation_dataloader)
            # validation_losses.append(validation_loss)
            # print(f"     Validation Loss: {validation_loss}\n")
            
    
    epoch_train_loss = running_loss / count
    epoch_train_losses.append(epoch_train_loss)

    # epoch_validation_loss = model_validation(model, criterion, validation_dataloader)
    # epoch_validation_losses.append(epoch_validation_loss)

    # print(f"Epoch {epoch + 1}, Training Loss: {epoch_train_loss}")
    # print(f"Epoch {epoch + 1}, Validation Loss: {epoch_validation_loss}")

Epoch 1, 200: 0.2131335947662592
Epoch 1, 400: 0.17861237705685198
Epoch 1, 600: 0.1792036269667248
Epoch 1, 800: 0.17375027772504836
Epoch 1, 1000: 0.16229482089355587
Epoch 1, 1200: 0.1577114818741878
Epoch 1, 1400: 0.15638374395402416
Epoch 1, 1600: 0.15496777579188348
Epoch 1, 1800: 0.15215345681127573
Epoch 1, 2000: 0.1548297593705356
Epoch 1, 2200: 0.15159770031544295
Epoch 1, 2400: 0.14843309002617994
Epoch 1, 2600: 0.14960618941829754
Epoch 1, 2800: 0.14774595565960877
Epoch 1, 3000: 0.14756656464437642
Epoch 1, 3200: 0.1450859531026799
Epoch 1, 3400: 0.1468070563925978
Epoch 1, 3600: 0.14499559611702958
Epoch 1, 3800: 0.14643378915265204
Epoch 1, 4000: 0.1461453473707661
Epoch 1, 4200: 0.1444194189405867
Epoch 1, 4400: 0.14440454070456327
Epoch 1, 4600: 0.14544404218783197
Epoch 1, 4800: 0.14605372358191138
Epoch 1, 5000: 0.14541700266823174
Epoch 1, 5200: 0.1455001534256511
Epoch 1, 5400: 0.14550061373591974
Epoch 1, 5600: 0.14486867137120238
Epoch 1, 5800: 0.1449904645349959

In [11]:
model_name = "ResNet_light_deep_crop2_inc_10epoch"
os.makedirs(f"models/{model_name}", exist_ok=True)

# Save the variables used to make the dataset to a text file
with open(f"models/{model_name}/data_summary.txt", "w") as f:
    f.write("BATCH_SIZE = "+ str(BATCH_SIZE))
            
    
# Save the trained model for future predictions
torch.save(model.state_dict(), f"models/{model_name}/trained_model.pt")

# Create a DataFrame from the training_losses and validation_losses lists
df = pd.DataFrame({'Training Losses': training_losses})
# df = pd.DataFrame({'Training Losses': batch_losses, 'Validation Losses': validation_losses})

# Export the DataFrame to a CSV file
df.to_csv(f'models\{model_name}\losses.csv', index=False)

In [25]:
model.load_state_dict(torch.load("models/ResNet_light_deep_crop1_inc_128/trained_model.pt"))
model.eval()


ResNet_light_inc(
  (initial): Sequential(
    (0): Conv2d(12, 12, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Sequential(
        (0): Conv2d(12, 12, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (conv2): Sequential(
        (0): Conv2d(12, 12, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (1): BasicBlock(
      (conv1): Sequential(
        (0): Conv2d(12, 12, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_

In [None]:
def _eval_visual(dataloader, model, device):
    model.eval()

    predictions_list = []
    timestamps_list = []
    pv_targets_list = []  # List to store pv_targets for each batch

    with torch.no_grad():
        for i, (time_ids, site_id, pv_features, hrv_features, pv_targets) in enumerate(dataloader):
            hrv_features = hrv_features.to(device, dtype=torch.float)
            pv_features = pv_features.to(device, dtype=torch.float)
            pv_targets = pv_targets.to(device, dtype=torch.float)
            
            batch_predictions = model(pv_features, hrv_features)
            batch_predictions = batch_predictions.cpu().numpy()
            batch_pv_targets = pv_targets.cpu().numpy()  # Convert pv_targets to numpy array

            # Timestamp processing as before
            if isinstance(time_ids[0], tuple) or isinstance(time_ids[0], list):
                single_timestamp = time_ids[0][0]
            else:
                single_timestamp = time_ids[0]
            if isinstance(single_timestamp, datetime):
                timestamp = single_timestamp.strftime('%Y-%m-%d %H:%M:%S')
            else:
                timestamp = str(single_timestamp)
            
            # Append each batch's data to the lists
            predictions_list.append(batch_predictions)
            pv_targets_list.append(batch_pv_targets)  # Append pv_targets to its list
            batch_timestamps = [timestamp] * batch_predictions.shape[0]
            timestamps_list.extend(batch_timestamps)

    # Concatenate all collected arrays into single numpy arrays
    predictions = np.concatenate(predictions_list, axis=0)
    pv_targets = np.concatenate(pv_targets_list, axis=0)  # Concatenate all pv_targets

    # Convert to DataFrame
    predictions_df = pd.DataFrame(predictions)
    pv_targets_df = pd.DataFrame(pv_targets, columns=[f'target_{i}' for i in range(pv_targets.shape[1])])
    timestamps_df = pd.DataFrame(timestamps_list, columns=['timestamp'])

    # Combine timestamps, predictions, and targets by using index alignment
    final_df = pd.concat([timestamps_df, predictions_df, pv_targets_df], axis=1)
    
    return final_df

In [None]:
prediction_df = _eval_visual(validation_dataloader, model, device)
prediction_df[['date', 'time']] = prediction_df['timestamp'].str.split('T', expand=True)
timestamp_index = prediction_df.columns.get_loc('timestamp')
prediction_df.insert(timestamp_index, 'date', prediction_df.pop('date'))
prediction_df.insert(timestamp_index + 1, 'time', prediction_df.pop('time'))
prediction_df.drop('timestamp', axis=1, inplace=True)
predictions_RMSE = prediction_df[11]
targets_RMSE = prediction_df['target_11']
mse = (predictions_RMSE - targets_RMSE) ** 2
rmse = np.sqrt(mse)
prediction_df['RMSE_1 hr'] = rmse

prediction_df.to_csv('predictions.csv', index=False)
print(prediction_df)