In [1]:
#%pip install numpy matplotlib zarr xarray ipykernel gcsfs fsspec dask cartopy ocf-blosc2 torchinfo
#%pip install -U doxa-cli

Note: you may need to restart the kernel to use updated packages.




Collecting doxa-cli
  Downloading doxa_cli-0.1.7-py3-none-any.whl (16 kB)
Collecting requests~=2.26.0
  Using cached requests-2.26.0-py2.py3-none-any.whl (62 kB)
Installing collected packages: requests, doxa-cli
  Attempting uninstall: requests
    Found existing installation: requests 2.31.0
    Uninstalling requests-2.31.0:
      Successfully uninstalled requests-2.31.0
  Attempting uninstall: doxa-cli
    Found existing installation: doxa-cli 0.1.5
    Uninstalling doxa-cli-0.1.5:
      Successfully uninstalled doxa-cli-0.1.5
Successfully installed doxa-cli-0.1.7 requests-2.26.0
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
anaconda-project 0.11.1 requires ruamel-yaml, which is not installed.
translate-json 0.0.2 requires certifi==2021.5.30, but you have certifi 2023.5.7 which is incompatible.
translate-json 0.0.2 requires idna==3.2; python_version >= "3", but you have idna 2.10 which is incompatible.
osmnx 1.9.1 requires requests>=2.27, but you have requests 2.26.0 which is incompatible.
conda-repo-cli 1.0.20 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.5.0 which is incompatible.
conda-repo-cli 1.0.20 requires requests==2.28.1, but you have requests 2.26.0 which is incompatible.


## Importing packages

Here, we import a number of packages we will need to train our first model.

In [1]:
import os
from datetime import datetime, time, timedelta
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import xarray as xr
from ocf_blosc2 import Blosc2
from torch.utils.data import DataLoader, IterableDataset
from torchinfo import summary
import json
import geopandas as gpd
import seaborn as sns
from scipy.ndimage import zoom
import numpy as np
import h5py

plt.rcParams["figure.figsize"] = (20, 12)

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cpu')

In [3]:
if not os.path.exists("submission"):
     os.makedirs("submission", exist_ok=True)
     #Installing locally means you do not need to rerun this each time you restart the notebook
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/competition.py --output submission/competition.py
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/doxa.yaml --output submission/doxa.yaml
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/model.py --output submission/model.py
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/run.py --output submission/run.py
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/indices.json --output indices.json
if not os.path.exists("data"):
    os.makedirs("data/pv/2020", exist_ok=True)
    os.makedirs("data/satellite-hrv/2020", exist_ok=True)

    !curl -L https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/pv/metadata.csv --output data/pv/metadata.csv
    !curl -L https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/pv/2020/7.parquet --output data/pv/2020/7.parquet
    !curl -L https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/satellite-hrv/2020/7.zarr.zip --output data/satellite-hrv/2020/7.zarr.zip
pv = pd.read_parquet("data/pv/2020/7.parquet").drop("generation_wh", axis=1)
#The parquet data here is similar to a dataframe. The "power" is the column with the other data types being indexes. The data is shaped with each timestamp being its own sub frame with the sites having their corresponding power (I think this is the % of their total possible yield).  
hrv = xr.open_dataset(
    "data/satellite-hrv/2020/7.zarr.zip", engine="zarr", chunks="auto"
)
#The way that this works is that it stores the image as a vector. The vectors are stored as an array of vectors. These then have a timestamp, as we only have one channel the array is a 1D set of vectors with the dimension being time. Read this to help you understand how this is being stored https://tutorial.xarray.dev/fundamentals/01_datastructures.html
# To access I have included some examples below
#The float value (float16-float64) shows the precision with which data is stored. Later on it is important to make sure that when you are feeding in data into the model that the float type matches between data types, this currently is not a problem when only using the HRV data. I am not yet sure if this will be a problem when using the NWP data.

with open("indices.json") as f:
    site_locations = {
        data_source: {
            int(site): (int(location[0]), int(location[1]))
            for site, location in locations.items()
        }
        for data_source, locations in json.load(f).items()
    }

In [26]:
hrv["data"] #This stores all of the data within this channel as a 16 bit set of floats with 47 chunks. The cube on the side shows how the each image is stored.

Unnamed: 0,Array,Chunk
Bytes,5.07 GiB,110.44 MiB
Shape,"(6721, 592, 684, 1)","(143, 592, 684, 1)"
Count,48 Tasks,47 Chunks
Type,float16,numpy.ndarray
"Array Chunk Bytes 5.07 GiB 110.44 MiB Shape (6721, 592, 684, 1) (143, 592, 684, 1) Count 48 Tasks 47 Chunks Type float16 numpy.ndarray",6721  1  1  684  592,

Unnamed: 0,Array,Chunk
Bytes,5.07 GiB,110.44 MiB
Shape,"(6721, 592, 684, 1)","(143, 592, 684, 1)"
Count,48 Tasks,47 Chunks
Type,float16,numpy.ndarray


In [39]:
print(hrv["time"][0])
print(hrv["time"][-1])
#There seems to be some discrepancy with the time dimensions beyond the resolution. Multiplying out the nwp dataset to 12x its size shows that there is some mismatch in the datasets, I am not sure why yet. 

<xarray.DataArray 'time' ()>
array('2020-07-01T04:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 2020-07-01T04:00:00
<xarray.DataArray 'time' ()>
array('2020-07-31T22:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 2020-07-31T22:00:00


Importing and treating the nwp data, this downloads quickly so no need to download locally

In [51]:
nwp = xr.open_dataset(
    "zip:///::https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/weather/2020/7.zarr.zip",
    engine="zarr",
    consolidated=True,
)
#note that this only covers one month in this download
nwp

In [None]:
#here I am attempting to resample the nwp data using K-nearest neighbours so that the size of the images match

If you want to resample different channels then simply change the channel selected in the square brackets' just make sure that you change what you're saving it as

In [74]:
clch_data = nwp["clch"].values  # This extracts the numpy array from the xarray DataArray
target_height = len(hrv["y_geostationary"])  # y dimension from HRV
target_width = len(hrv["x_geostationary"]) 
# Calculate the resampling scale factors assuming you know the desired output dimensions
# For example, if your HRV data is twice as high resolution in both dimensions:
scale_y = target_height / clch_data.shape[1]
scale_x = target_width / clch_data.shape[2]

# Perform nearest-neighbor upscaling, this is quick.
resampled_clch = np.array([zoom(clch_data_slice, (scale_y, scale_x), order=0) for clch_data_slice in clch_data])


In [21]:
#here I am recreating the xarray that contains the nwp data with the resampled infrared imagery 
lat_values = nwp["latitude"].values
lon_values = nwp["longitude"].values

new_lat = np.linspace(start=lat_values[0], stop=lat_values[-1], num=len(hrv["y_geostationary"]))
new_lon = np.linspace(start=lon_values[0], stop=lon_values[-1], num=len(hrv["x_geostationary"]))

knn_nwp = xr.DataArray(
    data=resampled_clch,
    dims=["time", "latitude", "longitude"],
    coords={
        "time": nwp["time"].values,
        "latitude": new_lat,
        "longitude": new_lon,
    },
    name="clch"#change this to match the channel
)

knn_nwp.attrs = nwp.attrs
knn_nwp.isel(time=9).plot()#cross checking this against the same time for the nwp xarray shows that the dataset has been correctly remade
plt.show()

As part of the challenge, you can make use of satellite imagery, numerical weather prediction and air quality forecast data in a `[128, 128]` region centred on each solar PV site. In order to help you out, we have pre-computed the indices corresponding to each solar PV site and included them in `indices.json`, which we can load directly. For more information, take a look at the [challenge page](https://doxaai.com/competition/climatehackai-2023).


In [None]:
#how do I test that the HRV sites and the knn_nwp sites match locations

In [40]:
#If we want to create a validation dataset we will need to create something else like this for a different time period.

## Used for creating the dataloader that passes the data to the model, this needs to be changed if we want to pass in non-hrv data

In [8]:
#This function extracts the area around each individual site using the PV dicts pixel based (as in the location of sites are determined by their pixel in the image) location and then extracts an area around each site. These areas are combined, based on their timestamp,
#with the HRV data that then has its satellite imagery data extracted. This implies that the model is using subsets of the satellite imagery to train the model to make predictions for each site rather than using the whole image and then "learning" where the sites are.

class ChallengeDataset(IterableDataset):#This function sets up the data so that it can be iterated through by the CNN
    def __init__(self, pv, hrv, site_locations, sites=None):#The "self" augmentation here is used to use create a shared class between the different data types that are then iterable based on their shared timestamp
        self.pv = pv
        self.hrv = hrv
        self._site_locations = site_locations
        self._sites = sites if sites else list(site_locations["hrv"].keys())#This gets the individual site ids which are stored as the dict's keys

    def _get_image_times(self):#This function starts at the minimum date in the set and iterates up to the highest date, this is done as the data set is large and due to the nature of the parquette and xarray
        min_date = datetime(2020, 7, 1)
        max_date = datetime(2020, 7, 31)
        #max and min need to be changed if we use more than one month of data
        start_time = time(8)
        end_time = time(17)

        date = min_date#starts at the first timestamp
        while date <= max_date: #iterates through up to the max
            current_time = datetime.combine(date, start_time)
            while current_time.time() < end_time:
                if current_time:
                    yield current_time

                current_time += timedelta(minutes=60)

            date += timedelta(days=1)

    def __iter__(self):
        for time in self._get_image_times():
            first_hour = slice(str(time), str(time + timedelta(minutes=55)))#gets the time and then uses this to select the corresponding time from the pv set  

            pv_features = pv.xs(first_hour, drop_level=False)  # this gets the pv yield of the current timestamp selected earlier
            pv_targets = pv.xs(
                slice(  # type: ignore
                    str(time + timedelta(hours=1)),
                    str(time + timedelta(hours=4, minutes=55)),
                ),
                drop_level=False,
            )#pv targets defines the time span over which we are trying to make pv yield predictions

            hrv_data = self.hrv["data"].sel(time=first_hour).to_numpy()#gets the hrv satellite image that is associated with the first hour timestamp setting it up as an input feature

            for site in self._sites:
                try:
                    # Get solar PV features and targets, the site_targets is used to find the models loss
                    site_features = pv_features.xs(site, level=1).to_numpy().squeeze(-1)#gets the pixel based location of the pv site and then uses this to make predictions based on the individual sites
                    site_targets = pv_targets.xs(site, level=1).to_numpy().squeeze(-1)
                    assert site_features.shape == (12,) and site_targets.shape == (48,)#compresses the data from N dimensions to 12 and 48 respectively

                    # Get a 128x128 HRV crop centred on the site over the previous hour
                    x, y = self._site_locations["hrv"][site]#gets the location of the site based on the pv sites pixel level location
                    hrv_features = hrv_data[:, y - 64 : y + 64, x - 64 : x + 64, 0]
                    assert hrv_features.shape == (12, 128, 128)#crops the image to be be 128x128 around the site
                    #asset is used to force the dimensions of the extracted site level image to be the same
                    # How might you adapt this for the non-HRV, weather and aerosol data?
                except:
                    continue

                yield site_features, hrv_features, site_targets

## ##########comments are the same and this does not work yet############

Here I am including the nwp data into the dataloader using the resampled clch data (resampled only on pixel dimensions), we need to figure out a way to overcome the time resolution conflict.

In [33]:
#This function extracts the area around each individual site using the PV dicts pixel based (as in the location of sites are determined by their pixel in the image) location and then extracts an area around each site. These areas are combined, based on their timestamp,
#with the HRV data that then has its satellite imagery data extracted. This implies that the model is using subsets of the satellite imagery to train the model to make predictions for each site rather than using the whole image and then "learning" where the sites are.

#Note the IterableDataset here inherits properties from the library it is attached to
class ChallengeDataset_nwp(IterableDataset):#This function sets up the data so that it can be iterated through by the CNN
    def __init__(self, pv, hrv, knn_nwp, site_locations, sites=None):#The "self" augmentation here is used to use create a shared class between the different data types that are then iterable based on their shared timestamp
        self.pv = pv
        self.hrv = hrv
        self.knn_nwp = knn_nwp#adding the nwp xarray 
        self._site_locations = site_locations
        self._sites = sites if sites else list(site_locations["hrv"].keys())#This gets the individual site ids which are stored as the dict's keys

    def _get_image_times(self):#This function starts at the minimum date in the set and iterates up to the highest date, this is done as the data set is large and due to the nature of the parquette and xarray
        min_date = datetime(2020, 7, 1)
        max_date = datetime(2020, 7, 2) #I have changed this to see if the model works
        #max and min need to be changed if we use more than one month of data
        start_time = time(8)
        end_time = time(17)

        date = min_date#starts at the first timestamp
        while date <= max_date: #iterates through up to the max
            current_time = datetime.combine(date, start_time)
            while current_time.time() < end_time:
                if current_time:
                    yield current_time

                current_time += timedelta(minutes=60)

            date += timedelta(days=1)

    def __iter__(self):
        for time in self._get_image_times():
            first_hour = slice(str(time), str(time + timedelta(minutes=55)))#gets the time and then uses this to select the corresponding time from the pv set  

            pv_features = pv.xs(first_hour, drop_level=False)  # this gets the pv yield of the current timestamp selected earlier
            pv_targets = pv.xs(
                slice(  # type: ignore
                    str(time + timedelta(hours=1)),
                    str(time + timedelta(hours=4, minutes=55)),
                ),
                drop_level=False,
            )#pv targets defines the time span over which we are trying to make pv yield predictions

            hrv_data = self.hrv["data"].sel(time=first_hour).to_numpy()#gets the hrv satellite image that is associated with the first hour timestamp setting it up as an input feature
            
            knn_nwp_data = self.knn_nwp.sel(time=first_hour).to_numpy()
            
            for site in self._sites:
                try:
                    # Get solar PV features and targets
                    site_features = pv_features.xs(site, level=1).to_numpy().squeeze(-1)#gets the pixel based location of the pv site and then uses this to make predictions based on the individual sites
                    site_targets = pv_targets.xs(site, level=1).to_numpy().squeeze(-1)
                    combined_features = np.concatenate((hrv_features, nwp_features), axis=-1)
                    assert site_features.shape == (12,)
                    assert site_targets.shape == (48,)
                    assert combined_features.shape[1:] == (12, 128, 128) #compresses the data from N dimensions to 12 and 48 respectively

                    # Get a 128x128 HRV crop centred on the site over the previous hour
                    x, y = self._site_locations["hrv"][site]#gets the location of the site based on the pv sites pixel level location
                    hrv_features = hrv_data[:, y - 64 : y + 64, x - 64 : x + 64, 0]
                    nwp_features = knn_nwp_data[:, y - 64 : y + 64, x - 64 : x + 64]
                    assert combined_features.shape == (12, 128, 128)#crops the image to be be 128x128 around the site
                    #asset is used to force the dimensions of the extracted site level image to be the same
                    # How might you adapt this for the non-HRV, weather and aerosol data?
                except:
                    continue

                yield site_features, combined_features, site_targets

Creating a schematic for the articecture of whatever CNN/CNNs we use should be done for the final submission 


In [9]:
#This model takes the inputs of the PV data and the HRV data to make predictions for the pv based on the HRV feature data.
#This step dictates what the actual shape of the CNN is i.e how many layers, what type of connections they have, and how what the kernel size is. 
# We can load in models here from our github repo and work on them seperately, we will need to be mindfull of the fact that changing the model may require us the change the way in which we test and train them.
#
class Model(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        #sets up four convolutional layers which have increasingly large output channels
        self.conv1 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3)#nn. is part of the pytorch model
        self.conv2 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=3)
        self.conv4 = nn.Conv2d(in_channels=96, out_channels=192, kernel_size=3)
        #Setting up the pooling layer that reduces the conv layers dimensions, we can tweak this to reduce the computational load by changing the kernel size.
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.flatten = nn.Flatten()#creates a 1D tensor (matrix) that can then be passed to the fully connected layer

        self.linear1 = nn.Linear(6924, 48)

    def forward(self, pv, hrv):#takes the inputs of the pb and the hrv data using Relu activation functions
        x = torch.relu(self.pool(self.conv1(hrv)))#passes the hrv data through the convolutional layers defined before by defining it as x.
        x = torch.relu(self.pool(self.conv2(x)))
        x = torch.relu(self.pool(self.conv3(x)))
        x = torch.relu(self.pool(self.conv4(x)))

        x = self.flatten(x)
        x = torch.concat((x, pv), dim=-1) # uses the shared dimension to combine the flattened layers (time is the shared dimension?)

        x = torch.sigmoid(self.linear1(x))#uses this to then make predictions (as the activation function is sigmoid is the model just trying to predict if the site is outputting power or not?)

        return x

In [None]:
#how do we make sure that thi is sequence to sequence and not a sequence to one 

In [10]:
# Import the model defined in `submission/model.py`

from submission.model import Model

# Summarises the model created above, used to sense check that the data we are passing through is correct and shows the overall structure of the model.

In [11]:
summary(Model(), input_size=[(1, 12), (1, 12, 128, 128)])#Use this to see what your model looks like before running it
#The input size here is used to prime the model summary with the input dimensions it should be expecting.
#Here we have the pv data first (1,12) it has a univariate dimension for each site with 12 dimensions per hour.
#Next we have the hrv data for each site, I think this is done as 1 observation 12 times an hour, with 128 x 128 dimension vector for the image.

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [1, 48]                   --
├─Conv2d: 1-1                            [1, 24, 126, 126]         2,616
├─MaxPool2d: 1-2                         [1, 24, 63, 63]           --
├─Conv2d: 1-3                            [1, 48, 61, 61]           10,416
├─MaxPool2d: 1-4                         [1, 48, 30, 30]           --
├─Conv2d: 1-5                            [1, 96, 28, 28]           41,568
├─MaxPool2d: 1-6                         [1, 96, 14, 14]           --
├─Conv2d: 1-7                            [1, 192, 12, 12]          166,080
├─MaxPool2d: 1-8                         [1, 192, 6, 6]            --
├─Flatten: 1-9                           [1, 6912]                 --
├─Linear: 1-10                           [1, 48]                   332,400
Total params: 553,080
Trainable params: 553,080
Non-trainable params: 0
Total mult-adds (M): 137.13
Input size (MB): 0.79
Forward/back

## Training models
This generates weights for the model that we can then use for validation. The weights are then saved as the model submission meaning that each time we generate weights we can then save the weights along with the associated model.

In [12]:
BATCH_SIZE = 32#This controls the number of sites that predictions are made for I think
#these are used to load in the data based on the previously defined functions above, the above functions can be altered to change how the data is ingested
dataset = ChallengeDataset(pv, hrv, site_locations=site_locations)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, pin_memory=True)#change this to alter which type of data is being loaded in

# This is for the model that uses nwp data

In [42]:
BATCH_SIZE = 32
#these are used to load in the data based on the previously defined functions above, the above functions can be altered to change how the data is ingested
dataset_nwp = ChallengeDataset_nwp(pv, hrv, knn_nwp, site_locations=site_locations)
dataloader_nwp = DataLoader(dataset_nwp, batch_size=BATCH_SIZE, pin_memory=True)#change this to alter which type of data is being loaded in
#note no need to change the DataLoader as that is a pytorch function

In [13]:
model = Model().to(device)

criterion = nn.L1Loss()#Here we are defining the test stat as MAE
optimiser = optim.Adam(model.parameters(), lr=1e-3)#Here we are defining the optimiser in this case it is ADAM 

In [14]:
EPOCHS = 1
batch_losses = []
val_losses = []
epoch_train_losses = []
epoch_val_losses = []
for epoch in range(EPOCHS):
    model.train()

    running_loss = 0.0##sets the starting loss at zero
    count = 0#is used to keep track of the number of batches passed through the training model
    for i, (pv_features, hrv_features, pv_targets) in enumerate(dataloader): 
        
        optimiser.zero_grad()#resets the gradient of all the previous weights and biases used in the model, can be changed to alter the type of optimiser we use

        predictions = model(
            pv_features.to(device, dtype=torch.float),
            hrv_features.to(device, dtype=torch.float),
        )#makes predictions based off of current batch of hrv and pv inputs

        loss = criterion(predictions, pv_targets.to(device, dtype=torch.float))#calculates the loss between the models predictions and the actual pv
        loss.backward()#backprops the loss

        optimiser.step()#updates the parameters based on the calculated loss
        ###for generating the training and test loss graph
        running_loss += loss.item() * pv_targets.size(0)
        count += pv_targets.size(0)
        
        size = int(pv_targets.size(0))#calculates the size of the first dimension of the pv_targets tensor  to determine how many data points are in the current tensor
        running_loss += float(loss) * size
        count += size
        #prints the current training loss for the first 200 data points of 32 batches, then prints again once the next 200 have been computed
        if i % 200 == 199:
            print(f"Epoch {epoch + 1}, {i + 1}: {running_loss / count}")
    epoch_train_loss = running_loss / count
    epoch_train_losses.append(epoch_train_loss)        
    print(f"Epoch {epoch + 1}: {running_loss / count}")
    
    

Epoch 1, 200: 0.13166551120579242
Epoch 1, 400: 0.13606945174280555
Epoch 1, 600: 0.12585767212634286
Epoch 1, 800: 0.10999788088724018
Epoch 1, 1000: 0.10931074482947588
Epoch 1, 1200: 0.11402902331358443
Epoch 1, 1400: 0.11960907561039286
Epoch 1, 1600: 0.11764176941127516
Epoch 1, 1800: 0.11452542685282727
Epoch 1, 2000: 0.11179030675208196
Epoch 1, 2200: 0.11240906901911578
Epoch 1, 2400: 0.11553431363310665
Epoch 1, 2600: 0.11660065424270355
Epoch 1, 2800: 0.12000395101628133
Epoch 1, 3000: 0.12053573680110276
Epoch 1, 3200: 0.119658539275988
Epoch 1, 3400: 0.11770887857029105
Epoch 1, 3600: 0.11688262185806202
Epoch 1, 3800: 0.11680877764023057
Epoch 1, 4000: 0.11763051717914641
Epoch 1, 4200: 0.119495969720717
Epoch 1, 4400: 0.12129637309053744
Epoch 1, 4600: 0.12275176571358157
Epoch 1, 4800: 0.1243056332168635
Epoch 1, 5000: 0.12466359049938619
Epoch 1, 5200: 0.12380809389520436
Epoch 1, 5400: 0.12380284551220636
Epoch 1, 5600: 0.12432606325850688
Epoch 1, 5800: 0.123884615676

In [61]:
torch.save(model.state_dict(), "submission/model.pt")

NWP model training

In [235]:
EPOCHS = 1

for epoch in range(EPOCHS):
    model.train()

    running_loss = 0.0##sets the starting loss at zero
    count = 0#sets the count value for iterating to start at the zeroth point (I think this is the zeroth time slice taken earlier)
    for i, (pv_features, hrv_features, pv_targets) in enumerate(dataloader_nwp): 
        optimiser.zero_grad()#resets the gradient of all the previous weights and biases used in the model, can be changed to alter the type of optimiser we use

        predictions = model(
            pv_features.to(device, dtype=torch.float),
            hrv_features.to(device, dtype=torch.float),
        )#makes predictions based off of current batch of hrv and pv inputs

        loss = criterion(predictions, pv_targets.to(device, dtype=torch.float))#calculates the loss between the models predictions and the actual pv
        loss.backward()#backprops the loss

        optimiser.step()#updates the parameters based on the calculated loss

        size = int(pv_targets.size(0))#calculates the size of the first dimension of the pv_targets tensor  to determine how many data points are in the current tensor
        running_loss += float(loss) * size
        count += size
        #prints the current training loss for the first 200 data points, then prints again once the next 200 have been computed
        if i % 200 == 199:
            print(f"Epoch {epoch + 1}, {i + 1}: {running_loss / count}")

    print(f"Epoch {epoch + 1}: {running_loss / count}")

To validate our model we will need to create a validation dataset in the same way the training set is created as the model is sensitive to how the data is loaded into it

In [63]:

from competition import BaseEvaluator
from model import Model

In [68]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class Evaluator(BaseEvaluator):
    def setup(self) -> None:
        model_path = "C:\\Users\\james\\OneDrive - University of Bristol\\Desktop\\UCL\\aml\\submission\\model.pt"

        
        self.model = Model().to(device)
        # Use the full path to load the model
        self.model.load_state_dict(torch.load(model_path, map_location=device))
        self.model.eval()
        """Sets up anything required for evaluation, e.g. loading a model."""

        self.model = Model().to(device)
        self.model.load_state_dict(torch.load("model.pt", map_location=device))
        self.model.eval()

    def predict(self, features: h5py.File):
        """Makes solar PV predictions for a test set.

        You will have to modify this method in order to use additional test set data variables
        with your model.

        Args:
            features (h5py.File): Solar PV, satellite imagery, weather forecast and air quality forecast features.

        Yields:
            Generator[np.ndarray, Any, None]: A batch of predictions.
        """

        with torch.inference_mode():
            # Select the variables you wish to use here!
            for pv, hrv in self.batch(features, variables=["pv", "hrv"], batch_size=32):
                # Produce solar PV predictions for this batch
                yield self.model(
                    torch.from_numpy(pv).to(device),
                    torch.from_numpy(hrv).to(device),
                )


if __name__ == "__main__":
    evaluator = Evaluator()
    evaluator.setup()

FileNotFoundError: [Errno 2] No such file or directory: 'model.pt'

In [70]:
model_path = "C:\\Users\\james\\OneDrive - University of Bristol\\Desktop\\UCL\\aml\\submission\\model.pt"

with open(model_path, 'rb') as f:
    print("Successfully opened model.pt")

Successfully opened model.pt


In [60]:
from submission.run import Evaluator

DATA_PATH = "data/validation/data.hdf5"


def main():
    # Load the data (combined features & targets)
    try:
        data = h5py.File(DATA_PATH, "r")
    except FileNotFoundError:
        print(f"Unable to load features at `{DATA_PATH}`")
        return

    # Switch into the submission directory
    cwd = os.getcwd()
    os.chdir("submission")

    # Make predictions on the data
    try:
        evaluator = Evaluator()

        predictions = []
        for batch in evaluator.predict(features=data):
            assert batch.shape[-1] == 48
            predictions.append(batch)
    finally:
        os.chdir(cwd)

    # Output the mean absolute error
    mae = np.mean(np.absolute(data["targets"] - np.concatenate(predictions)))
    print("MAE:", mae)


if __name__ == "__main__":
    main()

Unable to load features at `data/validation/data.hdf5`
