In [1]:
#%pip install numpy matplotlib zarr xarray ipykernel gcsfs fsspec dask cartopy ocf-blosc2 torchinfo
#%pip install -U doxa-cli

Note: you may need to restart the kernel to use updated packages.




Collecting doxa-cli
  Downloading doxa_cli-0.1.7-py3-none-any.whl (16 kB)
Collecting requests~=2.26.0
  Using cached requests-2.26.0-py2.py3-none-any.whl (62 kB)
Installing collected packages: requests, doxa-cli
  Attempting uninstall: requests
    Found existing installation: requests 2.31.0
    Uninstalling requests-2.31.0:
      Successfully uninstalled requests-2.31.0
  Attempting uninstall: doxa-cli
    Found existing installation: doxa-cli 0.1.5
    Uninstalling doxa-cli-0.1.5:
      Successfully uninstalled doxa-cli-0.1.5
Successfully installed doxa-cli-0.1.7 requests-2.26.0
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
anaconda-project 0.11.1 requires ruamel-yaml, which is not installed.
translate-json 0.0.2 requires certifi==2021.5.30, but you have certifi 2023.5.7 which is incompatible.
translate-json 0.0.2 requires idna==3.2; python_version >= "3", but you have idna 2.10 which is incompatible.
osmnx 1.9.1 requires requests>=2.27, but you have requests 2.26.0 which is incompatible.
conda-repo-cli 1.0.20 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.5.0 which is incompatible.
conda-repo-cli 1.0.20 requires requests==2.28.1, but you have requests 2.26.0 which is incompatible.


## Importing packages

Here, we import a number of packages we will need to train our first model.

In [1]:
import os
from datetime import datetime, time, timedelta
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import xarray as xr
from ocf_blosc2 import Blosc2
from torch.utils.data import DataLoader, IterableDataset
from torchinfo import summary
import json
import geopandas as gpd
import seaborn as sns
from scipy.ndimage import zoom
import numpy as np
import h5py

plt.rcParams["figure.figsize"] = (20, 12)

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


Run this block to install all data

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device
if not os.path.exists("submission"):
     os.makedirs("submission", exist_ok=True)
     #Installing locally means you do not need to rerun this each time you restart the notebook
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/competition.py --output submission/competition.py
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/doxa.yaml --output submission/doxa.yaml
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/model.py --output submission/model.py
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/submission/run.py --output submission/run.py
     !curl -L https://raw.githubusercontent.com/climatehackai/getting-started-2023/main/indices.json --output indices.json
if not os.path.exists("data"):
    os.makedirs("data/pv/2020", exist_ok=True)
    os.makedirs("data/satellite-hrv/2020", exist_ok=True)

    !curl -L https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/pv/metadata.csv --output data/pv/metadata.csv
    !curl -L https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/pv/2020/7.parquet --output data/pv/2020/7.parquet
    !curl -L https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/satellite-hrv/2020/7.zarr.zip --output data/satellite-hrv/2020/7.zarr.zip
pv = pd.read_parquet("data/pv/2020/7.parquet").drop("generation_wh", axis=1)
#The parquet data here is similar to a dataframe. The "power" is the column with the other data types being indexes. The data is shaped with each timestamp being its own sub frame with the sites having their corresponding power (I think this is the % of their total possible yield).  
hrv = xr.open_dataset(
    "data/satellite-hrv/2020/7.zarr.zip", engine="zarr", chunks="auto"
)
#The way that this works is that it stores the image as a vector. The vectors are stored as an array of vectors. These then have a timestamp, as we only have one channel the array is a 1D set of vectors with the dimension being time. Read this to help you understand how this is being stored https://tutorial.xarray.dev/fundamentals/01_datastructures.html
# To access I have included some examples below
#The float value (float16-float64) shows the precision with which data is stored. Later on it is important to make sure that when you are feeding in data into the model that the float type matches between data types, this currently is not a problem when only using the HRV data. I am not yet sure if this will be a problem when using the NWP data.
with open("indices.json") as f:
    site_locations = {
        data_source: {
            int(site): (int(location[0]), int(location[1]))
            for site, location in locations.items()
        }
        for data_source, locations in json.load(f).items()
    }


Not used in this model

In [51]:
nwp = xr.open_dataset(
    "zip:///::https://huggingface.co/datasets/climatehackai/climatehackai-2023/resolve/main/weather/2020/7.zarr.zip",
    engine="zarr",
    consolidated=True,
)
#note that this only covers one month in this download
nwp

## Here in these are used for running the model

## Used for creating the dataloader that passes the data to the model, this needs to be changed if we want to pass in non-hrv data

In [5]:
#This function extracts the area around each individual site using the PV dicts pixel based (as in the location of sites are determined by their pixel in the image) location and then extracts an area around each site. These areas are combined, based on their timestamp,
#with the HRV data that then has its satellite imagery data extracted. This implies that the model is using subsets of the satellite imagery to train the model to make predictions for each site rather than using the whole image and then "learning" where the sites are.

class ChallengeDataset(IterableDataset):#This function sets up the data so that it can be iterated through by the CNN
    def __init__(self, pv, hrv, site_locations, sites=None):#The "self" augmentation here is used to use create a shared class between the different data types that are then iterable based on their shared timestamp
        self.pv = pv
        self.hrv = hrv
        self._site_locations = site_locations
        self._sites = sites if sites else list(site_locations["hrv"].keys())#This gets the individual site ids which are stored as the dict's keys

    def _get_image_times(self):#This function starts at the minimum date in the set and iterates up to the highest date, this is done as the data set is large and due to the nature of the parquette and xarray
        min_date = datetime(2020, 7, 1)
        max_date = datetime(2020, 7, 2)
        #max and min need to be changed if we use more than one month of data
        start_time = time(8)
        end_time = time(17)

        date = min_date#starts at the first timestamp
        while date <= max_date: #iterates through up to the max
            current_time = datetime.combine(date, start_time)
            while current_time.time() < end_time:
                if current_time:
                    yield current_time

                current_time += timedelta(minutes=60)

            date += timedelta(days=1)

    def __iter__(self):
        for time in self._get_image_times():
            first_hour = slice(str(time), str(time + timedelta(minutes=55)))#gets the time and then uses this to select the corresponding time from the pv set  

            pv_features = pv.xs(first_hour, drop_level=False)  # this gets the pv yield of the current timestamp selected earlier
            pv_targets = pv.xs(
                slice(  # type: ignore
                    str(time + timedelta(hours=1)),
                    str(time + timedelta(hours=4, minutes=55)),
                ),
                drop_level=False,
            )#pv targets defines the time span over which we are trying to make pv yield predictions

            hrv_data = self.hrv["data"].sel(time=first_hour).to_numpy()#gets the hrv satellite image that is associated with the first hour timestamp setting it up as an input feature

            for site in self._sites:
                try:
                    # Get solar PV features and targets, the site_targets is used to find the models loss
                    site_features = pv_features.xs(site, level=1).to_numpy().squeeze(-1)#gets the pixel based location of the pv site and then uses this to make predictions based on the individual sites
                    site_targets = pv_targets.xs(site, level=1).to_numpy().squeeze(-1)
                    assert site_features.shape == (12,) and site_targets.shape == (48,)#compresses the data from N dimensions to 12 and 48 respectively

                    # Get a 128x128 HRV crop centred on the site over the previous hour
                    x, y = self._site_locations["hrv"][site]#gets the location of the site based on the pv sites pixel level location
                    hrv_features = hrv_data[:, y - 64 : y + 64, x - 64 : x + 64, 0]
                    assert hrv_features.shape == (12, 128, 128)#crops the image to be be 128x128 around the site
                    #asset is used to force the dimensions of the extracted site level image to be the same
                    # How might you adapt this for the non-HRV, weather and aerosol data?
                except:
                    continue

                yield site_features, hrv_features, site_targets

Basic model

In [163]:
#This model takes the inputs of the PV data and the HRV data to make predictions for the pv based on the HRV feature data.
#This step dictates what the actual shape of the CNN is i.e how many layers, what type of connections they have, and how what the kernel size is. 
# We can load in models here from our github repo and work on them seperately, we will need to be mindfull of the fact that changing the model may require us the change the way in which we test and train them.
#
class Model(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        #sets up four convolutional layers which have increasingly large output channels
        self.conv1 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3)#nn. is part of the pytorch model
        self.conv2 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=3)
        self.conv4 = nn.Conv2d(in_channels=96, out_channels=192, kernel_size=3)
        #Setting up the pooling layer that reduces the conv layers dimensions, we can tweak this to reduce the computational load by changing the kernel size.
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.flatten = nn.Flatten()#creates a 1D tensor (matrix) that can then be passed to the fully connected layer

        self.linear1 = nn.Linear(6924, 48)

    def forward(self, pv, hrv):#takes the inputs of the pb and the hrv data using Relu activation functions
        x = torch.relu(self.pool(self.conv1(hrv)))#passes the hrv data through the convolutional layers defined before by defining it as x.
        x = torch.relu(self.pool(self.conv2(x)))
        x = torch.relu(self.pool(self.conv3(x)))
        x = torch.relu(self.pool(self.conv4(x)))

        x = self.flatten(x)
        print(f"Shape of x = {x.shape}, shape of pv = {pv}")
        x = torch.concat((x, pv), dim=-1) # uses the shared dimension to combine the flattened layers (time is the shared dimension?)

        x = torch.sigmoid(self.linear1(x))#uses this to then make predictions (as the activation function is sigmoid is the model just trying to predict if the site is outputting power or not?)

        return x
model_test = Model().to(device)


In [164]:


# Use torchinfo's summary function
# The input size is passed as a list of tuples, each corresponding to the size of an input the model expects
summary(model_test, input_size=[(1, 12), (1, 12, 128, 128)])

Shape of x = torch.Size([1, 6912]), shape of pv = tensor([[0.2499, 0.2120, 0.1329, 0.2272, 0.0427, 0.5642, 0.5576, 0.1361, 0.0842,
         0.5005, 0.6582, 0.4151]])


Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [1, 48]                   --
├─Conv2d: 1-1                            [1, 24, 126, 126]         2,616
├─MaxPool2d: 1-2                         [1, 24, 63, 63]           --
├─Conv2d: 1-3                            [1, 48, 61, 61]           10,416
├─MaxPool2d: 1-4                         [1, 48, 30, 30]           --
├─Conv2d: 1-5                            [1, 96, 28, 28]           41,568
├─MaxPool2d: 1-6                         [1, 96, 14, 14]           --
├─Conv2d: 1-7                            [1, 192, 12, 12]          166,080
├─MaxPool2d: 1-8                         [1, 192, 6, 6]            --
├─Flatten: 1-9                           [1, 6912]                 --
├─Linear: 1-10                           [1, 48]                   332,400
Total params: 553,080
Trainable params: 553,080
Non-trainable params: 0
Total mult-adds (M): 137.13
Input size (MB): 0.79
Forward/back

In [109]:
summary(Model(), input_size=[(1, 12), (1, 12, 128, 128)])#Use this to see what your model looks like before running it
#The input size here is used to prime the model summary with the input dimensions it should be expecting.
#Here we have the pv data first (1,12) it has a univariate dimension for each site with 12 dimensions per hour.
#Next we have the hrv data for each site, I think this is done as 1 observation 12 times an hour, with 128 x 128 dimension vector for the image.

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [1, 48]                   --
├─Conv2d: 1-1                            [1, 24, 126, 126]         2,616
├─MaxPool2d: 1-2                         [1, 24, 63, 63]           --
├─Conv2d: 1-3                            [1, 48, 61, 61]           10,416
├─MaxPool2d: 1-4                         [1, 48, 30, 30]           --
├─Conv2d: 1-5                            [1, 96, 28, 28]           41,568
├─MaxPool2d: 1-6                         [1, 96, 14, 14]           --
├─Conv2d: 1-7                            [1, 192, 12, 12]          166,080
├─MaxPool2d: 1-8                         [1, 192, 6, 6]            --
├─Flatten: 1-9                           [1, 6912]                 --
├─Linear: 1-10                           [1, 48]                   332,400
Total params: 553,080
Trainable params: 553,080
Non-trainable params: 0
Total mult-adds (M): 137.13
Input size (MB): 0.79
Forward/back

In [6]:
import torch.nn.functional as F


Computes the model, computationally intensive resnet, comments on the lighter weight resnet

In [21]:
layers = [2, 2, 2, 2]

def conv_block(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv_block(in_channels, out_channels, stride=stride)
        self.conv2 = conv_block(out_channels, out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out = out + identity
        return F.relu(out, inplace=False)

class ResNet(nn.Module):
    
    def __init__(self, block, layers):
        
        super(ResNet, self).__init__()
        self.in_channels = 64 #reduce the stride
        self.initial = conv_block(12, 64, kernel_size=7, stride=1, padding=3)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Adjust this linear layer based on the concatenated size of HRV and PV features
        self.fc = nn.Linear(512  + 12, 48)  

    def _make_layer(self, block, out_channels, num_blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, pv, hrv ):
        print("Initial HRV shape:", hrv.shape)  
        print("Initial PV shape:", pv.shape) 
        print(f"{pv[0]}")
        x = self.initial(hrv)
        x = self.maxpool(x)
        print("Shape after initial conv and maxpool:", x.shape)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        print("Shape after ResNet blocks:", x.shape)

        x = self.avgpool(x)
        print("Shape after avgpool:", x.shape)
        x = torch.flatten(x, 1)
        pv = torch.flatten(pv, start_dim=1)
        #print(f"Sshape of x = {x.shape} shape of pv = {pv.shape}")
        #x = torch.concat((x, pv), dim=-1)
        print("Shape after avgpool and flatten:", x.shape)

        
        
        #pv = pv.view(pv.size(0), -1)
        if pv.dim() > 2:
            pv = torch.flatten(pv, start_dim=1)
        print("Adjusted PV shape:", pv.shape)

        combined = torch.cat((x, pv), dim=1)

        if self.fc.in_features != combined.shape[1]:
            self.fc = nn.Linear(combined.shape[1], 48).to(combined.device)

        out = self.fc(combined)
        return out
model = ResNet(BasicBlock, layers).to(device)


Lighter weight resnet, I have commented out this model

In [28]:
layers = [2, 2, 2, 2] #Change this to change the number of layers that you are using, 

def conv_block(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    #This section creates a sequence of layers that perform the networks convolution which are applied iteratively in the Resnet_light block
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding), #Feature extraction
        nn.BatchNorm2d(out_channels), #Noramlises the outputs from the convolution layers
        nn.ReLU(inplace=True)#Applies the activation function
    )

class BasicBlock(nn.Module):
    expansion = 1 
    #Applies the convolution established in the previous layer twice giving the F(x) portion of the resnet model
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv_block(in_channels, out_channels, stride=stride)
        self.conv2 = conv_block(out_channels, out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x): #Keeps the x portion of the resnet 
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample is not None: #Downsamples the model if needed to match the dimensions of outputs if the identity output does not match the F(x) portion of the output
            identity = self.downsample(x)
        out = out + identity #Creates the F(x) + x that is then passed to the relu activation function between the resnet conv layers
        return F.relu(out, inplace=False)  #Relu applied to combined results, 

class ResNet_light(nn.Module):
    #This class stacks the multiple basic blocks set up in the previous functions
    def __init__(self, block, layers):
        #I Think we can reduce the number of layers here as the model is applied four convolutions to generate F(x), the resnet paper uses two.
        super(ResNet_light, self).__init__()
        self.in_channels = 12 #reduce the stride
        self.initial = conv_block(12, 12, kernel_size=2, stride=1, padding=3)#Applies the initial convolution 
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)#Runs maxpool convolution
        self.layer1 = self._make_layer(block, 12, layers[0])
        self.layer2 = self._make_layer(block, 24, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 48, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 96, layers[3], stride=1)
        self.avgpool = nn.AdaptiveMaxPool2d((1, 1))#Forces the consistency of output sizes to be 1x1 
        # Adjust this linear layer based on the concatenated size of HRV and PV features
        self.fc = nn.Linear(96  + 12, 48)  #takes the flatterened output of the conv layers for the 12 hourly time instances and then hands them to 48 different class outputs

    def _make_layer(self, block, out_channels, num_blocks, stride=1):#Used to stack the multiple layers of the resnet model
        downsample = None#This checks to make sure that the stride applied matches between input tensor and the output tensor, I am not completely sure if this changes the dimensions of the output tensor
        if stride != 1 or self.in_channels != out_channels * block.expansion:#Expands the number of outputs compared to the inputs, for the BasicBlock typically no expansion is needed. This is still needed for the model to run. 
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        layers = [block(self.in_channels, out_channels, stride, downsample)]#This section creates a series of blocks for the layer
        self.in_channels = out_channels * block.expansion #Ensures that after the blocks have been defined the next layer gets the correct number of input channels
        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels))#
        return nn.Sequential(*layers)#Stretches the dims of the resnet to match the layers defined above
        #Need to clarify exactly what expansion is doing.
    def forward(self, pv, hrv ):#Defines how the model passes the outputs through the network
        #print("Initial HRV shape:", hrv.shape)  
        #print("Initial PV shape:", pv.shape) 
        #print(f"{pv[0]}")
        x = self.initial(hrv)#Passes the HRV data through the initial block defined earlier
        x = self.maxpool(x)#Downsamples using maxpooling
        #print("Shape after initial conv and maxpool:", x.shape)

        x = self.layer1(x)#Applies the layers defined above, 
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        #print("Shape after ResNet_light blocks:", x.shape)

        x = self.avgpool(x)
        #print("Shape after avgpool:", x.shape)
        x = torch.flatten(x, 1)# Applies the flattering to the second dimension as the first dimension is the batch size
        pv = torch.flatten(pv, start_dim=1)#Flattens the pv data so that the dimensions of the pv tensor match the dimensions of the HRV tensor
        #print(f"Sshape of x = {x.shape} shape of pv = {pv.shape}")
        #x = torch.concat((x, pv), dim=-1)
        #print("Shape after avgpool and flatten:", x.shape)

        
        
        #pv = pv.view(pv.size(0), -1)
        #Checks to make sure that the pv tensor dimensions match the HRV tensor dimensions
        if pv.dim() > 2:
            pv = torch.flatten(pv, start_dim=1)
        #print("Adjusted PV shape:", pv.shape)

        combined = torch.cat((x, pv), dim=1)#Combines the pv and hrv data along the feature dimension

        if self.fc.in_features != combined.shape[1]:
            self.fc = nn.Linear(combined.shape[1], 48).to(combined.device)
            #Above runs a check to make sure that the number of input features is correct
        out = self.fc(combined) #takes the combined output of the pv and hrv and passes them to the fully connected layer defined above
        return out
model_light_res = ResNet_light(BasicBlock, layers).to(device)


Deeper light weight resnet, good performance over a day and trains quickly.

In [34]:
layers = [4, 4, 4, 4] #For a deeper resnet with 16 total conv layers

def conv_block(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv_block(in_channels, out_channels, stride=stride)
        self.conv2 = conv_block(out_channels, out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out = out + identity
        return F.relu(out, inplace=False)

class ResNet_light_deep(nn.Module):
    
    def __init__(self, block, layers):
        
        super(ResNet_light_deep, self).__init__()
        self.in_channels = 12 #reduce the stride
        self.initial = conv_block(12, 12, kernel_size=2, stride=1, padding=3)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 12, layers[0])
        self.layer2 = self._make_layer(block, 24, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 48, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 96, layers[3], stride=1)
        self.avgpool = nn.AdaptiveMaxPool2d((1, 1))
        # Adjust this linear layer based on the concatenated size of HRV and PV features
        self.fc = nn.Linear(96  + 12, 48)  

    def _make_layer(self, block, out_channels, num_blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, pv, hrv ):
        #print("Initial HRV shape:", hrv.shape)  
        #print("Initial PV shape:", pv.shape) 
        #print(f"{pv[0]}")
        x = self.initial(hrv)
        x = self.maxpool(x)
        #print("Shape after initial conv and maxpool:", x.shape)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        #print("Shape after ResNet_light blocks:", x.shape)

        x = self.avgpool(x)
        #print("Shape after avgpool:", x.shape)
        x = torch.flatten(x, 1)
        pv = torch.flatten(pv, start_dim=1)
        #print(f"Sshape of x = {x.shape} shape of pv = {pv.shape}")
        #x = torch.concat((x, pv), dim=-1)
        #print("Shape after avgpool and flatten:", x.shape)

        
        
        #pv = pv.view(pv.size(0), -1)
        if pv.dim() > 2:
            pv = torch.flatten(pv, start_dim=1)
        #print("Adjusted PV shape:", pv.shape)

        combined = torch.cat((x, pv), dim=1)

        if self.fc.in_features != combined.shape[1]:
            self.fc = nn.Linear(combined.shape[1], 48).to(combined.device)

        out = self.fc(combined)
        return out
model_light_deep_res = ResNet_light_deep(BasicBlock, layers).to(device)


In [88]:
#!pip install torchsummary
from torchinfo import summary


Output of the first resnet I made

In [11]:
hrv_input_size = (1, 12, 224, 224)  # For example: batch size of 1, 12 channels, 224x224 height and width
pv_input_size = (1, 1, 224, 224)    # For example: batch size of 1, 1 channel, 224x224 height and width

# You need to provide the sizes in a list if your model expects multiple inputs
model_input_sizes = [hrv_input_size, pv_input_size]

# Use torchinfo's summary function
# The input size is passed as a list of tuples, each corresponding to the size of an input the model expects
summary(model, input_size=[(1, 12), (1, 12, 128, 128)])

Initial HRV shape: torch.Size([1, 12, 128, 128])
Initial PV shape: torch.Size([1, 12])
tensor([0.7790, 0.9008, 0.9214, 0.2327, 0.2230, 0.0516, 0.0247, 0.5559, 0.5794,
        0.3478, 0.3755, 0.3538])
Shape after initial conv and maxpool: torch.Size([1, 64, 64, 64])
Shape after ResNet blocks: torch.Size([1, 512, 64, 64])
Shape after avgpool: torch.Size([1, 512, 1, 1])
Shape after avgpool and flatten: torch.Size([1, 512])
Adjusted PV shape: torch.Size([1, 12])


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 48]                   --
├─Sequential: 1-1                        [1, 64, 128, 128]         --
│    └─Conv2d: 2-1                       [1, 64, 128, 128]         37,696
│    └─BatchNorm2d: 2-2                  [1, 64, 128, 128]         128
│    └─ReLU: 2-3                         [1, 64, 128, 128]         --
├─MaxPool2d: 1-2                         [1, 64, 64, 64]           --
├─Sequential: 1-3                        [1, 64, 64, 64]           --
│    └─BasicBlock: 2-4                   [1, 64, 64, 64]           --
│    │    └─Sequential: 3-1              [1, 64, 64, 64]           37,056
│    │    └─Sequential: 3-2              [1, 64, 64, 64]           37,056
│    └─BasicBlock: 2-5                   [1, 64, 64, 64]           --
│    │    └─Sequential: 3-3              [1, 64, 64, 64]           37,056
│    │    └─Sequential: 3-4              [1, 64, 64, 64]           3

In [None]:
#how do we make sure that thi is sequence to sequence and not a sequence to one 

Output for the lighter weight resnet

In [27]:
hrv_input_size = (1, 12, 224, 224)  # For example: batch size of 1, 12 channels, 224x224 height and width
pv_input_size = (1, 1, 224, 224)    # For example: batch size of 1, 1 channel, 224x224 height and width

# You need to provide the sizes in a list if your model expects multiple inputs
model_input_sizes = [hrv_input_size, pv_input_size]

# Use torchinfo's summary function
# The input size is passed as a list of tuples, each corresponding to the size of an input the model expects
summary(model_light_res, input_size=[(1, 12), (1, 12, 128, 128)])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet_light                             [1, 48]                   --
├─Sequential: 1-1                        [1, 12, 133, 133]         --
│    └─Conv2d: 2-1                       [1, 12, 133, 133]         588
│    └─BatchNorm2d: 2-2                  [1, 12, 133, 133]         24
│    └─ReLU: 2-3                         [1, 12, 133, 133]         --
├─MaxPool2d: 1-2                         [1, 12, 67, 67]           --
├─Sequential: 1-3                        [1, 12, 67, 67]           --
│    └─BasicBlock: 2-4                   [1, 12, 67, 67]           --
│    │    └─Sequential: 3-1              [1, 12, 67, 67]           1,332
│    │    └─Sequential: 3-2              [1, 12, 67, 67]           1,332
│    └─BasicBlock: 2-5                   [1, 12, 67, 67]           --
│    │    └─Sequential: 3-3              [1, 12, 67, 67]           1,332
│    │    └─Sequential: 3-4              [1, 12, 67, 67]           1,332
├─

Output for the light weight deeper resnet

In [35]:
hrv_input_size = (1, 12, 224, 224)  # For example: batch size of 1, 12 channels, 224x224 height and width
pv_input_size = (1, 1, 224, 224)    # For example: batch size of 1, 1 channel, 224x224 height and width

# You need to provide the sizes in a list if your model expects multiple inputs
model_input_sizes = [hrv_input_size, pv_input_size]

# Use torchinfo's summary function
# The input size is passed as a list of tuples, each corresponding to the size of an input the model expects
summary(model_light_deep_res, input_size=[(1, 12), (1, 12, 128, 128)])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet_light_deep                        [1, 48]                   --
├─Sequential: 1-1                        [1, 12, 133, 133]         --
│    └─Conv2d: 2-1                       [1, 12, 133, 133]         588
│    └─BatchNorm2d: 2-2                  [1, 12, 133, 133]         24
│    └─ReLU: 2-3                         [1, 12, 133, 133]         --
├─MaxPool2d: 1-2                         [1, 12, 67, 67]           --
├─Sequential: 1-3                        [1, 12, 67, 67]           --
│    └─BasicBlock: 2-4                   [1, 12, 67, 67]           --
│    │    └─Sequential: 3-1              [1, 12, 67, 67]           1,332
│    │    └─Sequential: 3-2              [1, 12, 67, 67]           1,332
│    └─BasicBlock: 2-5                   [1, 12, 67, 67]           --
│    │    └─Sequential: 3-3              [1, 12, 67, 67]           1,332
│    │    └─Sequential: 3-4              [1, 12, 67, 67]           1,332
│ 

In [18]:
# Import the model defined in `submission/model.py`

from submission.model import Model

# Summarises the model created above, used to sense check that the data we are passing through is correct and shows the overall structure of the model.

## Training models
This generates weights for the model that we can then use for validation. The weights are then saved as the model submission meaning that each time we generate weights we can then save the weights along with the associated model.

In [12]:
BATCH_SIZE = 32#This controls the number of sites that predictions are made for I think
#these are used to load in the data based on the previously defined functions above, the above functions can be altered to change how the data is ingested
dataset = ChallengeDataset(pv, hrv, site_locations=site_locations)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, pin_memory=True)#change this to alter which type of data is being loaded in

For the larger resnet


In [19]:
model = model
criterion = nn.L1Loss()#Here we are defining the test stat as MAE
optimiser = optim.Adam(model.parameters(), lr=1e-3)#Here we are defining the optimiser in this case it is ADAM 

For the lighter weight resnet

In [29]:
model = model_light_res
criterion = nn.L1Loss()#Here we are defining the test stat as MAE
optimiser = optim.Adam(model.parameters(), lr=1e-3)#Here we are defining the optimiser in this case it is ADAM 

For the lightweight deeper resnet

In [36]:
model = model_light_deep_res
criterion = nn.L1Loss()#Here we are defining the test stat as MAE
optimiser = optim.Adam(model.parameters(), lr=1e-3)#Here we are defining the optimiser in this case it is ADAM 

In [16]:
torch.autograd.set_detect_anomaly(True)


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x1b995fab7f0>

In [38]:
pv_features.shape

torch.Size([7, 12])

In [39]:
hrv_features.shape

torch.Size([7, 12, 128, 128])

In [37]:
EPOCHS = 1
batch_losses = []
val_losses = []
epoch_train_losses = []
epoch_val_losses = []
for epoch in range(EPOCHS):
    model.train()

    running_loss = 0.0##sets the starting loss at zero
    count = 0#is used to keep track of the number of batches passed through the training model
    for i, (pv_features, hrv_features, pv_targets) in enumerate(dataloader): 
        
        optimiser.zero_grad()#resets the gradient of all the previous weights and biases used in the model, can be changed to alter the type of optimiser we use

        predictions = model(
            pv_features.to(device, dtype=torch.float),
            hrv_features.to(device, dtype=torch.float),
        )#makes predictions based off of current batch of hrv and pv inputs

        loss = criterion(predictions, pv_targets.to(device, dtype=torch.float))#calculates the loss between the models predictions and the actual pv
        loss.backward()#backprops the loss

        optimiser.step()#updates the parameters based on the calculated loss
        ###for generating the training and test loss graph
        running_loss += loss.item() * pv_targets.size(0)
        count += pv_targets.size(0)
        
        size = int(pv_targets.size(0))#calculates the size of the first dimension of the pv_targets tensor  to determine how many data points are in the current tensor
        running_loss += float(loss) * size
        count += size
        #print(count)
        #prints the current training loss for the first 200 data points of 32 batches, then prints again once the next 200 have been computed
        if i % 200 == 199:
            print(f"Epoch {epoch + 1}, {i + 1}: {running_loss / count}")
    epoch_train_loss = running_loss / count
    epoch_train_losses.append(epoch_train_loss)        
    print(f"Epoch {epoch + 1}: {running_loss / count}")
    
    

Epoch 1, 200: 0.22411647208034993
Epoch 1, 400: 0.18745987586677074
Epoch 1: 0.1730484867563558


In [61]:
torch.save(model.state_dict(), "submission/model.pt")

NWP model training

In [235]:
EPOCHS = 1

for epoch in range(EPOCHS):
    model.train()

    running_loss = 0.0##sets the starting loss at zero
    count = 0#sets the count value for iterating to start at the zeroth point (I think this is the zeroth time slice taken earlier)
    for i, (pv_features, hrv_features, pv_targets) in enumerate(dataloader_nwp): 
        optimiser.zero_grad()#resets the gradient of all the previous weights and biases used in the model, can be changed to alter the type of optimiser we use

        predictions = model(
            pv_features.to(device, dtype=torch.float),
            hrv_features.to(device, dtype=torch.float),
        )#makes predictions based off of current batch of hrv and pv inputs

        loss = criterion(predictions, pv_targets.to(device, dtype=torch.float))#calculates the loss between the models predictions and the actual pv
        loss.backward()#backprops the loss

        optimiser.step()#updates the parameters based on the calculated loss

        size = int(pv_targets.size(0))#calculates the size of the first dimension of the pv_targets tensor  to determine how many data points are in the current tensor
        running_loss += float(loss) * size
        count += size
        #prints the current training loss for the first 200 data points, then prints again once the next 200 have been computed
        if i % 200 == 199:
            print(f"Epoch {epoch + 1}, {i + 1}: {running_loss / count}")

    print(f"Epoch {epoch + 1}: {running_loss / count}")

To validate our model we will need to create a validation dataset in the same way the training set is created as the model is sensitive to how the data is loaded into it

In [63]:

from competition import BaseEvaluator
from model import Model

In [68]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class Evaluator(BaseEvaluator):
    def setup(self) -> None:
        model_path = "C:\\Users\\james\\OneDrive - University of Bristol\\Desktop\\UCL\\aml\\submission\\model.pt"

        
        self.model = Model().to(device)
        # Use the full path to load the model
        self.model.load_state_dict(torch.load(model_path, map_location=device))
        self.model.eval()
        """Sets up anything required for evaluation, e.g. loading a model."""

        self.model = Model().to(device)
        self.model.load_state_dict(torch.load("model.pt", map_location=device))
        self.model.eval()

    def predict(self, features: h5py.File):
        """Makes solar PV predictions for a test set.

        You will have to modify this method in order to use additional test set data variables
        with your model.

        Args:
            features (h5py.File): Solar PV, satellite imagery, weather forecast and air quality forecast features.

        Yields:
            Generator[np.ndarray, Any, None]: A batch of predictions.
        """

        with torch.inference_mode():
            # Select the variables you wish to use here!
            for pv, hrv in self.batch(features, variables=["pv", "hrv"], batch_size=32):
                # Produce solar PV predictions for this batch
                yield self.model(
                    torch.from_numpy(pv).to(device),
                    torch.from_numpy(hrv).to(device),
                )


if __name__ == "__main__":
    evaluator = Evaluator()
    evaluator.setup()

FileNotFoundError: [Errno 2] No such file or directory: 'model.pt'

In [70]:
model_path = "C:\\Users\\james\\OneDrive - University of Bristol\\Desktop\\UCL\\aml\\submission\\model.pt"

with open(model_path, 'rb') as f:
    print("Successfully opened model.pt")

Successfully opened model.pt


In [60]:
from submission.run import Evaluator

DATA_PATH = "data/validation/data.hdf5"


def main():
    # Load the data (combined features & targets)
    try:
        data = h5py.File(DATA_PATH, "r")
    except FileNotFoundError:
        print(f"Unable to load features at `{DATA_PATH}`")
        return

    # Switch into the submission directory
    cwd = os.getcwd()
    os.chdir("submission")

    # Make predictions on the data
    try:
        evaluator = Evaluator()

        predictions = []
        for batch in evaluator.predict(features=data):
            assert batch.shape[-1] == 48
            predictions.append(batch)
    finally:
        os.chdir(cwd)

    # Output the mean absolute error
    mae = np.mean(np.absolute(data["targets"] - np.concatenate(predictions)))
    print("MAE:", mae)


if __name__ == "__main__":
    main()

Unable to load features at `data/validation/data.hdf5`
