In [1]:
#!module load cuda

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import torchvision.transforms as T

import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

from jupyterplot import ProgressPlot
from tqdm import tqdm, trange

from datetime import datetime, timedelta

In [3]:
class WeatherDataset(Dataset):
    def __init__(self, weather_data = 'data/big_arpafvg.csv', img_dir = 'data/images'):
        initial_data=pd.read_csv(weather_data)
        initial_data = initial_data.drop(columns=['Temp. min gradi C','Temp. med gradi C','Temp. max gradi C','Vento med km/h','Dir. V. max gradi N'])
        #normalize the data
        for col in initial_data.columns:
            if col != 'giorno' and col != 'mese' and col != 'anno':
                initial_data[col] = (initial_data[col] - initial_data[col].mean()) / initial_data[col].std()
        self.weather_data = initial_data
        self.img_dir = img_dir
        self.seq_length = 7
        self.target_column_index = 3
        self.date_generated = []

    def __getitem__(self, date):
        #get day month year from date in format dd-mm-yyyy
        day, month, year = date.split('_')
        #get from weather data the row with the same date
        weather_data = self.weather_data[(self.weather_data['giorno'] == int(day)) & (self.weather_data['mese'] == int(month)) & (self.weather_data['anno'] == int(year))]
        weather_data = torch.tensor(weather_data.values[0])

        image = os.path.join(self.img_dir, str(date) + '.jpg')  
        image = read_image(image)   
        return [image, weather_data]
    
    def __len__(self):
        return len(self.weather_data)
    '''
    def create_sequences(self, data):
        xs, ys, imgs = [], [], []
        for i in range(len(data) - self.seq_length-1):
            x = data[i:i+self.seq_length]
            y = data[i+self.seq_length+1, self.target_column_index]
            img_path = os.path.join(self.img_dir,str(x[2]) + '_' + str(x[1]) + '_'+ (x[0]) + '.jpg')
            img= read_image(img_path)
            xs.append(x)
            ys.append(y)
            imgs.append(img)
        return np.array(xs), np.array(ys)
    '''

    def date_generation(self, start_date, end_date):
        start_date = datetime.strptime(start_date, "%d_%m_%Y")
        end_date = datetime.strptime(end_date, "%d_%m_%Y")
        date_generated = [start_date + timedelta(days=x) for x in range(0, (end_date-start_date).days)]
        #transform the list of datetime objects in a list of strings in format dd-mm-yyyy
        date_generated = [date.strftime("%d_%m_%Y") for date in date_generated]
        self.date_generated=date_generated
        return date_generated
    
    def create_sequences(self, len_seq):
        xs, ys, imgs = [], [], []
        for date in self.date_generated[:-len_seq]:
            end_of_week = date.split('_')
            end_of_week = datetime(int(end_of_week[2]), int(end_of_week[1]), int(end_of_week[0])) + timedelta(days = len_seq)
            end_of_week = end_of_week.strftime("%d_%m_%Y")


            week=self.date_generation(date, end_of_week)
            img, x =[], []
            for day in week:
                a , b = self.__getitem__(day)
                img.append(a)
                x.append(b)
            imgs.append(img)
            xs.append(x)

            next = end_of_week.split('_')
            next = datetime(int(next[2]), int(next[1]), int(next[0])) + timedelta(days = 1)
            next = next.strftime("%d_%m_%Y")
            ys.append(self.__getitem__(next)[1][self.target_column_index])
        return np.array(imgs), np.array(xs), np.array(ys)

In [4]:
dataset = WeatherDataset()

In [5]:
#generate a list of strings dd-mm-yyyy from 01-06-2023 to 15-6-2024
dataset.date_generation("01_06_2024", "15_06_2024")
imgs, xs, ys = dataset.create_sequences(7)  


In [6]:
train_len = int(0.8 * len(dataset.date_generated))
test_len = len(dataset.date_generated) - train_len
#split the dataset: date_trainset contains the first 80% of the dates, date_testset contains the remaining 20%
xs_train=xs[:train_len]
xs_test=xs[train_len:]
ys_train=ys[:train_len]
ys_test=ys[train_len:]
imgs_train=imgs[:train_len]
imgs_test=imgs[train_len:]

In [7]:


#convert a list of tensor into a tensor
xs_train = torch.from_numpy(xs_train).float()
xs_test = torch.from_numpy(xs_test).float()
imgs_train = torch.from_numpy(imgs_train).float()
imgs_test = torch.from_numpy(imgs_test).float()
ys_train = torch.from_numpy(ys_train).float()
ys_test = torch.from_numpy(ys_test).float()


train_dataset = torch.utils.data.TensorDataset(imgs_train, xs_train, ys_train)
test_dataset = torch.utils.data.TensorDataset(imgs_test, xs_test, ys_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = len (xs_train), shuffle = True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = len(xs_test), shuffle = True)

print(len(xs_test))



2


In [42]:
class DeepWeather(nn.Module):
    def __init__(self):
        super(DeepWeather, self).__init__()

        self.conv1 = nn.Conv3d(in_channels = 7, out_channels = 8, kernel_size = (1, 3, 3), stride = 1)
        self.pool = nn.MaxPool3d(kernel_size=(1,2,2), padding=(0,1,1))
        self.dropout = nn.Dropout(p = 0.3)
        self.bn1 = nn.BatchNorm3d(8)
        self.conv2 = nn.Conv3d(in_channels = 8, out_channels = 16, kernel_size = (1, 3, 3), stride = 1)
        self.bn2 = nn.BatchNorm3d(16)
        self.conv3 = nn.Conv3d(in_channels = 16, out_channels = 32, kernel_size = (1, 3, 3), stride = 1)
        self.bn3 = nn.BatchNorm3d(32)
        self.conv4 = nn.Conv3d(in_channels = 32, out_channels = 64, kernel_size = (1, 3, 3), stride = 1)
        self.bn4 = nn.BatchNorm3d(64)
        self.conv5 = nn.Conv3d(in_channels = 64, out_channels = 128, kernel_size = (1, 3, 3), stride = 1)
        self.bn5 = nn.BatchNorm3d(128)
        self.conv6 = nn.Conv3d(in_channels = 128, out_channels = 1, kernel_size = (1, 3, 3), stride = 1)
        self.bn6 = nn.BatchNorm3d(1)

        self.input_size = 9226
        self.hidden_size = 16
        self.num_layers = 2
        self.output_size = 16
        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x1, x2):   
        
        print(f'shape of x1: {x1.shape}')
        print(f'shape of x2: {x2.shape}') 
        step=self.conv1(x1)
        print(f'step 1 done: {step.shape}') 
        step=F.leaky_relu(step)
        print(f'step 2 done: {step.shape}')
        step=self.pool(step)
        print(f'step 3 done: {step.shape}')
        step=self.dropout(step)
        print(f'step 4 done: {step.shape}')
        step=self.bn1(step)
        print(f'step 5 done: {step.shape}')
        step=self.conv2(step)
        print(f'step 6 done. {step.shape}')
        step=F.leaky_relu(step)
        print(f'step 7 done. {step.shape}')
        step=self.pool(step)
        print(f'step 8 done. {step.shape}')
        step=self.dropout(step)
        print(f'step 9 done. {step.shape}')
        step=self.bn2(step)
        print(f'step 10 done. {step.shape}')
        step=self.conv3(step)
        print(f'step 11 done. {step.shape}')
        step=F.leaky_relu(step)
        print(f'step 12 done. {step.shape}')
        step=self.pool(step)
        print(f'step 13 done. {step.shape}')
        step=self.dropout(step)
        print(f'step 14 done. {step.shape}')
        step=self.bn3(step)
        print(f'step 15 done. {step.shape}')
        step=self.conv4(step)
        print(f'step 16 done. {step.shape}')
        step=F.leaky_relu(step)
        print(f'step 17 done. {step.shape}')
        step=self.pool(step)
        print(f'step 18 done. {step.shape}')
        step=self.dropout(step)
        print(f'step 19 done. {step.shape}')
        step=self.bn4(step)
        print(f'step 20 done. {step.shape}')
        step=self.conv5(step)
        print(f'step 21 done. {step.shape}')
        step=F.leaky_relu(step)
        print(f'step 22 done. {step.shape}')
        step=self.pool(step)
        print(f'step 23 done. {step.shape}')
        step=self.dropout(step)
        print(f'step 24 done. {step.shape}')
        step=self.bn5(step)
        print(f'step 25 done. {step.shape}')
        step=self.conv6(step)
        print(f'step 26 done. {step.shape}')
        step=F.leaky_relu(step)
        print(f'step 27 done. {step.shape}')
        step=self.pool(step)
        print(f'step 28 done. {step.shape}')
        step=self.dropout(step)
        print(f'step 29 done. {step.shape}')
        step=self.bn6(step)
        print(f'step 30 done. {step.shape}')
        
        
        
        x1 = self.bn1(self.dropout(self.pool(F.leaky_relu(self.conv1(x1)))))

        x1 = self.bn2(self.dropout(self.pool(F.leaky_relu(self.conv2(x1)))))
        x1 = self.bn3(self.dropout(self.pool(F.leaky_relu(self.conv3(x1)))))
        x1 = self.bn4(self.dropout(self.pool(F.leaky_relu(self.conv4(x1)))))
        x1 = self.bn5(self.dropout(self.pool(F.leaky_relu(self.conv5(x1)))))
        x1 = self.bn6(self.dropout(self.pool(F.leaky_relu(self.conv6(x1)))))
        x1 = torch.flatten(x1, start_dim = 1)
        print(f'shape before cat{ x1.shape}')
        print(f'x2{x2.shape}')

        #x = torch.cat((x1, x2), dim = 1)
        h0 = torch.zeros(self.num_layers, self.hidden_size).to(x2.device)
        print(f' x2 shape {x2.shape}')
        print(f' h0 shape {h0.shape}')
        out, _ = self.rnn(x2, h0)
        out = torch.cat((out, x1), dim = 1)
        print(f' out shape {out.shape}')
        out = self.fc(out)

        return out

In [43]:
model_test=DeepWeather()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model_test=model_test.to(device)

In [44]:
#primo elemento del trainloader
imgs, xs, ys = next(iter(train_loader))
model_test.forward(imgs.to(device), xs.to(device))

shape of x1: torch.Size([5, 7, 3, 512, 512])
shape of x2: torch.Size([5, 7, 10])
step 1 done: torch.Size([5, 8, 3, 510, 510])
step 2 done: torch.Size([5, 8, 3, 510, 510])
step 3 done: torch.Size([5, 8, 3, 256, 256])
step 4 done: torch.Size([5, 8, 3, 256, 256])
step 5 done: torch.Size([5, 8, 3, 256, 256])
step 6 done. torch.Size([5, 16, 3, 254, 254])
step 7 done. torch.Size([5, 16, 3, 254, 254])
step 8 done. torch.Size([5, 16, 3, 128, 128])
step 9 done. torch.Size([5, 16, 3, 128, 128])
step 10 done. torch.Size([5, 16, 3, 128, 128])
step 11 done. torch.Size([5, 32, 3, 126, 126])
step 12 done. torch.Size([5, 32, 3, 126, 126])
step 13 done. torch.Size([5, 32, 3, 64, 64])
step 14 done. torch.Size([5, 32, 3, 64, 64])
step 15 done. torch.Size([5, 32, 3, 64, 64])
step 16 done. torch.Size([5, 64, 3, 62, 62])
step 17 done. torch.Size([5, 64, 3, 62, 62])
step 18 done. torch.Size([5, 64, 3, 32, 32])
step 19 done. torch.Size([5, 64, 3, 32, 32])
step 20 done. torch.Size([5, 64, 3, 32, 32])
step 21 d

RuntimeError: For batched 3-D input, hx should also be 3-D but got 2-D tensor

In [None]:
def train(model, dataset, train_loader, test_loader, criterion, optimizer, epochs = 50, first_time = True, num_saved_epochs = 0):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    train_loss_epochs = []
    test_loss_epochs = []
    bar = trange(epochs, desc=f"Epoch ?/?, Train Loss: ?, Test Loss: ?")
    for epoch in bar:
        model.train()
        train_losses = []
        for img, xs, ys in train_loader:
            optimizer.zero_grad()
            print(img.shape, xs.shape)
            outputs = model(img, xs)
            loss = criterion(outputs, ys)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
            
        train_loss_epochs.append(np.mean(train_losses))
        model.eval()
        test_losses = []
        for ims, xs, ys in test_loader:
                outputs = model(imgs, xs)
                loss = criterion(outputs.squeeze(), ys)
                test_losses.append(loss.item())
                
        test_loss_epochs.append(np.mean(test_losses))
        bar.set_description(f"Epoch {epoch + 1}/{epochs}, Train Loss: {np.mean(train_losses)}, Test Loss: {np.mean(test_losses)}")
        #if first_time:
            #torch.save(model.state_dict(), f"deepweather_epoch{num_saved_epochs + epoch + 1}.pth")

    return train_loss_epochs, test_loss_epochs

In [None]:
#Uncomment the lines below if you want to train/load a pretrained model
#num_saved_epochs = 50
#model = model.load_state_dict(torch.load(f'weights/epoch_{num_saved_epochs}'))

#Comment the (ONE) line below if you want to train/load a pretrained model.
#!mkdir weights
model = DeepWeather()

#convert a list of tensor into a tensor
xs_train = torch.from_numpy(xs_train).float()
xs_test = torch.from_numpy(xs_test).float()
imgs_train = torch.from_numpy(imgs_train).float()
imgs_test = torch.from_numpy(imgs_test).float()
ys_train = torch.from_numpy(ys_train).float()
ys_test = torch.from_numpy(ys_test).float()


train_dataset = torch.utils.data.TensorDataset(imgs_train, xs_train, ys_train)
test_dataset = torch.utils.data.TensorDataset(imgs_test, xs_test, ys_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = len (xs_train), shuffle = True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = len(xs_test), shuffle = True)

print(len(xs_test))

#xs_trainloader = DataLoader(xs_train , batch_size = len(xs_train), shuffle = False)
#xs_testloader = DataLoader(xs_test, batch_size = len(xs_test), shuffle = False)

#imgs_trainloader = DataLoader(imgs_train, batch_size = len(imgs_train), shuffle = False)
#imgs_testloader = DataLoader(imgs_test, batch_size = len(imgs_test), shuffle = False)

#ys_trainloader = DataLoader(ys_train, batch_size = len(ys_train), shuffle = False)
#ys_testloader = DataLoader(ys_test, batch_size = len(ys_test), shuffle = False)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 10e-6, momentum = 0)

In [None]:
train_losses, val_losses = train(model, dataset, train_loader, test_loader, criterion, optimizer, epochs = 50, first_time = True, num_saved_epochs = 0)

In [None]:
epochs = [epoch for epoch in range(50)]
plt.plot(epochs, train_losses, label = 'Training Loss')
plt.plot(epochs, val_losses, label = 'Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
def predict(model, date):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    inputs = dataset[date]
    inputs = [inputs[0].float().unsqueeze(0).to(device), inputs[1].float().unsqueeze(0).to(device)]

    outputs = model(inputs)    
    
    return outputs

In [None]:
print(predict(model, '31_12_2023'))
print(dataset['31_12_2023'][1][3])

In [None]:
for date in date_testset:
    predicted_rain = predict(model, date)
    #non normalized results
    predicted_rain = predicted_rain 
    print(f"Predicted rain normalized for {date} is {predicted_rain}")
    #while real rain is
    print(f"Real rain normalized  for {date} is {dataset[date][1][3]}")   
