In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from pathlib import Path
import re
from readprocess import readprocess
from NNfunctionality import *
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from sklearn.preprocessing import MinMaxScaler
from PIL import Image

In [2]:
hist_path = Path(Path.cwd(),"..","ProcessedHistograms").resolve()
yield_path = Path(Path.cwd(),"..","Yield Data","all_country_crop_yield_tons_per_hectare.csv").resolve()
water_path = Path(Path.cwd(),"..","WaterProcessed").resolve()
data_class = readprocess(featurepath=hist_path,yieldpath=yield_path,waterpath = water_path,train_yearcount=13,test_yearcount=1,num_timesteps=46,num_features=576)

Select the first 13 years for training. Final year for predicting

In [3]:
scaler = MinMaxScaler()
water_array_train = data_class.water_array_train.reshape((-1,1))
water_array_test = data_class.water_array_test.reshape((-1,1))
scaler.fit(water_array_train)
water_array_train = scaler.transform(water_array_train).reshape((len(data_class.country_list)*13,))
water_array_test = scaler.transform(water_array_test).reshape((len(data_class.country_list)*1,))
X_train = data_class.dataset_train_resized
y_train = data_class.yield_array_train
X_test = data_class.dataset_test_resized
y_test = data_class.yield_array_test

The feature data batches will be of shape 14,46,576 and yield will be of shape 14

In [4]:
dataset_train = prepare_dataset(X_train,water_array_train,y_train)
dataloader_train = DataLoader(dataset_train,batch_size=1,shuffle=False,batch_sampler=None)
dataset_test = prepare_dataset(X_test,water_array_test,y_test)
dataloader_test = DataLoader(dataset_test,batch_size=1,shuffle=False,batch_sampler=None)

Hyper parameter tuning

In [5]:
from itertools import product
#parameters to investigate
hidden_units = [100,150]
dense_units = [200,250]
dropout = [0.2,0.25]

combinations = list(product([i for i,j in enumerate(hidden_units)], [i for i,j in enumerate(dense_units)],[i for i,j in enumerate(dropout)]))
parameter_list = list()

for combination in combinations:
    parameter_list.append([hidden_units[combination[0]],dense_units[combination[1]],dropout[combination[2]]])

In [6]:
results_dict = {}
for parameter_set in parameter_list:
    epoch_no = 10
    hidden_units = parameter_set[0]
    dense_units = parameter_set[1]
    dropout = parameter_set[2]
    trainvalidate = TrainingValidatingLSTM()
    model = LSTMnetwork(32,hidden_dim = hidden_units,dense_size = dense_units,batch_size = 1,extra_features = 1,dropout = dropout,time_steps=46)
    mse_loss = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(),lr = 0.001)
    for i in range(epoch_no):
        trainvalidate.train(dataloader=dataloader_train,model=model,loss_fn=mse_loss,optimizer = optimizer,batch_size=1)

    avg_error_valid,model = trainvalidate.validate(dataloader=dataloader_test,model = model,batch_size =1)
    avg_error_train,model = trainvalidate.validate(dataloader=dataloader_train,model = model,batch_size =1)
    print(avg_error_valid,avg_error_train)
    results_dict[f"Hidden Units : {hidden_units},Dense Units : {dense_units}, Dropout : {dropout}"] = (avg_error_valid,model)
    



RuntimeError: shape '[-1, 24]' is invalid for input of size 1472

In [10]:
best_model = results_dict["Hidden Units : 50,Dense Units : 250, Dropout : 0.25"]

In [11]:
best_model = results_dict['Hidden Units : 100,Dense Units : 250, Dropout : 0.5'][1]

In [16]:
pred_list = list()
truth = list()
for batch,(X,y) in enumerate(dataloader_train):
    hidden_state = best_model.init_hidden(14)
    ypred,hidden_state = best_model(X,hidden_state)
    ypred = list(ypred.reshape(y.shape).detach().numpy())
    pred_list.extend(ypred)
    y = list(y.numpy())
    truth.extend(y)

Train model with extra water feature

In [5]:
epoch_no = 10
model_water = LSTMnetwork(576,hidden_dim=50,dense_size = 250,batch_size = 1,extra_features = 1,dropout = 0.25,time_steps=46)
mse_loss = nn.MSELoss()
trainvalidate = TrainingValidatingLSTM()
optimizer = torch.optim.SGD(model_water.parameters(),lr = 0.001)
for i in range(epoch_no):
    trainvalidate.train(dataloader=dataloader_train,model=model_water,loss_fn=mse_loss,optimizer = optimizer,batch_size=1)
avg_error,model_water = trainvalidate.validate(dataloader=dataloader_test,model = model_water,batch_size =1)

  return F.mse_loss(input, target, reduction=self.reduction)


In [6]:
avg_error

tensor(0.7351)