In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import os
from io import open
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
from scipy.stats import gaussian_kde
import scipy.stats as stats

In [8]:
def pearsonr2(x, y):
    """
    Mimics `scipy.stats.pearsonr`
    Arguments
    ---------
    x : 1D torch.Tensor
    y : 1D torch.Tensor
    Returns
    -------
    r_val : float
        pearsonr correlation coefficient between x and y

    Scipy docs ref:
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html

    Scipy code ref:
        https://github.com/scipy/scipy/blob/v0.19.0/scipy/stats/stats.py#L2975-L3033
    Example:
        >>> x = np.random.randn(100)
        >>> y = np.random.randn(100)
        >>> sp_corr = scipy.stats.pearsonr(x, y)[0]
        >>> th_corr = pearsonr(torch.from_numpy(x), torch.from_numpy(y))
        >>> np.allclose(sp_corr, th_corr)
    """
    mean_x = torch.mean(x)
    mean_y = torch.mean(y)
    xm = x.sub(mean_x)
    ym = y.sub(mean_y)
    r_num = xm.dot(ym)
    r_den = torch.norm(xm, 2) * torch.norm(ym, 2)
    r_val = r_num / r_den
    r2_val = r_val*r_val
    return r2_val.to('cpu').numpy()

class R2Loss(nn.Module):
    #calculate coefficient of determination
    def forward(self, y_pred, y):
        var_y = torch.var(y, unbiased=False)
        return 1.0 - F.mse_loss(y_pred, y, reduction="mean") / var_y

import subprocess as sp
import os
def get_gpu_memory():
  _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]

  ACCEPTABLE_AVAILABLE_MEMORY = 1024
  COMMAND = "nvidia-smi --query-gpu=memory.free --format=csv"
  memory_free_info = _output_to_list(sp.check_output(COMMAND.split()))[1:]
  memory_free_values = [int(x.split()[0]) for i, x in enumerate(memory_free_info)]
  print(memory_free_values)
  return memory_free_values
# use N2O model v1--GRU model
class N2OGRU(nn.Module):
    def __init__(self, ninp, nhid, nlayers, nout, dropout):
        super(N2OGRU, self).__init__()
        if nlayers > 1:
            self.gru = nn.GRU(ninp, nhid,nlayers,dropout=dropout)
        else:
            self.gru = nn.GRU(ninp, nhid,nlayers)
        #self.densor1 = nn.ReLU() #can test other function
        self.densor2 = nn.Linear(nhid, nout)
        self.nhid = nhid
        self.nlayers = nlayers
        self.drop=nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1 #may change to a small value
        self.densor2.bias.data.zero_()
        self.densor2.weight.data.uniform_(-initrange, initrange)

    def forward(self, inputs, hidden):
        output, hidden = self.gru(inputs, hidden)
        #output = self.densor1(self.drop(output))
        #output = torch.exp(self.densor2(self.drop(output))) # add exp
        output = self.densor2(self.drop(output)) # add exp
        return output, hidden
#bsz should be batch size
    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return weight.new_zeros(self.nlayers, bsz, self.nhid)
    

#spin-up: bsz0 is number of year of data_sp you provided for spin up; bsz0=-1 means data_sp=[]
#data_sp is the data you provided
#return inihidden for simulation period with first year spin-uped
def spinup(model,data_sp,cycle,bsz):
    inihidden0=model1.init_hidden(bsz)
    for c in range(cycle):
        output_dummy,inihidden0 = model(data_sp,inihidden0)
    return inihidden0
def my_loss(output, target):
    loss = torch.mean((output - target)**2)
    return loss
#for multi-task learning, sumloss
def myloss_mul_sum(output, target,loss_weights):
    loss = 0.0
    nout=output.size(2)
    for i in range(nout):
        loss = loss + loss_weights[i]*torch.mean((output[:,:,i] - target[:,:,i])**2)
    return loss
def scalar_maxmin(X):
    return (X - X.min())/(X.max() - X.min()),X.min(),X.max()

#generate input combine statini 
#x should be size of [seq,batch,n_f1], statini be size of [1,batch,n_f2]
def load_ini(x,x_ini):
    nrep = x.size(0)
    x_ini=x_ini[0,:,:].view(1,x_ini.size(1),x_ini.size(2))
    return torch.cat((x,x_ini.repeat(nrep,1,1)),2)

class Statini_N2OGRU(nn.Module):
    #input model variables are for each module
    def __init__(self, ninp1, ninp2, nhid, nlayers, nout1, nout2, dropout):
        super(Statini_N2OGRU, self).__init__()
        if nlayers > 1:
            self.gru1 = nn.GRU(ninp1, nhid,nlayers,dropout=dropout)
            self.gru2 = nn.GRU(ninp2, nhid,nlayers,dropout=dropout)
        else:
            self.gru1 = nn.GRU(ninp1, nhid,nlayers)
            self.gru2 = nn.GRU(ninp2, nhid,nlayers)
        self.densor1 = nn.Linear(nhid, nout1)
        self.densor2 = nn.Linear(nhid, nout2)
        self.nhid = nhid
        self.nlayers = nlayers
        self.drop=nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1 #may change to a small value
        self.densor1.bias.data.zero_()
        self.densor1.weight.data.uniform_(-initrange, initrange)
        self.densor2.bias.data.zero_()
        self.densor2.weight.data.uniform_(-initrange, initrange)

    def forward(self, W_inputs, stat_ini, hidden):
        inputs = load_ini(W_inputs,stat_ini)
        output1, hidden1 = self.gru1(inputs, hidden[0])
        output1 = self.densor1(self.drop(output1)) 
        inputs = torch.cat((W_inputs,output1),2)
        output2, hidden2 = self.gru2(inputs, hidden[1])
        output2 = self.densor2(self.drop(output2)) 
        #need to be careful what is the output orders!!!!!!!!!!!!!
        output=torch.cat((output2,output1),2)
        hidden=(hidden1,hidden2)
        return output, hidden
#bsz should be batch size
    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, bsz, self.nhid),\
                weight.new_zeros(self.nlayers, bsz, self.nhid))
    
class Statini_N2OGRU_v2(nn.Module):
    #input model variables are for each module
    def __init__(self, ninp1, ninp2, nhid, nlayers, nout1, nout2, dropout):
        super(Statini_N2OGRU_v2, self).__init__()
        if nlayers > 1:
            self.gru1 = nn.GRU(ninp1, nhid,nlayers,dropout=dropout)
            self.gru2 = nn.GRU(ninp2, nhid,nlayers,dropout=dropout)
        else:
            self.gru1 = nn.GRU(ninp1, nhid,nlayers)
            self.gru2 = nn.GRU(ninp2, nhid,nlayers)
        self.densor1 = nn.Linear(nhid, nout1)
        self.densor2 = nn.Linear(nhid, nout2)
        self.nhid = nhid
        self.nlayers = nlayers
        self.drop=nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1 #may change to a small value
        self.densor1.bias.data.zero_()
        self.densor1.weight.data.uniform_(-initrange, initrange)
        self.densor2.bias.data.zero_()
        self.densor2.weight.data.uniform_(-initrange, initrange)

    def forward(self, W_inputs, stat_ini,flux_ini, hidden):
        inputs = load_ini(W_inputs,stat_ini)
        output1, hidden1 = self.gru1(inputs, hidden[0])
        output1 = self.densor1(self.drop(output1)) 
        inputs = torch.cat((W_inputs,output1),2)
        inputs = load_ini(inputs,flux_ini)
        output2, hidden2 = self.gru2(inputs, hidden[1])
        output2 = self.densor2(self.drop(output2)) 
        #need to be careful what is the output orders!!!!!!!!!!!!!
        output=torch.cat((output2,output1),2)
        hidden=(hidden1,hidden2)
        return output, hidden
#bsz should be batch size
    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, bsz, self.nhid),\
                weight.new_zeros(self.nlayers, bsz, self.nhid))
    
    
class Statini_N2OGRU_v3(nn.Module):
    #input model variables are for each module
    def __init__(self, ninp1, ninp2, nhid1, nhid2, nlayers1, nlayers2, nout1, nout2, dropout):
        super(Statini_N2OGRU_v3, self).__init__()
        if nlayers1[0] > 1:
            self.gru1_1 = nn.GRU(ninp1[0], nhid1[0],nlayers1[0],dropout=dropout)
        else:
            self.gru1_1 = nn.GRU(ninp1[0], nhid1[0],nlayers1[0])
        if nlayers1[1] > 1:
            self.gru1_2 = nn.GRU(ninp1[1], nhid1[1],nlayers1[1],dropout=dropout)
        else:
            self.gru1_2 = nn.GRU(ninp1[1], nhid1[1],nlayers1[1])
        if nlayers1[2] > 1:
            self.gru1_3 = nn.GRU(ninp1[2], nhid1[2],nlayers1[2],dropout=dropout)
        else:
            self.gru1_3 = nn.GRU(ninp1[2], nhid1[2],nlayers1[2])
        if nlayers1[3] > 1:
            self.gru1_4 = nn.GRU(ninp1[3], nhid1[3],nlayers1[3],dropout=dropout)
        else:
            self.gru1_4 = nn.GRU(ninp1[3], nhid1[3],nlayers1[3])
        if nlayers2 > 1:
            self.gru2 = nn.GRU(ninp2, nhid2,nlayers2,dropout=dropout)
        else:
            self.gru2 = nn.GRU(ninp2, nhid2,nlayers2)

        self.densor1_1 = nn.Linear(nhid1[0], nout1[0])
        self.densor1_2 = nn.Linear(nhid1[1], nout1[1])
        self.densor1_3 = nn.Linear(nhid1[2], nout1[2])
        self.densor1_4 = nn.Linear(nhid1[3], nout1[3])
        self.densor2 = nn.Linear(nhid2, nout2)
        self.nhid1 = nhid1
        self.nhid2 = nhid2
        self.nlayers1 = nlayers1
        self.nlayers2 = nlayers2
        self.drop=nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1 #may change to a small value
        self.densor1_1.bias.data.zero_()
        self.densor1_1.weight.data.uniform_(-initrange, initrange)
        self.densor1_2.bias.data.zero_()
        self.densor1_2.weight.data.uniform_(-initrange, initrange)
        self.densor1_3.bias.data.zero_()
        self.densor1_3.weight.data.uniform_(-initrange, initrange)
        self.densor1_4.bias.data.zero_()
        self.densor1_4.weight.data.uniform_(-initrange, initrange)
        self.densor2.bias.data.zero_()
        self.densor2.weight.data.uniform_(-initrange, initrange)

    def forward(self, W_inputs, stat_ini,flux_ini, hidden):
        #layer 1 for states simulation
        inputs = load_ini(W_inputs,stat_ini[0])
        output1_1, hidden1_1 = self.gru1_1(inputs, hidden[0][0])
        output1_1 = self.densor1_1(self.drop(output1_1))
        inputs = load_ini(W_inputs,stat_ini[1])
        output1_2, hidden1_2 = self.gru1_2(inputs, hidden[0][1])
        output1_2 = self.densor1_2(self.drop(output1_2))
        inputs = load_ini(W_inputs,stat_ini[2])
        output1_3, hidden1_3 = self.gru1_3(inputs, hidden[0][2])
        output1_3 = self.densor1_3(self.drop(output1_3))
        inputs = load_ini(W_inputs,stat_ini[3])
        output1_4, hidden1_4 = self.gru1_4(inputs, hidden[0][3])
        output1_4 = self.densor1_4(self.drop(output1_4))
        
        inputs = torch.cat((W_inputs,output1_1,output1_2,output1_3,output1_4),2)
        inputs = load_ini(inputs,flux_ini)
        #layer two for N2O O2 and N2 simulation
        output2, hidden2 = self.gru2(inputs, hidden[1])
        output2 = self.densor2(self.drop(output2)) 
        #need to be careful what is the output orders!!!!!!!!!!!!!
        output=torch.cat((output2,output1_1,output1_2,output1_3,output1_4),2)
        
        hidden=((hidden1_1,hidden1_2,hidden1_3,hidden1_4),hidden2)
        return output, hidden
#bsz should be batch size
    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return ((weight.new_zeros(self.nlayers1[0], bsz, self.nhid1[0]),\
                weight.new_zeros(self.nlayers1[1], bsz, self.nhid1[1]),\
                weight.new_zeros(self.nlayers1[2], bsz, self.nhid1[2]),\
                weight.new_zeros(self.nlayers1[3], bsz, self.nhid1[3])),\
                weight.new_zeros(self.nlayers2, bsz, self.nhid2))



class Statini_sq_N2OGRU(nn.Module):
    #input model variables are for each module
    def __init__(self, ninp1, ninp2, nhid, nlayers, nout1, nout2, dropout):
        super(Statini_sq_N2OGRU, self).__init__()
        if nlayers > 1:
            self.gru1 = nn.GRU(ninp1, nhid,nlayers,dropout=dropout)
            self.gru2 = nn.GRU(ninp2, nhid,nlayers,dropout=dropout)
        else:
            self.gru1 = nn.GRU(ninp1, nhid,nlayers)
            self.gru2 = nn.GRU(ninp2, nhid,nlayers)
        self.densor1 = nn.Linear(nhid, nout1)
        self.densor2 = nn.Linear(nhid, nout2)
        self.nhid = nhid
        self.nlayers = nlayers
        self.drop=nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1 #may change to a small value
        self.densor1.bias.data.zero_()
        self.densor1.weight.data.uniform_(-initrange, initrange)
        self.densor2.bias.data.zero_()
        self.densor2.weight.data.uniform_(-initrange, initrange)

    def forward(self, W_inputs, stat_ini_sq, hidden):
        inputs = torch.cat((W_inputs,stat_ini_sq),2)
        output1, hidden1 = self.gru1(inputs, hidden[0])
        output1 = self.densor1(self.drop(output1)) 
        inputs = torch.cat((W_inputs,output1),2)
        output2, hidden2 = self.gru2(inputs, hidden[1])
        output2 = self.densor2(self.drop(output2)) 
        #need to be careful what is the output orders!!!!!!!!!!!!!
        output=torch.cat((output2,output1),2)
        hidden=(hidden1,hidden2)
        return output, hidden
#bsz should be batch size
    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, bsz, self.nhid),\
                weight.new_zeros(self.nlayers, bsz, self.nhid))

def get_ini(x,ind,nout):
    initials=[]
    for i in range(len(ind)):
        initials.append(x[:,:,ind[i]].view(x.size(0),x.size(1),nout[i]))
    return initials

def Z_norm(X):
    X_mean=X.mean()
    X_std=np.std(np.array(X))
    return (X-X_mean)/X_std, X_mean, X_std

def Z_norm_reverse(X,Xscaler,units_convert):
    return (X*Xscaler[1]+Xscaler[0])*units_convert

#check whether start time is within the fertilized period
def dropout_check(start_t,fntime_ind):
    dropout_ind=False
    for t in fntime_ind:
        if start_t > t-10 and start_t < t+60:
            dropout_ind=True
    return dropout_ind
        
#sample data considering dropout and leadtime    
def sample_data(X,Y,slw,slw05,totsq,fnfeature_ind):
    maxit=int((totsq-slw)/slw05+1)
    #find the fertilized time
    fntime_ind=np.where(X[:,1,fnfeature_ind].view(-1).to("cpu").numpy()>0)[0]
    #get sliding window data with dropout method
    for it in range(maxit):
        if it==0:
            X_new = X[slw05*it:slw05*it+slw,:,:]
            Y_new = Y[slw05*it:slw05*it+slw,:,:]
        else:
            if not dropout_check(slw05*it,fntime_ind):
                X_new = torch.cat((X_new,X[slw05*it:slw05*it+slw,:,:]),1)
                Y_new = torch.cat((Y_new,Y[slw05*it:slw05*it+slw,:,:]),1)
    #get focused data only for fertilized period with random leading time
    for t in fntime_ind:
        if t != fntime_ind[-1]:
            leadtime=np.random.randint(t-60,t-10)
            X_new = torch.cat((X_new,X[leadtime:leadtime+slw,:,:]),1)
            Y_new = torch.cat((Y_new,Y[leadtime:leadtime+slw,:,:]),1)
    return X_new,Y_new

#sample data considering dropout and leadtime    
def sample_data_FN(X,Y,totsq,fnfeature_ind):
    #find the fertilized time
    fntime_ind=np.where(X[:,1,fnfeature_ind].view(-1).to("cpu").numpy()>0)[0]
    #get focused data only for fertilized period with random leading time
    for t in fntime_ind:
        if t == fntime_ind[0]:
            X_new = X[t-30:t+90,:,:]
            Y_new = Y[t-30:t+90,:,:]
        else:
            X_new = torch.cat((X_new,X[t-30:t+90,:,:]),1)
            Y_new = torch.cat((Y_new,Y[t-30:t+90,:,:]),1)
    return X_new,Y_new

In [4]:
#for load data
#prepare input and output
start=1
end=18
Tx=365 #timesteps
tyear=end-start+1
out_names=['N2O_FLUX','CO2_FLUX', 'WTR_3','NH4_3','NO3_3']
units_convert=[-1000.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0] # to similar scale, mg N m-2, gO2 m-2, mg N m-2
#out_names='N2O_FLUX'
n_out=len(out_names)
#'ATM_CO2' constant, AMENDED_C 0, fire n2o,'FIRE_CH4','STG_DEAD, total 25
f_names_c=['RESIDUE_C','HUMUS_C','LITTER_C','CO2_FLUX','O2_FLUX','AUTO_RESP','MICRO_C','SURF_RES','CH4_FLUX',\
         'SURF_DOC_FLUX','SUBS_DOC_FLUX','SURF_DIC_FLUX','SUBS_DIC_FLUX','NBP','SOC_1','SOC_3','SOC_5',\
         'H2_FLUX','ECO_HVST_C','ECO_LAI','ECO_GPP','ECO_RA','ECO_NPP','ECO_RH','TTL_DIC']

#constant:ACTV_LYR,'SURF_ICE',total 16
f_names_w=['ET','RUNOFF','WATER','DISCHG','SNOWPACK','WTR_1','WTR_3','WTR_5','SURF_WTR','ICE_1','ICE_2','ICE_3',\
           'PSI_1','PSI_3','PSI_5','WTR_TBL']


#constant:FIRE_N,total 24
f_names_n=['RESIDUE_N','HUMUS_N','FERTZR_N','NET_PL_EXCH_N','NH4','NO3','SURF_DON_FLUX','SUBS_DON_FLUX','SURF_DIN_FLUX',\
           'SUBS_DIN_FLUX','N2O_FLUX','NH3_FLUX','N2_FIXN','MICRO_N','NH4_1','NH4_3','NH4_5',\
           'NO3_1','NO3_3','NO3_5','NH4_RES','NO3_RES','ECO_HVST_N','N2_FLUX'] ######### data include the N2O_FLUX!!!!!!!!


#constant:,total 19
f_names_e=['RADN','TMAX_AIR','TMIN_AIR','HMAX_AIR','HMIN_AIR','WIND','PRECN','TMAX_SOIL_1','TMIN_SOIL_1',\
           'TMAX_SOIL_3','TMIN_SOIL_3','TMAX_SOIL_5','TMIN_SOIL_5','TMAX_LITTER','TMIN_LITTER','ECND_1','ECND_3','ECND_5',\
           'TTL_SALT_DISCHG']

#soil property total 15 with variation in new results
fp_names=['TSN','FBCU','PDOY','PDS','PDD','DDOY','PLANTT',\
          'LAT','TLB','TBKDS', 'TCSAND', 'TCSILT', 'TPH', 'TCEC', 'TSOC']



f_names0=f_names_c+f_names_w+f_names_n+f_names_e+fp_names

f_names=['FERTZR_N','RADN','TMAX_AIR','TMIN_AIR','HMAX_AIR','HMIN_AIR','WIND','PRECN']+\
        ['PDOY','PLANTT','TBKDS', 'TCSAND', 'TCSILT', 'TPH', 'TCEC', 'TSOC']

#remove_list=['CO2_FLUX','O2_FLUX','AUTO_RESP','CH4_FLUX','SURF_DOC_FLUX','SUBS_DOC_FLUX',\
#             'SURF_DIC_FLUX','SUBS_DIC_FLUX','H2_FLUX','ECO_GPP','ECO_RA','ECO_NPP','ECO_RH','ET',\
#             'RUNOFF','DISCHG','NET_PL_EXCH_N','SURF_DON_FLUX','SUBS_DON_FLUX','SURF_DIN_FLUX',\
#             'SUBS_DIN_FLUX','N2O_FLUX','NH3_FLUX','N2_FIXN','N2_FLUX','TTL_SALT_DISCHG']
#remove_list=['CH4_FLUX','N2O_FLUX','NH3_FLUX','N2_FLUX','H2_FLUX']
#for c in remove_list:
#    f_names.remove(c)


n_f0=len(f_names0)
n_f=len(f_names)
ind=[]
for i in range(n_f):
    ind.append(f_names0.index(f_names[i]))
    
#ind=sorted(ind)
f_names=[]
for i in ind:
    f_names.append(f_names0[i])

fn_ind=f_names.index('FERTZR_N')
print(fn_ind)

fln=20 #20 for full 0-300, 15 for 80-240
sln=99
bsz0=fln*sln
X=np.zeros([Tx*tyear,bsz0,n_f0],dtype=np.float32)
Y=np.zeros([Tx*tyear,bsz0,n_out],dtype=np.float32)
Xscaler=np.zeros([n_f0,2])
Yscaler=np.zeros([n_out,2])
#load ecosys results
basic_path='D:/machinelearning/pgml_progress/mesocosm/'
path_load = basic_path+'99points_metrix_scaled1_v9_X_part1.sav'
data0=torch.load(path_load)
X[:,:,0:45]=data0['InputX']
Xscaler[0:45,:]=data0['Xscaler']
path_load = basic_path+'99points_metrix_scaled1_v9_X_part2.sav'
data0=torch.load(path_load)
X[:,:,45:84]=data0['InputX']
Xscaler[45:84,:]=data0['Xscaler']
#read soil properties:
path_load = basic_path+'99points_statv_v3_scaled1.sav'
data0=torch.load(path_load)
X[:,:,84:n_f0]=data0['Soil_p']
Xscaler[84:n_f0,:]=data0['Soil_p_scaler']
#use Z-norm to rescale every parameters
#Z-norm for Y
indout=[]
for i in range(n_out):
    indout.append(f_names0.index(out_names[i]))
Y[:,:,:]=X[:,:,indout]
for i in range(n_out):
    Y[:,:,i]=(Y[:,:,i]*(Xscaler[indout[i],1]-Xscaler[indout[i],0])+Xscaler[indout[i],0]) # convert back
    Y[:,:,i],Yscaler[i,0],Yscaler[i,1]=Z_norm(Y[:,:,i])       
#Z-norm for X
X=X[:,:,ind]
Xscaler=Xscaler[ind,:]
for i in range(len(ind)):
    X[:,:,i]=(X[:,:,i]*(Xscaler[i,1]-Xscaler[i,0])+Xscaler[i,0]) # convert back
    X[:,:,i],Xscaler[i,0],Xscaler[i,1]=Z_norm(X[:,:,i])  

Y=torch.from_numpy(Y)
X=torch.from_numpy(X)

#need to change to new Z_norm
W_names=['TMAX_AIR','TMIN_AIR','HMAX_AIR','HMIN_AIR']
Diff_names=['TDIF_AIR','HDIF_AIR']
W_ind=[]
for i in range(len(W_names)):
    W_ind.append(f_names.index(W_names[i]))
#replace max-min
for i in range(len(Diff_names)):
    Vmax=X[:,:,W_ind[i*2]]*Xscaler[W_ind[i*2],1]+Xscaler[W_ind[i*2],0]
    Vmin=X[:,:,W_ind[i*2+1]]*Xscaler[W_ind[i*2+1],1]+Xscaler[W_ind[i*2+1],0] 
    X[:,:,W_ind[i*2+1]]=Vmax-Vmin
    X[:,:,W_ind[i*2+1]],Xscaler[W_ind[i*2+1],0],Xscaler[W_ind[i*2+1],1]=Z_norm(X[:,:,W_ind[i*2+1]])
    f_names[W_ind[i*2+1]]=Diff_names[i]

print(X.size(),Y.size())
print(f_names)

0
torch.Size([6570, 1980, 16]) torch.Size([6570, 1980, 5])
['FERTZR_N', 'RADN', 'TMAX_AIR', 'TDIF_AIR', 'HMAX_AIR', 'HDIF_AIR', 'WIND', 'PRECN', 'PDOY', 'PLANTT', 'TBKDS', 'TCSAND', 'TCSILT', 'TPH', 'TCEC', 'TSOC']


In [5]:
#shuffled_b=torch.randperm(X.size()[1]) # be aware that random may be different every time
if torch.cuda.is_available():
    device = torch.device("cuda")
print(device)
#X=X[:,shuffled_b,:].to(device)   #test unshuffled site
#Y=Y[:,shuffled_b,:].to(device)
X=X.to(device)
Y=Y.to(device)
print(X.size(),n_f)

train_n=70
val_n=10
test_n=19


X_train=X[:,0:train_n*fln,:].view(Tx*tyear,train_n*fln,n_f)
X_val=X[:,train_n*fln:(train_n+val_n)*fln,:].view(Tx*tyear,val_n*fln,n_f)
X_test=X[:,(train_n+val_n)*fln:(train_n+val_n+test_n)*fln,:].view(Tx*tyear,test_n*fln,n_f)
Y_train=Y[:,0:train_n*fln,:].view(Tx*tyear,train_n*fln,n_out)
Y_val=Y[:,train_n*fln:(train_n+val_n)*fln,:].view(Tx*tyear,val_n*fln,n_out)
Y_test=Y[:,(train_n+val_n)*fln:(train_n+val_n+test_n)*fln,:].view(Tx*tyear,test_n*fln,n_out)

flux_vars=['N2O_FLUX']
flux_ind=[]
for i in range(len(flux_vars)):
    flux_ind.append(out_names.index(flux_vars[i]))
stat_vars=['CO2_FLUX', 'WTR_3','NH4_3','NO3_3']
stat_ind=[]
for i in range(len(stat_vars)):
    stat_ind.append(out_names.index(stat_vars[i]))
#loss weights setup
loss_weights=[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]   
print(X_train.size(), Y_train.size())
get_gpu_memory()

cuda
torch.Size([6570, 1980, 16]) 16
torch.Size([6570, 1400, 16]) torch.Size([6570, 1400, 5])
[4643]


[4643]

In [9]:
n_a=64 #hidden state number
n_l=2 #layer of GRU
dropout=0.2
model_version='n2o_gru_mesotest_v4_exp1.sav'  #####!!!!!!!!!!!!!!!!!!!! change this before training
path_save = basic_path+model_version
#output 7 in first module and 3 in second module
model1=Statini_sq_N2OGRU(n_f+len(stat_ind),n_f+len(stat_ind),n_a,n_l,len(stat_ind),len(flux_ind),dropout)
model1.to(device)
print(model1)
params = list(model1.parameters())
print(len(params))
print(params[5].size())  # conv1's .weight
print("Model's state_dict:")
for param_tensor in model1.state_dict():
    print(param_tensor, "\t", model1.state_dict()[param_tensor].size())
loss_val_best = 500000
R2_best=0.5
compute_r2=R2Loss()

Statini_sq_N2OGRU(
  (gru1): GRU(20, 64, num_layers=2, dropout=0.2)
  (gru2): GRU(20, 64, num_layers=2, dropout=0.2)
  (densor1): Linear(in_features=64, out_features=4, bias=True)
  (densor2): Linear(in_features=64, out_features=1, bias=True)
  (drop): Dropout(p=0.2, inplace=False)
)
20
torch.Size([192, 64])
Model's state_dict:
gru1.weight_ih_l0 	 torch.Size([192, 20])
gru1.weight_hh_l0 	 torch.Size([192, 64])
gru1.bias_ih_l0 	 torch.Size([192])
gru1.bias_hh_l0 	 torch.Size([192])
gru1.weight_ih_l1 	 torch.Size([192, 64])
gru1.weight_hh_l1 	 torch.Size([192, 64])
gru1.bias_ih_l1 	 torch.Size([192])
gru1.bias_hh_l1 	 torch.Size([192])
gru2.weight_ih_l0 	 torch.Size([192, 20])
gru2.weight_hh_l0 	 torch.Size([192, 64])
gru2.bias_ih_l0 	 torch.Size([192])
gru2.bias_hh_l0 	 torch.Size([192])
gru2.weight_ih_l1 	 torch.Size([192, 64])
gru2.weight_hh_l1 	 torch.Size([192, 64])
gru2.bias_ih_l1 	 torch.Size([192])
gru2.bias_hh_l1 	 torch.Size([192])
densor1.weight 	 torch.Size([4, 64])
densor1.b

In [10]:
#train the model and pring loss/ yearly training
starttime=time.time()
lr=0.1 #sgd
lr_adam=0.0001
optimizer = optim.Adam(model1.parameters(), lr=lr_adam) #add weight decay normally 1-9e-4
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=600, gamma=0.5)
bsz1=train_n*fln
bsz_val1=val_n*fln
bsz=35*fln
bsz_val=10*fln  # this is the batch size for validation, validation not change
totsq=Tx*tyear
#during training
slw=120
#sample the training data with sliding window
X_train_new, Y_train_new = sample_data_FN(X_train,Y_train,totsq,fn_ind)
X_val_new, Y_val_new = sample_data_FN(X_val,Y_val,totsq,fn_ind)
#print(X_train_new.size(),X_val_new.size())
batch_total=X_train_new.size(1)
batch_size=500  # this is the batch size for training
#during validation
train_losses = []
val_losses = []
maxepoch=3000
model1.train()
for epoch in range(maxepoch):
    train_loss=0.0
    val_loss=0.0
    Y_pred_all=torch.zeros(Y_train_new.size(),device=device)
    #shuffled the training data
    shuffled_b=torch.randperm(X_train_new.size()[1]) 
    X_train_new=X_train_new[:,shuffled_b,:] 
    Y_train_new=Y_train_new[:,shuffled_b,:]
    model1.zero_grad()
    for bb in range(int(batch_total/batch_size)):
        
        hidden = model1.init_hidden(batch_size)
        #generate initial sequence 
        statini_sq = Y_train_new[0,bb*batch_size:(bb+1)*batch_size,stat_ind].\
                     view(1,batch_size,len(stat_ind)).repeat(X_train_new.size(0),1,1)
        #consider midpoints
        Y_pred,hidden = model1(X_train_new[:,bb*batch_size:(bb+1)*batch_size,:],\
                                statini_sq,\
                                hidden)
        loss = myloss_mul_sum(Y_pred, Y_train_new[:,bb*batch_size:(bb+1)*batch_size,:].view(slw,batch_size,n_out),\
                                 loss_weights)
        hidden[0].detach_()
        hidden[1].detach_()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            train_loss=train_loss+loss.item()
            Y_pred_all[:,bb*batch_size:(bb+1)*batch_size,:]=Y_pred[:,:,:]
    scheduler.step()
    #validation
    model1.eval()
    with torch.no_grad():
        train_loss=train_loss/(batch_total/batch_size)
        train_losses.append(train_loss)
        #r2 only for n2o
        train_R2=compute_r2(Y_pred_all[:,:,0].contiguous().view(-1),Y_train_new[:,:,0].contiguous().view(-1)).item()
        Y_val_pred=torch.zeros(Y_val_new.size(),device=device)
        #validation for whole 18 years!!!
        #generate initial sequence 
        statini_sq = Y_val_new[0,:,stat_ind].view(1,Y_val_new.size(1),len(stat_ind)).repeat(X_val_new.size(0),1,1)
        #change intitial for year round simulation
        hidden = model1.init_hidden(X_val_new.size(1))   
        Y_val_pred, hidden = model1(X_val_new,statini_sq, hidden)
        loss = myloss_mul_sum(Y_val_pred, Y_val_new,loss_weights)
        val_loss=loss.item()
        val_losses.append(val_loss)
        #r2 only for n2o
        val_R2=compute_r2(Y_val_pred[:,:,0].contiguous().view(-1),Y_val_new[:,:,0].contiguous().view(-1)).item()
        if val_loss < loss_val_best and val_R2 > R2_best:
            loss_val_best=val_loss
            R2_best = val_R2
            f0=open(path_save,'w')
            f0.close()
            #os.remove(path_save)
            torch.save({'epoch': epoch,
                    'model_state_dict': model1.state_dict(),
                    'R2': train_R2,
                    'loss': train_loss,
                    'los_val': val_loss,
                    'R2_val': val_R2,
                    }, path_save)    
        print("finished training epoch", epoch+1)
        mtime=time.time()
        print("train_loss: ", train_loss, "train_R2", train_R2,"val_loss:",val_loss,"val_R2", val_R2,\
              "loss val best:",loss_val_best,"R2 val best:",R2_best, f"Spending time: {mtime - starttime}s")

        if train_R2 > 0.99:
            break
    model1.train()
endtime=time.time()
path_fs = path_save+'fs'
torch.save({'train_losses': train_losses,
            'val_losses': val_losses,
            'model_state_dict_fs': model1.state_dict(),
            }, path_fs)  
print("final train_loss:",train_loss,"final train_R2:",train_R2,"val_loss:",val_loss,"loss validation best:",loss_val_best)
print(f"total Training time: {endtime - starttime}s")

finished training epoch 1
train_loss:  11.426259852074958 train_R2 -0.0766153335571289 val_loss: 10.971647262573242 val_R2 -0.05228304862976074 loss val best: 500000 R2 val best: 0.5 Spending time: 1.0159797668457031s
finished training epoch 2
train_loss:  10.561539414641144 train_R2 -0.028187274932861328 val_loss: 10.210789680480957 val_R2 -0.012430071830749512 loss val best: 500000 R2 val best: 0.5 Spending time: 1.7699642181396484s
finished training epoch 3
train_loss:  9.79141157942933 train_R2 0.006058335304260254 val_loss: 9.490487098693848 val_R2 0.01800251007080078 loss val best: 500000 R2 val best: 0.5 Spending time: 2.5199592113494873s
finished training epoch 4
train_loss:  9.14917434964861 train_R2 0.022379636764526367 val_loss: 8.950665473937988 val_R2 0.031219065189361572 loss val best: 500000 R2 val best: 0.5 Spending time: 3.2749407291412354s
finished training epoch 5
train_loss:  8.68730504791458 train_R2 0.03316015005111694 val_loss: 8.599532127380371 val_R2 0.04045802

KeyboardInterrupt: 