In [8]:
import warnings
warnings.filterwarnings('ignore')
import xarray as xr
import numpy as np
import pandas as pd
import datetime as dt
from datetime import datetime
import numpy as np
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_curve, auc
from tensorflow import keras
from keras import layers
from keras import models
from keras.callbacks import EarlyStopping
import tensorflow as tf
from sklearn import preprocessing
from math import e
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv3D, Flatten,MaxPooling3D,AveragePooling3D, concatenate,Input ,SpatialDropout3D,Dropout
from sklearn import metrics



In [9]:
#Preprocessing functions

#3D detrend function
def detrend(x:np.ndarray,time:np.ndarray,deg:int=3):
    """Remove a least-squares polynomial trend along the first axis of a 3-D array.

    Each of the nx*ny columns is treated as an independent series: a polynomial
    of degree ``deg`` in ``time`` is fitted with ``np.polyfit`` and the fitted
    curve is subtracted from the data.

    Parameters
    ----------
    x : np.ndarray
        Array of shape (nt, nx, ny); the fit runs along the first axis.
    time : np.ndarray
        1-D array of length nt with the abscissa of the fit.
    deg : int, default 3
        Degree of the fitted polynomial. The default reproduces the original
        cubic detrend.

    Returns
    -------
    np.ndarray
        Array of shape (nt, nx, ny): ``x`` minus the fitted polynomial.
    """
    nt,nx,ny = x.shape
    flat = x.reshape(nt,nx*ny)
    # One column of coefficients per grid point, highest power first: (deg+1, nx*ny)
    coeffs = np.polyfit(time, flat, deg=deg)
    t = time[:,np.newaxis]
    # Evaluate the polynomial term by term, in the same order as the coefficients
    fit = sum(c * t**k for c, k in zip(coeffs, range(deg, -1, -1)))
    return x - fit.reshape(nt,nx,ny)
    
#1D detrend function
def altdetrend(x:np.ndarray,time:np.ndarray):
    """Remove a least-squares straight-line trend from a series.

    A degree-1 polynomial in ``time`` is fitted to ``x`` with ``np.polyfit``
    and subtracted.

    Parameters
    ----------
    x : np.ndarray
        Values to detrend; its first axis must have the same length as
        ``time`` (a 1-D series where this notebook calls it).
    time : np.ndarray
        1-D array with the abscissa of the fit.

    Returns
    -------
    np.ndarray
        ``x`` minus the fitted line, with the same shape as ``x``.
    """
    slope, intercept = np.polyfit(time, x, deg=1)
    # The column vector keeps the arithmetic identical to the 3-D variant;
    # the result is folded back to the shape of x before subtracting.
    fit = slope*time[:,np.newaxis] + intercept
    return x - fit.reshape(x.shape)
    
def remove_time_mean(x):
    """Return ``x`` as anomalies about its own mean over the ``time`` dimension."""
    time_mean = x.mean(dim='time')
    return x - time_mean

def removeSC(x):
    """Remove the seasonal cycle: subtract from each calendar month its own time mean.

    ``x`` is grouped by ``time.month`` and ``remove_time_mean`` is applied to
    every group.
    """
    by_month = x.groupby('time.month')
    return by_month.apply(remove_time_mean)
    
# Calculate std normal anomaly
def calStdNorAnom(x, window=31):
    """Standardize ``x`` month by month against a centered rolling climatology.

    For every calendar month present in ``x.time`` the samples of that month
    are selected, and a centered rolling mean and rolling standard deviation
    over ``window`` consecutive samples of that month are computed. Positions
    where the rolling statistic is NaN are filled with ``bfill`` and then
    ``ffill`` along ``time``. The standardized anomaly is
    ``(value - rolling mean) / rolling std``.

    Parameters
    ----------
    x : xarray object with a datetime ``time`` coordinate
        Data to standardize.
    window : int, default 31
        Rolling window length, counted in samples of the same calendar month.
        The default reproduces the original hard-coded value.

    Returns
    -------
    Same type as ``x``: the per-month results concatenated along ``time`` and
    sorted by ``time``.

    Notes
    -----
    There is no guard against a zero rolling standard deviation.
    """
    month_of = x.time.dt.month
    pieces = []
    for m in np.unique(month_of):
        mData = x[month_of == m]
        # Build the rolling object once and reuse it for both statistics
        rolling = mData.rolling(time=window, center=True)
        mRolling = rolling.mean().bfill(dim="time").ffill(dim="time")
        sRolling = rolling.std().bfill(dim="time").ffill(dim="time")
        pieces.append((mData - mRolling) / sRolling)
    # Months were processed separately; restore chronological order
    return xr.concat(pieces, 'time').sortby('time')

## Input

In [10]:
#Open ERA5 Datasets
# Predictor file: the pre-processed input fields are read from it
data = xr.open_dataset('ERA5_Input_Exp8.nc')
cres, netTOAcs, crel = data.cres_pre, data.netTOAcs_pre, data.crel_pre

# Target file: precipitation
data1 = xr.open_dataset('ERA5_Output_PrecipCon.nc')
pr = data1.pr

# Coordinates of the predictor grid, taken from one of the input fields
lat, lon, time = cres.lat, cres.lon, cres.time

In [11]:
#Select Monsoon Months
months = [6, 7, 8, 9]       # months kept for the precipitation target
leadmonths = [2, 3, 4, 5]   # months kept for the predictor fields

def _keep_months(field, wanted):
    """Drop every time step of ``field`` whose calendar month is not in ``wanted``."""
    return field.where(field.time.dt.month.isin(wanted), drop=True)

prec = _keep_months(pr, months)
cresIn = _keep_months(cres, leadmonths)
netTOAcsIn = _keep_months(netTOAcs, leadmonths)
crelIn = _keep_months(crel, leadmonths)

In [12]:
# Land-sea mask, restricted to the monsoon months and to longitudes 60-100
land = data.lsMask
land = land.where(land.time.dt.month.isin(months), drop=True).sel(lon=slice(60,100))
# Latitude is cut by position because label-based slicing returned NaNs.
# NOTE(review): that usually happens when the latitude coordinate is stored in
# descending order, in which case .sel(lat=slice(30, 10)) would work - confirm
# against the file. Rows 240..320 are presumably the 10-30N band - TODO confirm.
land = land[:, 240:321, :]

In [13]:
#Select only the SAM lat,lon range: 60-100E, 10-30N
precip = prec.sel(lon=slice(60,100))[:, 240:321, :] #same positional latitude selection as for the land mask
#Set ocean points (mask == 0) to NaN: the monsoon index is defined over land only
precip = xr.where(land==0, np.nan, precip)

#Area-weighted mean over the box, using cos(latitude) weights
weights = np.cos(np.deg2rad(precip.lat))
#x 60*60*24 s/day converts to mm/day; the division by rho and the factor 1000 mm/m are excluded
prec_index = precip.weighted(weights).mean(dim=('lat','lon'))*60*60*24

In [14]:
#Remove seasonal cycle
# removeSC subtracts, for each calendar month, that month's time mean.
# NOTE: prec_index is overwritten, so re-running this cell re-applies the step
# to already processed data.
prec_index=removeSC(prec_index)

In [15]:
#Normalize
# calStdNorAnom standardizes each calendar month against its centered rolling
# mean and standard deviation.
# NOTE: prec_index is overwritten, so re-running this cell normalizes the
# already normalized series again.
prec_index=calStdNorAnom(prec_index)

In [16]:
#Detrend
# altdetrend works on plain NumPy arrays, so values and time axis are pulled out first.
time = prec_index.time.to_numpy()
# Convert the timestamps to seconds (assumes datetime64[ns]; the /10**9 turns
# nanoseconds into seconds - TODO confirm). The cast is spelled int64 explicitly:
# plain `int` is a 32-bit integer on some platforms/NumPy versions and would
# overflow on nanosecond timestamps.
time = time.astype('int64')/10**9

prec_index = altdetrend(prec_index.to_numpy(), time)
# Wrap the detrended values again; the time coordinate now holds the numeric seconds.
prec_index = xr.DataArray(prec_index, coords=[time], dims=['time'])

In [17]:
#Create classes based on precipitation index
mysd = prec_index.std()
mymean = prec_index.mean()

# Bin edges at mean + k*sd; the +/-10000 sd edges act as open-ended outer limits
bin_edges = [mymean + k*mysd for k in (-10000, -2, -1, -0.5, 0.5, 1, 2, 10000)]
class_names = ['very very low','very low','low','average','high','very high','very very high']

test = pd.cut(prec_index, bin_edges)
buckets = pd.Categorical(test).rename_categories(class_names)

# NOTE(review): LabelEncoder numbers the classes in sorted label order, not in
# the low-to-high order of class_names - confirm this is intended.
le = preprocessing.LabelEncoder()
le.fit(buckets)
labelprec = le.transform(buckets)

# convert integers to dummy variables (i.e. one hot encoded)
nclasses = 7
dummy_y = to_categorical(labelprec, nclasses)


In [18]:
# Prepare Data for CNN

# Split data into train and test (80/20, fixed seed). The three predictor
# fields and the one-hot labels are split together, so samples stay paired by position.
(cres_train, cres_test,
 crel_train, crel_test,
 netTOAcs_train, netTOAcs_test,
 y_train, y_test) = train_test_split(cresIn, crelIn, netTOAcsIn, dummy_y,
                                     test_size=0.20, random_state=0)

def _with_trailing_axis(field):
    """Return the values of ``field`` with a length-1 axis appended after its three axes."""
    return field.values[:, :, :, None]

#Add extra dimension to data, required for algorithm
crestrain = _with_trailing_axis(cres_train)
creltrain = _with_trailing_axis(crel_train)
netTOAcstrain = _with_trailing_axis(netTOAcs_train)
#---------------------------------------------------------
crestest = _with_trailing_axis(cres_test)
creltest = _with_trailing_axis(crel_test)
netTOAcstest = _with_trailing_axis(netTOAcs_test)

# Stack the three variables on a new leading axis
X_test = np.array([crestest, creltest, netTOAcstest])
X_train = np.array([crestrain, creltrain, netTOAcstrain])

print(X_train.shape)

# Swap the first two axes so the sample axis comes first and the variable axis second
X_train_reshape = np.einsum('lkija->klija', X_train)
X_test_reshape = np.einsum('lkija->klija', X_test)

### check for nan
np.isnan(X_test_reshape).any()


(3, 201, 721, 1440, 1)


False

In [None]:
# Fit model to training data
# 10-fold cross validation over the training set: a fresh network is built,
# trained and scored for every fold.

acc_per_fold = []
loss_per_fold = []
fold_no = 1
epochs = 10 # best average accuracy and lowest loss in validation data (cross-validation)

# Shape of one sample. Read from the full array: indexing X_train_reshape[train]
# copies the selected samples, which is wasteful just to look up a shape.
sample_shape = (*X_train_reshape.shape[1:4], 1)

kfold = KFold(n_splits=10, shuffle=True, random_state=0)
for train, test in kfold.split(X_train_reshape, y_train):
    cnn3 = Sequential()
    cnn3.add(Conv3D(8, kernel_size=5, activation='relu', padding='same',
                    input_shape=sample_shape))
    cnn3.add(AveragePooling3D(pool_size=2, padding='same'))
    cnn3.add(Dropout(rate = 0.1))

    cnn3.add(Flatten())

    #Dense function adds a fully connected layer
    #Hidden layer
    cnn3.add(Dense(8, activation='relu'))
    cnn3.add(Dropout(rate = 0.1))
    #Output layer
    cnn3.add(Dense(units= nclasses, activation = "softmax")) #units is always equal to number of classes

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras releases reject.
    adam = keras.optimizers.Adam(learning_rate=0.0001)
    #Adam-A optimizer method for Stochastic Optimization
    cnn3.compile(optimizer=adam, loss='categorical_crossentropy',  metrics=['accuracy'])

    # The held-out fold is used as validation data during training
    hist = cnn3.fit(X_train_reshape[train], y_train[train],  epochs=epochs, verbose=1, shuffle=True,
                         validation_data=(X_train_reshape[test], y_train[test]))

    # report performance on the held-out fold
    scores = cnn3.evaluate(X_train_reshape[test], y_train[test], verbose=0)
    print(f'Score for fold {fold_no}: {cnn3.metrics_names[0]} of {scores[0]}; {cnn3.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

# NOTE: after the loop `cnn3` is the model of the last fold only.

2022-07-13 21:08:42.188259: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-07-13 21:08:42.188296: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-07-13 21:08:42.188317: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-172-31-16-155): /proc/driver/nvidia/version does not exist
2022-07-13 21:08:42.189671: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10

In [None]:
# Softmax outputs for every training sample. `cnn3` is whatever model the
# cross-validation loop built last, i.e. the one from the final fold.
pred = cnn3.predict(X_train_reshape)#, batch_size=batch_size)

In [None]:
# Sanity check; presumably (number of training samples, nclasses) - TODO confirm
pred.shape

In [None]:
# Predicted class id per sample: index of the largest value along the last axis
predicted = np.argmax ( pred , axis=-1 )

In [None]:
print(predicted)
# True class ids recovered from the one-hot training labels
y_true = np.argmax(y_train, axis=-1)
print(y_true)

# Number of training samples whose predicted class matches the true class
cnt = sum(1 for guess, truth in zip(predicted, y_true) if guess == truth)

print(cnt)

In [None]:
# Model outputs for the held-out test samples
pred_test = cnn3.predict(X_test_reshape)
# Class ids: predicted (ytestPred) and true (ytruePred, from the one-hot test labels)
ytestPred = np.argmax(pred_test, axis=-1)
ytruePred = np.argmax(y_test, axis=-1)
print(ytestPred)
print(ytruePred)

# Number of test samples whose predicted class matches the true class
cnt = sum(1 for guess, truth in zip(ytestPred, ytruePred) if guess == truth)

print(cnt)


In [None]:
# Print the confusion matrix
#print(metrics.confusion_matrix(ytruePred,ytestPred))

# Print the precision and recall, among other metrics.
# scikit-learn expects (y_true, y_pred): with the arguments the other way round
# the reported precision and recall are swapped.
print(metrics.classification_report(ytruePred,ytestPred, digits=3))