In [59]:
import os
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras import initializers
import matplotlib.pyplot as mpl
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [60]:
# Working directory of the kernel; the data paths in later cells are joined onto it.
cwd = os.getcwd()
print(cwd)

C:\Users\kayde\OneDrive\Desktop\mannada\RainfallData


In [61]:
# Names of the 31 predictor variables; each one is used below to build the
# file name ncep_<variable>.dat and the column header <region>_<variable>.
variables = (
    "dswr lftx mslp "
    "p__f p__u p__v p__z p_th p_zh "
    "p5_f p5_u p5_v p5_z p5th p5zh "
    "p8_f p8_u p8_v p8_z p8th p8zh "
    "p500 p850 "
    "pottmp pr_wtr prec r500 r850 rhum shum temp"
).split()

# The three regions, in order A, B, C. Each label is substituted into the
# folder name BOX_20N_<label>E when the predictor files are read.
regions = ['82.5', '85.0', '87.5']

# ---------------------------------------------------------------------------
# TAKING IN ALL THE X VARIABLES
# ---------------------------------------------------------------------------

# region label -> DataFrame holding that region's predictor series
boxes = {}

for region in regions:

    # column header "<region>_<variable>" -> list of floats, in `variables` order
    columns = {}

    for var in variables:

        # Pass each path component separately so os.path.join inserts the
        # separator of the current OS (the literal "\\" only worked on Windows).
        path = os.path.join(cwd, "DATA", "BOX_20N_%sE" % region, "ncep_%s.dat" % var)

        with open(path) as file:
            # every line of the file is parsed as one float
            columns[region + '_' + var] = [float(line) for line in file.read().splitlines()]

    # Build the frame once from all columns instead of inserting them one by
    # one into an empty DataFrame (which fragments the frame as it grows).
    boxes[region] = pd.DataFrame(columns)
    
# ---------------------------------------------------------------------------
# TAKING IN ALL THE Y VALUES
# ---------------------------------------------------------------------------

# rain[k] is the series read from DATA/rain<k+1>.dat, one float per line
rain = []

for place in range(1, 6):  # files rain1.dat .. rain5.dat

    # Components are joined separately so the path is valid on any OS.
    path = os.path.join(cwd, "DATA", "rain%d.dat" % place)

    with open(path) as file:
        rain.append([float(line) for line in file.read().splitlines()])

In [62]:
# Data refining: align the predictor frames with the rain data, day by day.

TRIM_START = 4749   # number of leading rows discarded from every region frame
N_DAYS = 10957      # number of rows kept after the discard

# NOTE: this loop re-slices `boxes` in place, so the cell must be run exactly
# once after the loading cell; running it again trims another TRIM_START rows.
for region in regions:
    # single slice, equivalent to .iloc[TRIM_START:] followed by .iloc[:N_DAYS]
    boxes[region] = boxes[region].iloc[TRIM_START:TRIM_START + N_DAYS]

# Sample case: BOX A (82.5) and PLACE 1.
# .copy() makes `df` independent of boxes['82.5'], so later cells that add
# columns to or drop rows from `df` cannot alter the region frame.
df = boxes['82.5'].copy()
rain1 = rain[0]

assert len(df) == len(rain1), (
    "predictor rows (%d) and rain values (%d) must line up one-to-one"
    % (len(df), len(rain1))
)

In [63]:
class predictionModel:
    '''
    Small feed-forward Keras network for rainfall prediction, bundled with
    the data it is trained and evaluated on.

    The network has three Dense layers: a first layer of ``31*N`` units fed
    by ``31*N`` inputs, a hidden layer of ``K`` units, and a single output
    unit. Two variants exist:

    * ``'regressor'``  - linear output, mean-squared-error loss.
    * ``'classifier'`` - sigmoid output, binary cross-entropy loss.

    Parameters
    ----------
    X : array-like
        One row per sample. Each row must hold ``31*N`` values, because the
        first layer is created with ``input_dim=31*N``.
    Y : array-like
        One target value per row of ``X``.
    classStr : str
        ``'regressor'`` or ``'classifier'``.
    N : int, default 1
        Multiplier of the input width (31 inputs per unit of ``N``).
    K : int, default 31
        Number of units in the hidden layer.

    Raises
    ------
    ValueError
        If ``classStr`` is neither ``'regressor'`` nor ``'classifier'``.
    '''

    # number of inputs per unit of N (the hard-coded 31 of the layer sizes)
    _FEATURES_PER_BOX = 31

    def __init__(self,X,Y,classStr,N=1,K=31):
        builders = {
            'regressor': self.__neuralNetworkRegressor,
            'classifier': self.__neuralNetworkClassifier,
        }
        # Fail immediately: previously an unknown name was only printed and the
        # object was left without a model, so the first method call crashed
        # with an unrelated AttributeError.
        if classStr not in builders:
            raise ValueError('model %s not found'%(classStr))
        self.__K = K
        self.__N = N
        # stored as float arrays so splitting and fitting work on one uniform type
        self.__xdata = np.asarray(X, dtype=float)
        self.__ydata = np.asarray(Y, dtype=float)
        self.__class = classStr
        self.__builder = builders[classStr]
        self.__model = self.__builder(N,K)
        # filled in by trainNetwork()
        self.history = None
        self.testMetrics = None

    def __neuralNetworkRegressor(self,N,K):
        '''Build and compile the regression network (linear output, MSE loss).'''
        width = self._FEATURES_PER_BOX*N
        model = Sequential(
            [
                Dense(
                    width,
                    input_dim=width,
                    activation='sigmoid', #squashes the inputs before the relu layer
                    kernel_initializer=initializers.GlorotNormal(seed=None), #Glorot pairs with sigmoid
                    bias_initializer='zeros',
                ),
                Dense(
                    K,
                    activation='relu',
                    kernel_initializer=initializers.HeNormal(seed=None), #He Normal pairs with relu
                    bias_initializer='zeros',
                ),
                Dense(
                    1, #no activation: unbounded output for regression
                    kernel_initializer='normal',
                    bias_initializer='zeros',
                ),
            ]
        )
        # 'accuracy' is not meaningful for a continuous target, so the
        # regressor reports mean squared and mean absolute error instead.
        model.compile(loss='mean_squared_error',optimizer='adam',metrics=['mse','mae'])
        return model

    def __neuralNetworkClassifier(self,N,K):
        '''Build and compile the binary classifier (sigmoid output, cross-entropy loss).'''
        width = self._FEATURES_PER_BOX*N
        model = Sequential(
            [
                Dense(
                    width,
                    input_dim=width,
                    activation='relu',
                    kernel_initializer=initializers.HeNormal(seed=None), #He Normal pairs with relu
                    bias_initializer='zeros',
                ),
                Dense(
                    K,
                    activation='relu',
                    kernel_initializer=initializers.HeNormal(seed=None), #He Normal pairs with relu
                    bias_initializer='zeros',
                ),
                Dense(
                    1,
                    activation='sigmoid', #output in (0, 1) for the binary target
                    kernel_initializer='normal',
                    bias_initializer='zeros',
                ),
            ]
        )
        model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy','mse'])
        return model

    def trainNetwork(self, epochs=10, batch_size=5, test_size=0.2, seed=None):
        '''
        Split the stored data into train and test parts, fit the model on the
        training part and evaluate it on the test part.

        Parameters
        ----------
        epochs, batch_size : passed to ``fit`` (``batch_size`` also to ``evaluate``).
        test_size : fraction of the samples held out for testing.
        seed : ``random_state`` of the split; ``None`` gives a different
            split on every call, an int makes the split repeatable.

        The Keras ``History`` of the fit is stored in ``self.history`` and the
        values returned by ``evaluate`` in ``self.testMetrics``.
        '''
        # NOTE(review): the inputs are used unscaled here, whereas
        # evaluateNetwork() standardizes them - confirm which one is intended.
        xtrain, xtest, ytrain, ytest = train_test_split(
            self.__xdata,
            self.__ydata,
            test_size=test_size,
            random_state=seed,
        )
        print("Initiating Training Sequence\n")
        self.history = self.__model.fit(
            xtrain,
            ytrain,
            epochs = epochs,
            batch_size = batch_size,
        )
        print("\n\nInitiating Testing Sequence\n")
        self.testMetrics = self.__model.evaluate(
            xtest,
            ytest,
            batch_size = batch_size,
        )

    def evaluateNetwork(self, epochs=10, batch_size=5, n_splits=10):
        '''
        Cross-validate a freshly built network of the configured kind inside
        a standardize -> network pipeline and print mean and spread of the
        fold scores.

        Parameters
        ----------
        epochs, batch_size : training settings used for every fold.
        n_splits : number of K-fold splits.

        Returns
        -------
        The array of per-fold scores produced by ``cross_val_score``.
        '''
        print("\nInitiating Evaluation Sequence\n")

        builder, n, k = self.__builder, self.__N, self.__K

        def build_fn():
            # The wrapper needs a callable that returns a NEW compiled model for
            # each fold. A plain function is used because scikit-learn clones
            # estimator parameters and would otherwise copy this whole object.
            return builder(n, k)

        if self.__class == 'classifier':
            # imported here because only the classifier path needs it
            from keras.wrappers.scikit_learn import KerasClassifier
            wrapper, label = KerasClassifier, 'accuracy'
        else:
            # NOTE(review): the Keras regressor wrapper is expected to score
            # with the negated loss (higher is better) - confirm the sign.
            wrapper, label = KerasRegressor, 'MSE'

        evaluators=[
            ('standardize',StandardScaler()),
            ('mlp',wrapper(
                build_fn = build_fn,
                epochs = epochs,
                batch_size = batch_size,
                verbose = 0,
            )),
        ]
        pipeline = Pipeline(evaluators)
        results = cross_val_score(
            pipeline,
            self.__xdata,
            self.__ydata,
            cv = KFold(n_splits=n_splits)
        )
        print("Standardized: %.2f (%.2f) %s" % (results.mean(), results.std(), label))
        return results
        

In [64]:
# Regression on the raw rainfall of place 1 from the predictors of box A,
# with a hidden layer of 10 units.
xdat, ydat = df.values.tolist(), rain1
rainFallPredictor = predictionModel(xdat, ydat, classStr='regressor', N=1, K=10)
rainFallPredictor.trainNetwork()

Initiating Training Sequence

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Initiating Testing Sequence



In [65]:
# Binary target: every value above 0 becomes 1, all other values are kept as
# they are. A NEW list is built on purpose: `ydata1 = rain1` followed by
# in-place assignment overwrote the rainfall amounts in `rain1` (and rain[0]),
# which any later cell using the amounts still needs.
ydata1 = [1 if amount > 0 else amount for amount in rain1]
xdata1 = df.values.tolist()
rainFallPredictor1 = predictionModel(xdata1,ydata1,'classifier',1,200)
rainFallPredictor1.trainNetwork()

Initiating Training Sequence

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Initiating Testing Sequence



In [None]:
# Regression restricted to the days whose rain value is not 0.
# The selection uses a boolean mask and leaves `df` untouched, so the cell
# can be re-run and the earlier cells that read `df` still see the full data
# (the previous version added a column to `df` and dropped rows in place).
rain_values = np.asarray(rain1, dtype=float)
rainy_mask = rain_values != 0

ydata2 = rain_values[rainy_mask].tolist()
xdata2 = df[rainy_mask].values.tolist()

rainFallPredictor2 = predictionModel(xdata2,ydata2,'regressor',1,400)
rainFallPredictor2.trainNetwork()