In [2]:
import os
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras import initializers
import matplotlib.pyplot as mpl
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [3]:
# Resolve the directory the kernel is running in; the data paths used later
# in this notebook are built relative to it.
cwd = os.getcwd()
print(cwd)

C:\Users\kayde\OneDrive\Desktop\mannada\RainfallData


In [4]:
# Variable codes as recorded by the station. Each code is substituted into a
# file name of the form ncep_<code>.dat when the data is loaded, and the list
# order becomes the column order of the per-region frames.
variables = [
    'dswr', 'lftx', 'mslp',
    # p_* family
    'p__f', 'p__u', 'p__v', 'p__z', 'p_th', 'p_zh',
    # p5* family
    'p5_f', 'p5_u', 'p5_v', 'p5_z', 'p5th', 'p5zh',
    # p8* family
    'p8_f', 'p8_u', 'p8_v', 'p8_z', 'p8th', 'p8zh',
    'p500', 'p850',
    'pottmp', 'pr_wtr', 'prec',
    'r500', 'r850',
    'rhum', 'shum', 'temp',
]

# Labels of the three regions, in order A, B, C. Each label is substituted
# into the data folder name BOX_20N_<label>E and used as the key in `boxes`.
regions = ['82.5', '85.0', '87.5']

# ---- Load every predictor (X) variable for every region ----

# Maps region label -> DataFrame holding one column per variable, with the
# column header "<region>_<variable>".
boxes = {}

for region in regions:

    # Gather the columns first and build the frame in one go, rather than
    # growing an empty DataFrame one column at a time.
    columns = {}

    for var in variables:

        # Each path component is passed separately so os.path.join inserts the
        # platform's own separator; the earlier literal backslashes only
        # resolved on Windows.
        path = os.path.join(cwd, "DATA", "BOX_20N_%sE" % region, "ncep_%s.dat" % var)

        with open(path) as file:
            # The file is read as one numeric value per line.
            columns[region + '_' + var] = [float(line) for line in file.read().splitlines()]

    # Columns of unequal length make the DataFrame constructor raise ValueError.
    boxes[region] = pd.DataFrame(columns)
    
# ---- Load the rainfall (Y) series ----

# rain[k] holds the values read from DATA/rain<k+1>.dat, in file order.
rain = []

for file_no in range(1, 6):  # the files are numbered rain1.dat .. rain5.dat

    # Components are joined by os.path.join so the path is valid on any
    # platform, not only where "\" is the separator.
    path = os.path.join(cwd, "DATA", "rain%d.dat" % file_no)

    with open(path) as file:
        # One numeric value per line.
        rain.append([float(line) for line in file.read().splitlines()])

In [5]:
# Data refining: trim every region's frame so it lines up with the rain data,
# day by day.

TRIM_START = 4749   # label of the first row kept; the 4749 rows before it are dropped
N_DAYS = 10957      # at most this many rows are kept from TRIM_START onwards

for region in regions:
    # Slice by row label (.loc) rather than by position (.iloc). The frames
    # carry the default 0..n-1 integer index from the loading cell and slicing
    # keeps those labels, so re-running this cell selects the same rows again
    # instead of cutting another 4749 rows off already-trimmed data.
    # A .loc slice includes its end label, hence the "- 1".
    boxes[region] = boxes[region].loc[TRIM_START:TRIM_START + N_DAYS - 1]

# Sample: take box A (82.5) and place 1.
# NOTE(review): the frame displayed later in this notebook ends at label 15704,
# i.e. 10956 rows - one fewer than N_DAYS. Confirm this matches len(rain[0]).
df = boxes['82.5']
rain1 = rain[0]

In [19]:
class predictionModel:
    '''
    Feed-forward neural-network regressor bundled with its training and
    cross-validation routines.

    Parameters
    ----------
    X : array-like
        One row of predictor values per sample. The first layer is created
        with input_dim = 31*N, so each row is expected to hold 31*N values.
    Y : array-like
        One target value per sample; must have as many entries as X has rows.
    N : int, default 1
        Multiplier for the width of the input and of the first dense layer (31*N).
    K : int, default 31
        The second dense layer has 2*K units.

    Raises
    ------
    ValueError
        If X and Y do not contain the same number of samples.
    '''
    def __init__(self,X,Y,N=1,K=31):
        # Convert once to numeric arrays so Keras and scikit-learn both receive
        # a plain 2-D / 1-D array regardless of whether lists were passed in.
        features = np.asarray(X, dtype=float)
        targets = np.asarray(Y, dtype=float)
        if len(features) != len(targets):
            raise ValueError(
                "X and Y must have the same number of samples, got %d and %d"
                % (len(features), len(targets))
            )
        self.__model = self.__neuralNetwork(N,K)
        self.__K = K
        self.__N = N
        self.__xdata = features
        self.__ydata = targets

    @staticmethod
    def __neuralNetwork(N,K):
        '''
        Build and compile a fresh three-layer network:
        Dense(31*N, sigmoid) -> Dense(2*K, relu) -> Dense(1).

        Takes no instance state, so it can be invoked from a factory function
        that does not hold a reference to an already-built model.
        '''
        model = Sequential(
            [
                Dense(
                    31*N,
                    input_dim=31*N,
                    activation='sigmoid', # squashes values before the relu layer
                    kernel_initializer=initializers.GlorotNormal(seed=None), # Glorot pairs well with sigmoid
                    bias_initializer='zeros',
                ),
                Dense(
                    2*K,
                    activation='relu',
                    kernel_initializer=initializers.HeNormal(seed=None), # He-normal pairs well with relu
                    bias_initializer='zeros',
                ),
                Dense(
                    1, # single continuous output
                    kernel_initializer='normal',
                    bias_initializer='zeros',
                ),
            ]
        )
        # 'accuracy' is a classification metric and carries no meaning for a
        # continuous target, so only regression metrics are tracked.
        model.compile(loss='mean_squared_error',optimizer='adam',metrics=['mse','mae'])
        return model

    def trainNetwork(self):
        '''
        Fit the stored model on a random 80% of the data (10 epochs, batch
        size 5) and evaluate it on the remaining 20%.

        Returns nothing; progress and metric values are whatever Keras reports
        at its default verbosity.
        '''
        xtrain, xtest, ytrain, ytest = train_test_split(self.__xdata,self.__ydata,test_size=0.2)
        print("Initiating Training Sequence")
        self.__model.fit(
            xtrain,
            ytrain,
            epochs = 10,
            batch_size = 5,
        )
        print("\n\nInitiating Testing Sequence")
        self.__model.evaluate(
            xtest,
            ytest,
            batch_size = 5,
        )

    def evaluateNetwork(self):
        '''
        Run 10-fold cross-validation of a standardize -> network pipeline over
        the full data set and print the mean and standard deviation of the
        fold scores.
        '''
        print("\nInitiating Evaluation Sequence\n")

        # KerasRegressor needs a *callable* that returns a compiled model.
        # Handing it an already-built model makes scikit-learn's clone()
        # deep-copy the live Keras object, which fails with
        # "cannot pickle '_thread.RLock' object". The factory below closes over
        # a plain function and two numbers only.
        make_network = self.__neuralNetwork
        n_mult, n_hidden = self.__N, self.__K

        def build_model():
            return make_network(n_mult, n_hidden)

        evaluators=[
            ('standardize',StandardScaler()),
            ('mlp',KerasRegressor(
                build_fn = build_model,
                epochs = 10,
                batch_size = 5,
                verbose = 0,
            )),
        ]
        pipeline = Pipeline(evaluators)
        results = cross_val_score(
            pipeline,
            self.__xdata,
            self.__ydata,
            cv = KFold(n_splits=10)
        )
        # NOTE(review): the values are whatever KerasRegressor.score returns;
        # confirm the sign convention before reading them as a plain MSE.
        print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
        

In [18]:
# Train on box A (df) against the first rainfall series (rain1).
xdat = df.values.tolist()  # frame rows as nested Python lists
ydat = rain1
rainFallPredictor = predictionModel(X=xdat, Y=ydat, N=1, K=5)
rainFallPredictor.trainNetwork()

TypeError: cannot pickle '_thread.RLock' object

In [20]:
# Display the trimmed predictor frame for box 82.5. The trim keeps the original
# row labels, so the index starts at 4749 rather than 0.
df

Unnamed: 0,82.5_dswr,82.5_lftx,82.5_mslp,82.5_p__f,82.5_p__u,82.5_p__v,82.5_p__z,82.5_p_th,82.5_p_zh,82.5_p5_f,...,82.5_p500,82.5_p850,82.5_pottmp,82.5_pr_wtr,82.5_prec,82.5_r500,82.5_r850,82.5_rhum,82.5_shum,82.5_temp
4749,-2.117,-0.651,0.744,0.630,-0.918,0.860,-1.867,317.749,-0.578,1.899,...,-1.209,0.273,-1.212,0.440,21.123,0.007,1.648,1.154,0.375,-0.885
4750,-2.001,-0.462,0.869,0.380,-0.821,0.738,-2.499,317.989,-0.176,1.512,...,-0.906,0.322,-1.496,0.043,10.774,-0.654,1.039,1.550,0.429,-1.094
4751,-1.980,-0.030,0.924,0.085,-0.902,0.323,-2.284,305.690,0.398,2.333,...,-1.058,0.496,-1.314,-0.207,6.282,-0.838,0.821,1.305,0.375,-1.136
4752,-0.425,0.760,1.033,0.084,-1.144,-0.478,-1.888,275.705,0.923,0.806,...,-0.413,0.644,-1.375,-0.793,0.000,-0.790,0.408,0.598,-0.117,-1.598
4753,-0.505,1.518,1.191,0.945,-1.440,0.345,-1.030,296.083,-0.186,0.498,...,-0.147,0.818,-1.314,-1.053,0.000,-0.984,-0.114,-0.137,-0.337,-1.351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15701,0.013,1.895,1.474,-0.053,-0.514,0.659,-0.442,326.540,-0.589,0.125,...,0.801,1.165,-1.192,-1.454,0.000,-0.751,-0.398,-0.580,-1.196,-1.509
15702,-0.033,1.528,1.394,-0.153,-0.165,0.788,-0.959,343.511,-0.818,0.294,...,0.952,1.090,-1.151,-1.361,0.000,-0.965,-0.419,-0.383,-0.965,-1.260
15703,-0.177,1.555,1.247,0.890,-0.095,1.539,-1.408,351.243,-1.739,1.076,...,0.270,0.694,-1.415,-1.420,0.000,-1.246,-0.256,0.060,-0.737,-1.205
15704,-0.467,0.897,1.320,1.061,0.190,1.682,-2.428,0.497,-1.704,1.532,...,-0.185,0.694,-1.476,-0.853,0.000,-1.169,0.897,0.504,-0.429,-1.094


In [None]:
ydata2 = rain1
for i in range(10957):
    if 