## Analysis
- Months of interest (monsoon season): June, July, August, September.
- Districts of interest - Kolhapur, Latur

- A new LSTM model to predict rainfall.

### Import libraries

In [2]:
import warnings
warnings.filterwarnings('ignore')

import os
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.graph_objs as go
import plotly.offline as py
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import matplotlib.patches as mpatches 
from matplotlib.collections import PatchCollection
import plotly.figure_factory as ff
from IPython.display import HTML, display
from IPython.core import display as ICD
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

import Artificial_Neural_Networks as ANN
import ARIMA

import math
from itertools import groupby
%matplotlib inline
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.utils import plot_model
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model, Sequential
from keras.layers.merge import concatenate
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

from importlib import reload
import itertools

Using TensorFlow backend.


### Useful functions

In [3]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (in percent) of a forecast.

    Both arguments are converted to NumPy arrays, the element-wise error is
    divided by the true value, and the mean of the absolute ratios is scaled
    by 100.

    NOTE(review): a true value of zero leads to a division by zero, so the
    result is inf/nan in that case.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared error between the inputs."""
    return np.sqrt(mean_squared_error(y_true, y_pred))

def calculate_performance(y_true, y_pred):
    """Compute the four error metrics used in this notebook.

    Returns:
        A 4-tuple ``(mse, mae, mape, rmse)``, each rounded to 3 decimals.
    """
    metrics = (
        mean_squared_error(y_true, y_pred),
        mean_absolute_error(y_true, y_pred),
        mean_absolute_percentage_error(y_true, y_pred),
        root_mean_squared_error(y_true, y_pred),
    )
    return tuple(round(value, 3) for value in metrics)

### Dataset

In [4]:
# Location of the rainfall workbook, relative to the notebook's working directory.
PATH = 'Dataset/rainfall_data_1901_to_2002.xlsx'
# Raw table as read from the workbook; it is cleaned in the next cell.
data = pd.read_excel(PATH)

### Preprocess data

In [5]:
# Remove the spreadsheet helper column and rows without a year.
# errors='ignore' keeps the cell re-runnable once 'vlookup' is already gone,
# and .copy() makes the 'Year' assignment below act on an independent frame
# instead of a filtered view (avoids SettingWithCopyWarning).
data = data.drop(columns='vlookup', errors='ignore')
data = data[data['Year'].notnull()].copy()
data['Year'] = data['Year'].astype('int')
data.index = range(len(data))

# Keep only the Maharashtra rows; the 'State' column is then redundant.
m_data = data[data['State'] == 'Maharashtra']
m_data = m_data.drop(columns='State')

districts = m_data.District.unique()
years = list(range(1901, 2003))
# Every column after the first three is treated as a month column.
months = data.columns[3:]
year_month = [str(year) + '_' + month for year in years for month in months]
dates = pd.date_range(start='1901-01', freq='MS', periods=len(years)*12)

# Long-format frame: one row per (year, month), one column per district.
maharashtra_data = pd.DataFrame({'Year_Month': year_month})
maharashtra_data['Date'] = dates
maharashtra_data[['Year', 'Month']] = maharashtra_data['Year_Month'].str.split('_', n=1, expand=True)
maharashtra_data = maharashtra_data.drop(columns=['Year_Month'])

for district in districts:
    df = m_data[m_data.District == district].drop(columns=['District', 'Year'])
    # Flatten the (year x month) block row by row into one monthly series.
    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on old
    # and new versions. The explicit target length still fails loudly if a
    # district does not have exactly len(years) * len(months) values.
    # NOTE(review): assumes each district's rows are already in ascending year order - confirm.
    df = df.values.reshape(len(years) * len(months))
    maharashtra_data[district] = df

maharashtra_data.head()

Unnamed: 0,Date,Year,Month,Ahmadnagar,Akola,Amravati,Aurangabad,Bhandara,Bid,Buldana,...,Nashik,Osmanabad,Parbhani,Pune,Sangli,Satara,Solapur,Wardha,Washim,Yavatmal
0,1901-01-01,1901,Jan,2.51,34.202,35.651,10.922,23.397,16.647,31.455,...,5.063,15.5,33.207,0.922,0.138,0.197,4.496,45.05,32.485,22.031
1,1901-02-01,1901,Feb,11.489,1.099,6.822,4.362,63.844,1.916,0.823,...,1.609,2.784,4.997,7.195,0.537,0.525,3.667,28.22,3.607,16.198
2,1901-03-01,1901,Mar,11.325,30.002,36.103,25.161,33.563,27.287,28.448,...,11.196,11.333,31.625,5.105,13.09,9.566,10.33,26.398,26.557,29.318
3,1901-04-01,1901,Apr,33.931,10.248,10.636,12.714,61.56,33.211,13.902,...,7.838,34.814,41.941,35.949,50.077,30.11,59.854,29.228,18.127,30.521
4,1901-05-01,1901,May,30.401,2.891,4.173,34.244,13.665,59.027,9.397,...,7.475,52.792,31.794,36.65,78.994,65.226,50.892,8.619,6.7,13.193


In [6]:
# From here on `m_data` is the long-format monthly table (one column per district),
# replacing the per-year Maharashtra frame of the same name built in the previous cell.
m_data = maharashtra_data.copy()

In [7]:
# Preview the first rows of the working copy.
m_data.head()

Unnamed: 0,Date,Year,Month,Ahmadnagar,Akola,Amravati,Aurangabad,Bhandara,Bid,Buldana,...,Nashik,Osmanabad,Parbhani,Pune,Sangli,Satara,Solapur,Wardha,Washim,Yavatmal
0,1901-01-01,1901,Jan,2.51,34.202,35.651,10.922,23.397,16.647,31.455,...,5.063,15.5,33.207,0.922,0.138,0.197,4.496,45.05,32.485,22.031
1,1901-02-01,1901,Feb,11.489,1.099,6.822,4.362,63.844,1.916,0.823,...,1.609,2.784,4.997,7.195,0.537,0.525,3.667,28.22,3.607,16.198
2,1901-03-01,1901,Mar,11.325,30.002,36.103,25.161,33.563,27.287,28.448,...,11.196,11.333,31.625,5.105,13.09,9.566,10.33,26.398,26.557,29.318
3,1901-04-01,1901,Apr,33.931,10.248,10.636,12.714,61.56,33.211,13.902,...,7.838,34.814,41.941,35.949,50.077,30.11,59.854,29.228,18.127,30.521
4,1901-05-01,1901,May,30.401,2.891,4.173,34.244,13.665,59.027,9.397,...,7.475,52.792,31.794,36.65,78.994,65.226,50.892,8.619,6.7,13.193


### LSTM model: build, prepare inputs, train and predict

In [8]:
def LSTM_model(num_of_previous_months, hidden_nodes_months, 
               num_of_previous_years, hidden_nodes_years, output_nodes):
    """Build and compile a two-branch LSTM regression model.

    One branch reads a sequence of ``num_of_previous_months`` values, the
    other a sequence of ``num_of_previous_years`` values (one feature per
    time step in both cases). The two LSTM outputs are concatenated and fed
    to a dense layer with ``output_nodes`` units.

    The model is compiled with mean-squared-error loss and the Adam
    optimizer. As a side effect, a diagram of the architecture is written to
    'Functional_LSTM.png'.

    Returns:
        The compiled Keras ``Model``.
    """
    # Branch 1: window of preceding months, shape (time steps, 1).
    months_in = Input((num_of_previous_months, 1))
    months_features = LSTM(hidden_nodes_months, activation='relu')(months_in)

    # Branch 2: same month in preceding years, shape (time steps, 1).
    years_in = Input((num_of_previous_years, 1))
    years_features = LSTM(hidden_nodes_years, activation='relu')(years_in)

    combined = concatenate([months_features, years_features])
    prediction = Dense(output_nodes)(combined)

    network = Model(inputs=[months_in, years_in], outputs=prediction)
    network.compile(loss='mean_squared_error', optimizer='adam')

    plot_model(network, 'Functional_LSTM.png', show_shapes=True, show_layer_names=True)

    return network

In [9]:
def preprocess_data(m_data, district, month, num_of_prev_months, num_of_prev_years):
    """Build the two LSTM input tensors and the target vector for one district/month.

    For every occurrence of ``month`` that has at least ``num_of_prev_years``
    earlier occurrences, one sample is produced:

    * input 1: the ``num_of_prev_months`` rows immediately preceding the
      target row (all months, in table order);
    * input 2: the values of the same ``month`` in the ``num_of_prev_years``
      preceding occurrences;
    * target: the value of ``district`` in the target row.

    Args:
        m_data: DataFrame with 'Date', 'Year', 'Month' columns and one column
            per district, one row per month in chronological order.
        district: name of the district column to use.
        month: value of the 'Month' column to predict (e.g. 'Jun').
        num_of_prev_months: length of the preceding-months window.
        num_of_prev_years: length of the preceding-years window.

    Returns:
        ``(input_1, input_2, y)`` with shapes
        ``(n, num_of_prev_months, 1)``, ``(n, num_of_prev_years, 1)`` and
        ``(n, 1)``.

    Raises:
        ValueError: if no sample can be built, or if the first sample's
            preceding-months window would start before the first row.
    """
    rainfall_data = m_data[['Date', 'Year', 'Month', district]]
    rainfall = rainfall_data[district].values

    # Row positions (not index labels) of the requested month, so the window
    # slicing below is correct even when m_data does not carry a RangeIndex.
    month_positions = np.flatnonzero((rainfall_data['Month'] == month).values)
    month_rainfall = rainfall[month_positions]

    # The first `num_of_prev_years` occurrences only serve as history.
    target_positions = month_positions[num_of_prev_years:]
    sample_count = len(target_positions)
    if sample_count == 0:
        raise ValueError(
            "No samples available for month %r with %d previous years"
            % (month, num_of_prev_years))
    if target_positions[0] < num_of_prev_months:
        # A negative slice start would silently wrap around to the end of the series.
        raise ValueError(
            "Not enough preceding rows for a window of %d months before the first %r sample"
            % (num_of_prev_months, month))

    # Input 1: the months directly preceding each target row.
    train_data_input_1 = np.array(
        [rainfall[pos - num_of_prev_months:pos] for pos in target_positions])
    train_data_input_1 = train_data_input_1.reshape(sample_count, num_of_prev_months, 1)

    # Input 2: the same month in the preceding years.
    train_data_input_2 = np.array(
        [month_rainfall[i:i + num_of_prev_years] for i in range(sample_count)])
    train_data_input_2 = train_data_input_2.reshape(sample_count, num_of_prev_years, 1)

    y_train = np.array(month_rainfall[num_of_prev_years:]).reshape(sample_count, 1)

    return train_data_input_1, train_data_input_2, y_train

In [10]:
def split_and_train_LSTM(model, input_1, input_2, y_train_main, future_steps, epochs, batch_size):
    """Hold out ``future_steps`` samples and fit ``model`` on the remainder.

    Args:
        model: compiled two-input Keras model.
        input_1: first input tensor (one row per sample).
        input_2: second input tensor (one row per sample).
        y_train_main: targets aligned row by row with both inputs.
        future_steps: passed to ``train_test_split`` as ``test_size``.
        epochs: number of training epochs.
        batch_size: training batch size.

    Returns:
        ``(model, X_test_input_1, X_test_input_2, y_test)``: the fitted model
        and the held-out inputs and targets.
    """
    # Split all three arrays in ONE call so that the rows of both inputs and
    # the targets are guaranteed to stay aligned. With the same sample count
    # and random_state this yields the same partition as splitting each input
    # separately.
    # NOTE(review): the hold-out set is drawn at random, not taken from the
    # end of the series. If `future_steps` is meant to be a chronological
    # hold-out, pass shuffle=False here - confirm the intended evaluation.
    (X_train_input_1, X_test_input_1,
     X_train_input_2, X_test_input_2,
     y_train, y_test) = train_test_split(input_1, input_2, y_train_main,
                                         test_size=future_steps, random_state=42)

    model.fit([X_train_input_1, X_train_input_2], y_train, epochs=epochs, batch_size=batch_size, verbose=1, shuffle=True)
    return model, X_test_input_1, X_test_input_2, y_test

In [11]:
def predict_LSTM(model, X_test_input_1, X_test_input_2):
    """Return ``model.predict`` evaluated on the pair of input tensors."""
    return model.predict([X_test_input_1, X_test_input_2])

In [12]:
def Long_Short_Term_Memory(data, district, month, num_of_prev_months, num_of_prev_years, hidden_nodes_months, hidden_nodes_years, epochs, batch_size, future_steps, output_nodes=1):
    """Build, train and evaluate the two-branch LSTM for one district and month.

    Args:
        data: monthly rainfall table passed on to ``preprocess_data``.
        district: district column to model.
        month: month to predict (value of the 'Month' column).
        num_of_prev_months: length of the preceding-months input window.
        num_of_prev_years: length of the preceding-years input window.
        hidden_nodes_months: LSTM units of the months branch.
        hidden_nodes_years: LSTM units of the years branch.
        epochs: number of training epochs.
        batch_size: training batch size.
        future_steps: number of samples held out for prediction.
        output_nodes: units of the final dense layer. Defaults to 1, matching
            the single target column produced by ``preprocess_data``.

    Returns:
        ``(model, y_pred)``: the fitted model and its predictions on the
        held-out samples.
    """
    model = LSTM_model(num_of_prev_months, hidden_nodes_months, num_of_prev_years, hidden_nodes_years, output_nodes)
    # Use the `data` argument; previously the notebook-level `m_data` was read
    # here, so the frame passed by the caller was silently ignored.
    train_data_input_1, train_data_input_2, y_train_main = preprocess_data(data, district, month, num_of_prev_months, num_of_prev_years)
    model, X_test_input_1, X_test_input_2, y_test = split_and_train_LSTM(model, train_data_input_1, train_data_input_2, y_train_main, future_steps, epochs, batch_size)
    y_pred = predict_LSTM(model, X_test_input_1, X_test_input_2)
    return model, y_pred

In [13]:
# Number of samples held out for evaluation (ends up as `test_size` in the train/test split).
future_steps = 5

# Candidate values per hyper-parameter, listed in this order:
# number_of_previous_months, number_of_previous_years, hidden_nodes_months, hidden_nodes_years, epochs, batch_size, future_steps
# NOTE(review): this grid is not referenced by any cell visible in this notebook.
parameters_LSTM = [[3,6,9,12], [4,6,8], [6,8,10,12], [5,7,8], [100], [10], [future_steps]]
# Configuration actually used by the cells below.
num_of_previous_months = 12
num_of_previous_years = 7
hidden_nodes_months = 12
hidden_nodes_years = 5
output_nodes = 1

In [16]:
# Select the series to model in this cell: `district` and `month` were
# otherwise only assigned in a later cell, so a fresh "Run All" failed here.
district = 'Kolhapur'
month = 'Jun'

# 250 epochs, batch size 10; the last `future_steps` argument sets the hold-out size.
Long_Short_Term_Memory(m_data, district, month, num_of_previous_months, num_of_previous_years,
                      hidden_nodes_months, hidden_nodes_years, 250, 10, future_steps)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

Epoch 98/400
Epoch 99/400
Epoch 100/400
Epoch 101/400
Epoch 102/400
Epoch 103/400
Epoch 104/400
Epoch 105/400
Epoch 106/400
Epoch 107/400
Epoch 108/400
Epoch 109/400
Epoch 110/400
Epoch 111/400
Epoch 112/400
Epoch 113/400
Epoch 114/400
Epoch 115/400
Epoch 116/400
Epoch 117/400
Epoch 118/400
Epoch 119/400
Epoch 120/400
Epoch 121/400
Epoch 122/400
Epoch 123/400
Epoch 124/400
Epoch 125/400
Epoch 126/400
Epoch 127/400
Epoch 128/400
Epoch 129/400
Epoch 130/400
Epoch 131/400
Epoch 132/400
Epoch 133/400
Epoch 134/400
Epoch 135/400
Epoch 136/400
Epoch 137/400
Epoch 138/400
Epoch 139/400
Epoch 140/400
Epoch 141/400
Epoch 142/400
Epoch 143/400
Epoch 144/400
Epoch 145/400
Epoch 146/400
Epoch 147/400
Epoch 148/400
Epoch 149/400
Epoch 150/400
Epoch 151/400
Epoch 152/400
Epoch 153/400
Epoch 154/400
Epoch 155/400
Epoch 156/400
Epoch 157/400
Epoch 158/400
Epoch 159/400
Epoch 160/400
Epoch 161/400
Epoch 162/400
Epoch 163/400
Epoch 164/400
Epoch 165/400
Epoch 166/400
Epoch 167/400
Epoch 168/400
Epoch 16

Epoch 193/400
Epoch 194/400
Epoch 195/400
Epoch 196/400
Epoch 197/400
Epoch 198/400
Epoch 199/400
Epoch 200/400
Epoch 201/400
Epoch 202/400
Epoch 203/400
Epoch 204/400
Epoch 205/400
Epoch 206/400
Epoch 207/400
Epoch 208/400
Epoch 209/400
Epoch 210/400
Epoch 211/400
Epoch 212/400
Epoch 213/400
Epoch 214/400
Epoch 215/400
Epoch 216/400
Epoch 217/400
Epoch 218/400
Epoch 219/400
Epoch 220/400
Epoch 221/400
Epoch 222/400
Epoch 223/400
Epoch 224/400
Epoch 225/400
Epoch 226/400
Epoch 227/400
Epoch 228/400
Epoch 229/400
Epoch 230/400
Epoch 231/400
Epoch 232/400
Epoch 233/400
Epoch 234/400
Epoch 235/400
Epoch 236/400
Epoch 237/400
Epoch 238/400
Epoch 239/400
Epoch 240/400
Epoch 241/400
Epoch 242/400
Epoch 243/400
Epoch 244/400
Epoch 245/400
Epoch 246/400
Epoch 247/400
Epoch 248/400
Epoch 249/400
Epoch 250/400
Epoch 251/400
Epoch 252/400
Epoch 253/400
Epoch 254/400
Epoch 255/400
Epoch 256/400
Epoch 257/400
Epoch 258/400
Epoch 259/400
Epoch 260/400
Epoch 261/400
Epoch 262/400
Epoch 263/400
Epoch 

Epoch 288/400
Epoch 289/400
Epoch 290/400
Epoch 291/400
Epoch 292/400
Epoch 293/400
Epoch 294/400
Epoch 295/400
Epoch 296/400
Epoch 297/400
Epoch 298/400
Epoch 299/400
Epoch 300/400
Epoch 301/400
Epoch 302/400
Epoch 303/400
Epoch 304/400
Epoch 305/400
Epoch 306/400
Epoch 307/400
Epoch 308/400
Epoch 309/400
Epoch 310/400
Epoch 311/400
Epoch 312/400
Epoch 313/400
Epoch 314/400
Epoch 315/400
Epoch 316/400
Epoch 317/400
Epoch 318/400
Epoch 319/400
Epoch 320/400
Epoch 321/400
Epoch 322/400
Epoch 323/400
Epoch 324/400
Epoch 325/400
Epoch 326/400
Epoch 327/400
Epoch 328/400
Epoch 329/400
Epoch 330/400
Epoch 331/400
Epoch 332/400
Epoch 333/400
Epoch 334/400
Epoch 335/400
Epoch 336/400
Epoch 337/400
Epoch 338/400
Epoch 339/400
Epoch 340/400
Epoch 341/400
Epoch 342/400
Epoch 343/400
Epoch 344/400
Epoch 345/400
Epoch 346/400
Epoch 347/400
Epoch 348/400
Epoch 349/400
Epoch 350/400
Epoch 351/400
Epoch 352/400
Epoch 353/400
Epoch 354/400
Epoch 355/400
Epoch 356/400
Epoch 357/400
Epoch 358/400
Epoch 

Epoch 383/400
Epoch 384/400
Epoch 385/400
Epoch 386/400
Epoch 387/400
Epoch 388/400
Epoch 389/400
Epoch 390/400
Epoch 391/400
Epoch 392/400
Epoch 393/400
Epoch 394/400
Epoch 395/400
Epoch 396/400
Epoch 397/400
Epoch 398/400
Epoch 399/400
Epoch 400/400


(<keras.engine.training.Model at 0x7ff1dd222780>, array([[494.6607],
        [527.5482],
        [718.7484],
        [402.4953],
        [613.7914]], dtype=float32))

In [None]:
# Districts and months this analysis focuses on.
# NOTE(review): neither list is used by any active (non-commented) cell visible here.
districts_of_interest = ['Kolhapur', 'Latur']
months_of_interest = ['Jun', 'Jul', 'Aug', 'Sep']

In [15]:
# District column and month used by the step-by-step cells below.
district = 'Kolhapur'
month = 'Jun'

In [None]:
model = LSTM_model(num_of_previous_months, hidden_nodes_months, num_of_previous_years, hidden_nodes_years, output_nodes)
# Build the inputs with the same window sizes the model was created with
# (previously the literals 12 and 7), so the tensor shapes cannot drift from
# the model's input shapes when the configuration changes.
train_data_input_1, train_data_input_2, y_train_main = preprocess_data(m_data, district, month, num_of_previous_months, num_of_previous_years)

In [None]:
# Sanity check: the three arrays should share the same first (sample) dimension.
train_data_input_1.shape, train_data_input_2.shape, y_train_main.shape

In [None]:
# NOTE(review): repeats the first entry of the shape tuple displayed by the previous cell.
train_data_input_1.shape

In [None]:
# `y_train_main` was missing from this call: the function takes the targets
# as its fourth argument, so the original raised a TypeError.
# Arguments after the targets: hold-out size, epochs (100), batch size (5).
model, X_test_input_1, X_test_input_2, y_test = split_and_train_LSTM(model, train_data_input_1, train_data_input_2, y_train_main, future_steps, 100, 5)

In [None]:
# Predict the held-out samples with the fitted model.
y_pred = predict_LSTM(model, X_test_input_1, X_test_input_2)

In [None]:
# Model predictions for the held-out samples.
y_pred

In [None]:
# Observed values for the same held-out samples, for comparison with y_pred.
y_test

In [None]:
month = 'Jun'
# `rainfall_data` only exists inside preprocess_data and the `district`
# argument was missing; pass the monthly table and the district explicitly.
preprocess_data(m_data, district, month, num_of_previous_months, num_of_previous_years)

In [None]:
# for district in districts_of_interest:
#     temp_data = rainfall_season_data[['Date', 'Year', 'Month', district]]
#     for month in months_of_interest:
#         df = temp_data[temp_data.Month == month]

In [None]:
# LSTM_model(num_of_previous_months, hidden_nodes_months, num_of_previous_years, hidden_nodes_years, output_nodes)