In [148]:
# Importing all the necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sn
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LassoCV

## Data Loading and Normalization Steps





In [None]:
dataset= pd.read_csv('/content/2510364_USA.csv') 

In [None]:
dataset.shape

In [None]:
dataset.isnull().sum() # Checking the null values

In [None]:
 dataset.drop('PGTM', inplace=True, axis=1) # In the case of U.S dataset as the PGTM column in this dataset doesnt contain values

In [None]:
dataset.dropna(inplace=True)

In [None]:
# Setting up two dataframes, one with columns having PRCP values 0 and other with 1
rain= dataset.loc[(dataset['PRCP'] > 0)]
no_rain= dataset.loc[(dataset['PRCP'] == 0)]

In [None]:
rain['PRCP'] =rain['PRCP']/rain['PRCP'] # Normalizing PRCP values

In [None]:
# Merging the dataframes having rainfall and no rainfall values to make it a binary classification problem 
Merge= [rain, no_rain]
normalized_rain = pd.concat(final)
dataset=normalized_rain.sample(frac=1).reset_index(drop=True)

In [None]:
dataset['PRCP'] = dataset['PRCP'].apply(np.int64)

In [153]:
features= dataset[[ 'TAVG', 'TMAX', 'TMIN', 'AWND', 'WDF2', 'WDF5','WSF2', 'WSF5']] # Extracting conditional features

In [None]:
label= dataset[['PRCP']].values # Extracting the decisional feature

In [None]:
x_train, x_test, y_train, y_test= train_test_split(features, label, test_size=0.1) # Dividing datset 90% training and 10% testing

# Deep Learning Models

DNN

In [None]:
model_dnn= tf.keras.Sequential([
     tf.keras.layers.Dense(units=300, input_shape=[8], activation=tf.keras.activations.relu),
     tf.keras.layers.Dense(units=400, activation=tf.keras.activations.relu),
     tf.keras.layers.Dense(units=500, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(units=600, activation=tf.keras.activations.relu),
     tf.keras.layers.Dense(units=1, activation=tf.nn.sigmoid)
    ]) # Setting up the DNN model

In [None]:
model_dnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # Compiling the model

In [None]:
history= model_dnn.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100) # Running the overall model

In [None]:
model_dnn.evaluate(x_test, y_test)



[0.6845026016235352, 0.6940639019012451]

In [None]:
y_prediction=np.round(model_dnn.predict(x_test)) # Getting the predicted values for test set 

In [None]:
classification_report(y_prediction, y_test) # Precision, Recall and F1-Score values

In [None]:
#Function for plotting confusion matrix
def plot_corr_matrix(value_pred, actual_value):
  matrix= confusion_matrix(value_pred, actual_value)
  cm_normalized = matrix.astype('float') / matrix.sum(axis=1)[:, np.newaxis]
  plt.figure(figsize = (10,7))
  sn.set(font_scale=1.7)#for label size
  ax=sn.heatmap(cm_normalized, cmap="Blues", annot=True,annot_kws={"size": 16})
  ax.set(xlabel="Predicted")
  ax.set(ylabel="Actual")

In [None]:
plot_corr_matrix(y_prediction, y_test) # plotting confusion matrix using actual and predicted values

1D-ConvNet

In [None]:
# Reshaping dataset for 1D-ConvNet and LSTM Model
x_train= x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test= x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
n_timesteps,n_features= 8, 1

In [None]:
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=5, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(1, activation='sigmoid'))

In [None]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])

In [None]:
model.fit(x_train, y_train, epochs=100, validation_data=(x_test, y_test))

In [None]:
yconv_prediction=np.round(model.predict(x_test))

In [None]:
classification_report(yconv_prediction, y_test)

In [None]:
plot_corr_matrix(yconv_prediction, y_test)

**LSTM**

In [None]:
modellstm = tf.keras.models.Sequential([
  tf.keras.layers.LSTM(100,return_sequences=True,input_shape=[8,1], activation='relu'),
  #tf.keras.layers.LSTM(50, return_sequences=True, activation='relu'),
  tf.keras.layers.LSTM(50, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
modellstm.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
history_lstm=modellstm.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100)

In [None]:
ylstm_prediction=np.round(modellstm.predict(x_test))

In [None]:
classification_report(ylstm_prediction, y_test)

In [None]:
plot_corr_matrix(ylstm_prediction, y_test)

# Machine Learning Models

**SVM**

In [None]:
svm= SVC()

In [None]:
svm.fit(x_train, y_train)

In [None]:
accuracy_score(y_test, y_pred_svm)

In [None]:
y_pred_svm = svm.predict(x_test)

In [None]:
classification_report(y_pred_svm, y_test)

In [None]:
plot_corr_matrix(y_pred_svm, y_test)

**KNN**

In [None]:
knn= KNeighborsClassifier()

In [None]:
knn.fit(x_train, y_train)

In [None]:
y_pred_knn = knn.predict(x_test)

In [None]:
accuracy_score(y_test, y_pred_svm)

In [None]:
classification_report(y_pred_knn, y_test)

In [None]:
plot_corr_matrix(y_pred_knn, y_test)

**CART**

In [None]:
tree= DecisionTreeClassifier()

In [None]:
tree.fit(x_train, y_train)

In [None]:
y_pred_tree= tree.predict(x_test)

In [None]:
accuracy_score(y_pred_tree, y_test)

In [None]:
plot_corr_matrix(y_pred_tree, y_test)

# Feature Correlation and Feature Importance

In [141]:
dataset.columns

Index(['AWND', 'PRCP', 'SNOW', 'SNWD', 'TAVG', 'TMAX', 'TMIN', 'WDF2', 'WDF5',
       'WSF2', 'WSF5'],
      dtype='object')

In [144]:
dataset= dataset[['TAVG', 'TMAX', 'TMIN', 'AWND', 'WDF2', 'WDF5','WSF2', 'WSF5', 'PRCP']]

In [None]:
# This cell for plotting the Correlation Matrix
plt.figure(figsize=(8, 6))
# define the mask to set the values in the upper triangle to True
mask = np.triu(np.ones_like(dataset.corr(), dtype=np.bool))
heatmap = sn.heatmap(dataset.corr(), mask=mask, vmin=0, vmax=1, annot=True, cmap='Blues')

In [154]:
# Finding the most important features using LASSO regression method
reg = LassoCV()
reg.fit(features, label)
print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
print("Best score using built-in LassoCV: %f" %reg.score(features,label))
coef = pd.Series(reg.coef_, index = features.columns)

Best alpha using built-in LassoCV: 0.003762
Best score using built-in LassoCV: 0.181906


  y = column_or_1d(y, warn=True)


In [None]:
print("Lasso picked " + str(sum(coef != 0)) + " variables and eliminated the other " +  str(sum(coef == 0)) + " variables")

In [None]:
# Plotting the important features
imp_coef = coef.sort_values()
import matplotlib
matplotlib.rcParams['figure.figsize'] = (7.5, 5.0)
imp_coef.plot(kind = "bar")