<a href="https://colab.research.google.com/github/Bnrobertson/Rainfall_Project/blob/Neural_network/Rainfall_Project_NN_Model_Gcolab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import our dependencies

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from google.colab import drive
from sqlalchemy import create_engine
from pathlib import Path
import os
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
!pip install keras-tuner
import keras_tuner as kt

!pip install pandas psycopg2-binary SQLAlchemy

spark_version = 'spark-3.5.1'
os.environ['SPARK_VERSION']=spark_version

# Install Spark and Java
!apt-get update
!apt-get install openjdk-11-jdk-headless -qq > /dev/null
!wget -q https://downloads.apache.org/spark/$SPARK_VERSION/$SPARK_VERSION-bin-hadoop3.tgz
!tar xf $SPARK_VERSION-bin-hadoop3.tgz
!pip install -q findspark

# Set Environment Variables
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
os.environ["SPARK_HOME"] = f"/content/{spark_version}-bin-hadoop3"

# Start a SparkSession
import findspark
findspark.init()

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.9
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repo

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
Rainfall_df = pd.read_csv("/content/drive/MyDrive/Rainfall_Project_folder/Rainfall_OG.csv")
Humidity_df = pd.read_csv("/content/drive/MyDrive/Rainfall_Project_folder/Rainfall_YB2.csv")

In [5]:
Merged_df = pd.merge(Rainfall_df, Humidity_df, on="Date")
Merged_df.head()

Unnamed: 0,Date,TempAvgºF,TempMinºF,TempMaxºF,RainTotal,WindSpeed,Pressure,Raining,HumidityMax,HumidityAvg,HumidityMin
0,2000-01-01,50.0,33.08,66.92,0.0,2.920444,30.247579,False,100.0,73.6,40.0
1,2000-01-02,57.02,42.98,69.98,0.0,7.580726,30.223955,False,86.0,73.2,56.0
2,2000-01-03,64.94,55.04,73.94,0.0,10.314759,30.15013,False,100.0,81.9,51.0
3,2000-01-04,62.06,51.08,73.04,0.314961,15.907098,29.88436,True,100.0,84.2,59.0
4,2000-01-05,39.02,26.96,51.08,0.0,5.840887,30.247579,False,92.0,57.5,37.0


In [6]:
# Drop date column
Current_Merged_df = Merged_df.drop(columns=["Date", "RainTotal", "TempAvgºF", "HumidityMax", "HumidityMin"])
Current_Merged_df.head()

Unnamed: 0,TempMinºF,TempMaxºF,WindSpeed,Pressure,Raining,HumidityAvg
0,33.08,66.92,2.920444,30.247579,False,73.6
1,42.98,69.98,7.580726,30.223955,False,73.2
2,55.04,73.94,10.314759,30.15013,False,81.9
3,51.08,73.04,15.907098,29.88436,True,84.2
4,26.96,51.08,5.840887,30.247579,False,57.5


In [7]:
# Split our preprocessed data into our features and target arrays

y = Current_Merged_df["Raining"].values
X = Current_Merged_df.drop(columns=["Raining"]).values

# Split the preprocessed data into a training and testing dataset
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [8]:
# Create a StandardScaler instances

scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [15]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=5))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [10]:
# Kerastuner

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

In [11]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 60 Complete [00h 00m 16s]
val_accuracy: 0.8524886965751648

Best val_accuracy So Far: 0.855656087398529
Total elapsed time: 00h 08m 25s


In [12]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 7,
 'num_layers': 5,
 'units_0': 7,
 'units_1': 7,
 'units_2': 5,
 'units_3': 3,
 'units_4': 7,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [16]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

70/70 - 0s - loss: 0.3348 - accuracy: 0.8557 - 374ms/epoch - 5ms/step
Loss: 0.33479034900665283, Accuracy: 0.855656087398529


In [21]:
best_model.save("rainfall_model.h5")

  saving_api.save_model(
