<a href="https://colab.research.google.com/github/CharlesBarkley5/Weather-Prediction-Neural-Network/blob/main/My_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **1. Preliminary Steps**

### Importing libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import re
from keras.models import Sequential
from keras.layers import Dense, Dropout, InputLayer
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from google.colab import drive
from datetime import datetime

### Load and display data

In [None]:
# Colab-specific locations: where Google Drive is mounted and where the weather CSV lives on it.
DRIVE_MOUNT_POINT = '/content/drive'
WEATHER_CSV_PATH = '/content/drive/MyDrive/weather.csv'

# Mount Google Drive (re-mounting if it is already mounted) so the CSV becomes readable.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Read the raw weather table and preview its first rows to see how it is structured.
data = pd.read_csv(WEATHER_CSV_PATH)
data.head()


Mounted at /content/drive


Unnamed: 0,DATE,PRCP,TMAX,TMIN,RAIN
0,1948-01-01,0.47,51,42,True
1,1948-01-02,0.59,45,36,True
2,1948-01-03,0.42,45,35,True
3,1948-01-04,0.31,45,34,True
4,1948-01-05,0.17,45,32,True


### Show data types to plan out data cleaning


In [None]:
# Show the dtype pandas inferred for each column of the raw table.
data.dtypes

DATE     object
PRCP    float64
TMAX      int64
TMIN      int64
RAIN     object
dtype: object

## **2. Data Cleaning**

### Convert the 'DATE' column into numerical values

In [None]:
def get_unix_timestamp(date):
  """Convert a 'YYYY-MM-DD' date string into its Unix timestamp.

  The Unix timestamp is the number of seconds between midnight on January 1st, 1970
  (the Unix Epoch) and midnight of the given date; dates before the Epoch are negative.

  The value is computed by subtracting the Epoch from the date instead of calling
  `datetime.timestamp()` on a naive datetime: that method interprets the date in the
  machine's local timezone, so its result changes from machine to machine and it can
  fail for pre-1970 dates on some platforms.

  Args:
    date: Date string whose first three '-'-separated fields are year, month and day.

  Returns:
    The timestamp as a float number of seconds.

  Raises:
    ValueError: If a field is not an integer, there are fewer than three fields,
      or the fields do not form a valid calendar date.
  """
  year, month, day = (int(part) for part in date.split("-")[:3]) #Split the date into its year, month, and day
  return (datetime(year, month, day) - datetime(1970, 1, 1)).total_seconds()

# Build a new frame whose 'DATE' column holds Unix timestamps; the raw `data` frame is left untouched.
date_as_timestamp = data['DATE'].apply(get_unix_timestamp)
data_2 = data.assign(DATE=date_as_timestamp)
data_2.head()


Unnamed: 0,DATE,PRCP,TMAX,TMIN,RAIN
0,-694310400.0,0.47,51,42,True
1,-694224000.0,0.59,45,36,True
2,-694137600.0,0.42,45,35,True
3,-694051200.0,0.31,45,34,True
4,-693964800.0,0.17,45,32,True


### Convert the 'RAIN' column into binary truth values




In [None]:
# Explicit label encoding for the 'RAIN' column.
# A plain truthiness test (`1 if rain else 0`) is unsafe on an object-dtype column:
# NaN and the string 'False' are both truthy, so they would silently be labelled as rain.
RAIN_LABELS = {True: 1, False: 0, 'True': 1, 'False': 0}

data_3 = data_2.copy()
# Any value not listed in RAIN_LABELS (including a missing one) becomes NaN rather than a
# made-up label, so such rows can be dropped instead of polluting the target.
# NOTE(review): if NaNs are present the column is float (1.0/0.0) until those rows are dropped.
data_3['RAIN'] = data_2['RAIN'].map(RAIN_LABELS)
data_3.head()

Unnamed: 0,DATE,PRCP,TMAX,TMIN,RAIN
0,-694310400.0,0.47,51,42,1
1,-694224000.0,0.59,45,36,1
2,-694137600.0,0.42,45,35,1
3,-694051200.0,0.31,45,34,1
4,-693964800.0,0.17,45,32,1


### Normalize our data in every column except 'RAIN'

In [None]:
# Every column except the 'RAIN' label is standardized.
FEATURE_COLUMNS = ['DATE', 'PRCP', 'TMAX', 'TMIN']

# Standardize each feature column: subtract its mean and divide by its standard deviation.
unscaled_features = data_3[FEATURE_COLUMNS]
data_final = data_3.copy()
data_final[FEATURE_COLUMNS] = (unscaled_features - unscaled_features.mean()) / unscaled_features.std()

# Remove rows that still contain missing values, then preview the cleaned table.
# The preview is the last expression so the notebook actually displays it.
data_final = data_final.dropna()
data_final.head()

## **3. Building the Neural Network**

### Split into train and test sets

In [None]:
# x holds the independent variables (every column except 'RAIN'); y holds the dependent 'RAIN' label.
# NOTE(review): PRCP (precipitation) is one of the features; if RAIN is derived from PRCP the
# label leaks into the inputs and a perfect test score would be expected - confirm.
x = data_final.drop(columns=['RAIN']).to_numpy()
y = data_final['RAIN'].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
(x_train, x_test, y_train, y_test) = train_test_split(
  x,
  y,
  test_size=.2,
  random_state=2022,
)

# Binary classification: each day is labelled either rain or no rain.
num_bins = 2

### Build components of model, compile, and summarize

In [None]:
def build_model(num_bins, input_dim=None, hidden_units=(1000, 5000, 10000, 300, 2000, 4000)):
  """Build the (uncompiled) fully connected classifier.

  The network is an input layer, one ReLU `Dense` layer per entry of `hidden_units`,
  and a softmax `Dense` output layer with `num_bins` units.

  Args:
    num_bins: Number of output classes (units of the softmax layer).
    input_dim: Number of input features. When omitted, it is taken from the notebook's
      global `x_train` (its second dimension), which must then already exist.
    hidden_units: Sizes of the hidden layers, in order.

  Returns:
    The assembled `Sequential` model; compiling it is left to the caller.
  """
  if input_dim is None:
    input_dim = x_train.shape[1]  # default: one input per feature column of the training matrix

  model = Sequential()
  # Pass the shape as a tuple: a bare int is not a valid shape in every Keras version.
  model.add(InputLayer(input_shape=(input_dim,)))
  for units in hidden_units:
    model.add(Dense(units, activation='relu'))
  model.add(Dense(num_bins, activation='softmax'))

  return model

# Instantiate the network for the configured number of classes.
model = build_model(num_bins)

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy reported as the metric.
# NOTE(review): the output layer is a 2-unit softmax trained on one-hot labels, for which
# 'categorical_crossentropy' is the conventional pairing - confirm 'binary_crossentropy' is intended.
model.compile(
  optimizer='adam',
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

# Print the layer-by-layer overview with parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1000)              5000      
                                                                 
 dense_1 (Dense)             (None, 5000)              5005000   
                                                                 
 dense_2 (Dense)             (None, 10000)             50010000  
                                                                 
 dense_3 (Dense)             (None, 300)               3000300   
                                                                 
 dense_4 (Dense)             (None, 2000)              602000    
                                                                 
 dense_5 (Dense)             (None, 4000)              8004000   
                                                                 
 dense_6 (Dense)             (None, 2)                 8

### Train the model

In [None]:
# One-hot encode the training labels so they line up with the softmax output units.
y_train_column = y_train[:, np.newaxis]
y_train_cat = tf.keras.utils.to_categorical(y_train_column, num_classes=num_bins)

# Fit for 20 epochs, printing progress for each epoch.
model.fit(x_train, y_train_cat, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f5afa126fd0>

### Evaluate model

In [None]:
# One-hot encode the held-out labels the same way as the training labels.
y_test_column = y_test[:, np.newaxis]
y_test_cat = tf.keras.utils.to_categorical(y_test_column, num_classes=num_bins)

# Score the trained model on the test split; evaluate() returns the loss followed by the accuracy metric.
test_scores = model.evaluate(x_test, y_test_cat)
loss, acc = test_scores
print(f'Test loss: {loss} \nTest Accuracy: {acc}')

Test loss: 3.8511868694079965e-10 
Test Accuracy: 1.0
