In [35]:
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from datetime import datetime
import os
import pandas as pd
import numpy as np

In [36]:
# Load the cleaned observations from disk into a DataFrame.
cleancsv = pd.read_csv(filepath_or_buffer='CSV/CLEAN.csv')

In [37]:
# Build a full timestamp: parse the 'Date' column, then add the 'Hr' column as an
# hour offset and remove 'Hr' from the frame.
# The 'Hr' guard makes this cell safe to re-run: once the hour has been folded into
# 'Date' and the column dropped, a second run is a no-op instead of a KeyError.
cleancsv['Date'] = pd.to_datetime(cleancsv['Date'])
if 'Hr' in cleancsv.columns:
    cleancsv['Date'] = cleancsv['Date'] + pd.to_timedelta(cleancsv['Hr'], unit="h")
    # Rebind instead of inplace=True so no other reference to the frame is mutated.
    cleancsv = cleancsv.drop(columns='Hr')

# Template for restricting the data to a date window (fill in real dates before use):
# filtered = cleancsv.loc[
#     (cleancsv['Date'] > datetime(yyyy, m, d)) &
#     (cleancsv['Date'] < datetime(yyyy, m, d))
# ]

"\nUse this in future if data set needs specific dates\nprediction = data.loc{\n    (untouched_csv['Date'] > datetime(x, x, x)) &\n    (untouched_csv['Date'] < datetime(x, x, x,))\n}\n"

In [38]:
# Pull every measurement column out of the cleaned frame as its own Series.
# The two tuples below are positional: the n-th name receives the n-th column.
(
    data_main_air_temp,
    data_humidity_per,
    data_wind_direction,
    data_wind_speed,
    data_gusting,
    data_pressure,
    data_rainfall,
    data_bay_temp,
    data_salinity,
    data_lbi_temp,
    data_ocean_temp,
    data_onshore_flag,
    data_upwelling_flag,
) = (
    cleancsv[column_name]
    for column_name in (
        'Mainland Air Temp',
        'Humidity (%)',
        'Direction (A)',
        'Wind Speed (A)',
        'Gusting',
        'Atmospheric Pressure (IN)',
        'Precipitation Rate',
        'Bay Temp',
        'Salinity',
        'LBI Air Temp',
        'Ocean Temp',
        'Onshore',
        'upwelling_flag',
    )
)

# Input feature matrix: one column per Series, in the order listed.
# data_pressure is left out because it is the prediction target below;
# data_onshore_flag is also left out of the inputs.
dataset = np.column_stack([
    series.values
    for series in (
        data_main_air_temp,
        data_humidity_per,
        data_wind_direction,
        data_wind_speed,
        data_gusting,
        data_rainfall,
        data_bay_temp,
        data_salinity,
        data_lbi_temp,
        data_ocean_temp,
        data_upwelling_flag,
    )
])

# Target: atmospheric pressure, reshaped to a single-column 2-D array.
output_data = np.array(data_pressure.values).reshape(-1, 1)

In [39]:
# Number of leading rows used for training: 95% of all rows, rounded up.
training_data_len = int(np.ceil(0.95 * dataset.shape[0]))

In [40]:
# Standardise the features and the target with separate scalers.
scaler_x = StandardScaler()
scaler_y = StandardScaler()

# Fit on the training rows only, so the mean/std used for scaling never include
# the held-out tail of the series (fitting on every row leaks test statistics
# into training).
scaler_x.fit(dataset[:training_data_len])
scaler_y.fit(output_data[:training_data_len])

# Apply the training-set statistics to every row; later cells slice the test
# windows out of scaledx.
scaledx = scaler_x.transform(dataset)
scaledy = scaler_y.transform(output_data)

training_data_x = scaledx[:training_data_len]  # first training_data_len rows
training_data_y = scaledy[:training_data_len]  # matching targets

# Empty containers that the sliding-window cell fills in.
X_train, y_train = [], []

In [41]:
# Sliding window: each sample is the 24 rows that precede row `end`, and its
# label is the scaled target at row `end`.
# The arrays are built directly instead of appending to lists created in an
# earlier cell, so this cell can be re-run on its own (the old version failed on
# a second run because X_train had already been rebound to an ndarray).
X_train = np.array([
    training_data_x[end - 24:end, :]
    for end in range(24, training_data_len)
])

# Labels for rows 24..training_data_len-1, as a single-column 2-D array.
y_train = np.array(training_data_y[24:training_data_len, 0]).reshape(-1, 1)

In [42]:
# Start from an empty Sequential model; the following cells add layers one by one.
model = keras.Sequential()

In [43]:
# Declare the input shape with an explicit Input object:
# (rows per window, number of feature columns), read from X_train.
# Current Keras discourages passing input_shape= to a layer inside a Sequential
# model and emits a warning when that is done.
model.add(keras.Input(shape=(X_train.shape[1], X_train.shape[2])))

# First LSTM layer: returns the full sequence so the next LSTM can consume it.
model.add(keras.layers.LSTM(64, return_sequences=True))

  super().__init__(**kwargs)


In [44]:
# Second LSTM layer: 64 units, emits only its final output (no sequence).
model.add(keras.layers.LSTM(units=64, return_sequences=False))

In [45]:
# Third layer: fully connected, 128 units with ReLU activation.
model.add(keras.layers.Dense(units=128, activation="relu"))

In [46]:
# Fourth layer: dropout with rate 0.5.
model.add(keras.layers.Dropout(rate=0.5))

In [47]:
# Output layer: a single linear unit producing one regression value per sample.
model.add(keras.layers.Dense(units=1))

In [48]:
# Print the layer overview, then configure training:
# Adam optimiser, mean-absolute-error loss, RMSE reported as an extra metric.
model.summary()
model.compile(
    loss="mae",
    optimizer="adam",
    metrics=[keras.metrics.RootMeanSquaredError()],
)

In [49]:
# Fit the model on the windowed training set.
#   epochs     - number of full passes over the training data
#   batch_size - number of samples per gradient update
training = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
)

Epoch 1/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.3433 - root_mean_squared_error: 1.0262
Epoch 2/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3426 - root_mean_squared_error: 1.0260
Epoch 3/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3425 - root_mean_squared_error: 1.0264
Epoch 4/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3423 - root_mean_squared_error: 1.0266
Epoch 5/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3425 - root_mean_squared_error: 1.0263
Epoch 6/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3424 - root_mean_squared_error: 1.0266
Epoch 7/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3424 - root_mean_squared_error: 1.0265
Epoch 8/20
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

In [52]:
# Take the scaled rows from 24 before the train/test split onward, so the first
# test window has a full 24 rows of history.
test_x = scaledx[training_data_len - 24:]

# Rebuild the same 24-row windows that were used for training.
X_test = np.array([
    test_x[stop - 24:stop, :]
    for stop in range(24, len(test_x))
])  # (samples_test, 24, n_features)

# Predict in scaled space, then map back to the target's original units.
prediction_scaled = model.predict(X_test)
prediction = scaler_y.inverse_transform(prediction_scaled)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


In [51]:
# Rows of the original dataframe that line up with X_test / prediction:
# the first predicted row is the one at position training_data_len.
test_index_start = training_data_len
test_index_end = training_data_len + prediction.shape[0]

test_df = cleancsv.iloc[test_index_start:test_index_end].copy()

# Attach the predictions. The model's target is the atmospheric pressure column
# (output_data is built from data_pressure), so the column is labelled as a
# pressure prediction rather than the previous, stale "Onshore_pred".
test_df["Pressure_pred"] = prediction.ravel()

# Write the test rows plus predictions next to the input CSV.
test_df.to_csv("CSV/predictions.csv", index=False)
