In [456]:
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from datetime import datetime
import os
import pandas as pd
import numpy as np

In [457]:
#Opens a new dataframe with the Clean csv
cleancsv = pd.read_csv('CSV/CLEAN.csv') 
cleancsv = cleancsv.dropna().reset_index(drop=True)

In [458]:
#Convert data into Date time and create date filter
cleancsv['Date'] = pd.to_datetime(cleancsv['Date'])
cleancsv['Date'] = cleancsv['Date'] + pd.to_timedelta(cleancsv["Hr"], unit="h")
cleancsv.drop('Hr', axis=1, inplace=True)

"""
Use this in future if data set needs specific dates
prediction = data.loc{
    (untouched_csv['Date'] > datetime(x, x, x)) &
    (untouched_csv['Date'] < datetime(x, x, x,))
}
"""

"\nUse this in future if data set needs specific dates\nprediction = data.loc{\n    (untouched_csv['Date'] > datetime(x, x, x)) &\n    (untouched_csv['Date'] < datetime(x, x, x,))\n}\n"

In [459]:
#Prepare colomns into variables
data_main_air_temp = cleancsv['Mainland Air Temp']
data_humidity_per = cleancsv['Humidity (%)']
data_wind_direction = cleancsv['Direction (A)']
data_wind_speed = cleancsv['Wind Speed (A)']
data_gusting = cleancsv['Gusting']
data_pressure = cleancsv['Atmospheric Pressure (IN)']
data_rainfall = cleancsv['Precipitation Rate']
data_bay_temp = cleancsv['Bay Temp']
data_salinity = cleancsv['Salinity']
data_lbi_temp = cleancsv['LBI Air Temp']
data_ocean_temp = cleancsv['Ocean Temp']
data_onshore_flag = cleancsv['Onshore']
data_upwelling_flag = cleancsv['upwelling_flag']

#saves all input data into one Numpy array
dataset = np.column_stack([
    data_main_air_temp.values,
    data_humidity_per.values,
    #data_wind_direction.values,
    data_wind_speed.values,
    data_gusting.values,
    data_pressure.values,
    data_rainfall.values,
    data_bay_temp.values,
    data_salinity.values,
    data_lbi_temp.values,
    data_ocean_temp.values,
    data_onshore_flag.values,
    data_upwelling_flag.values,
])

#Save output data into variables and reshape it to be a 2d array
output_data = data_wind_direction.values
#output_data = np.array(output_data).reshape(-1, 1)
output_data = output_data.reshape(-1, 1)

In [460]:
#Length of training data
training_data_len = int(np.ceil(len(dataset) * 0.90)) #Use 90% of training data

In [461]:
# 1) Check for NaNs and value distribution
print(cleancsv["Direction (A)"].unique())
print("NaNs:", cleancsv["Direction (A)"].isna().sum())
print("0-degree count:", (cleancsv["Direction (A)"] == 0).sum())


[225.  270.  247.5 292.5 337.5 315.    0.  180.   90.  135.  202.5  22.5
  67.5  45.  157.5 112.5]
NaNs: 0
0-degree count: 618


In [462]:
#Scaler
scaler_x= StandardScaler()
scaler_y= StandardScaler()

scaledx = scaler_x.fit_transform(dataset)
scaledy = scaler_y.fit_transform(output_data)

training_data_x = scaledx[:training_data_len] #95% of all data
training_data_y = scaledy[:training_data_len] #95% of all data

X_train, y_train = [], []

In [463]:
#Sliding window over last 24 hrs
for i in range(24, training_data_len):
    X_train.append(training_data_x[i-24:i, :])
    y_train.append(training_data_y[i,0])

#Convert lists to arrays
X_train = np.array(X_train)
y_train = np.array(y_train).reshape(-1, 1)

In [464]:
#Build the model
model = keras.models.Sequential()

In [465]:
#Layer Zero input_shape=(X_train.shape[1], 1)
model.add(keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2])))

In [466]:
#First Layer input_shape=(X_train.shape[1], 1)
model.add(keras.layers.LSTM(64, return_sequences=True))

In [467]:
#Second Layer
model.add(keras.layers.LSTM(64, return_sequences=False))

In [468]:
#3rd Layer (Dense)
model.add(keras.layers.Dense(128, activation="relu"))

In [469]:
#4th Layer (Dropout)
model.add(keras.layers.Dropout(0.5))

In [470]:
#Final Output Layer (Dense)
model.add(keras.layers.Dense(1, activation="sigmoid"))

In [471]:
#Put all the layers together
model.summary()
model.compile(optimizer="adam",
    loss="mae",           
    metrics=[keras.metrics.RootMeanSquaredError])

In [472]:
#Train the model

#epochs = # of runs
#batch size = how much data is in each batch
training = model.fit(
    X_train,
    y_train, 
    epochs=50, 
    batch_size=32,
    validation_split=0.2,
    shuffle=False,
    )

Epoch 1/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.7079 - root_mean_squared_error: 0.9068 - val_loss: 1.1245 - val_root_mean_squared_error: 1.2948
Epoch 2/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6554 - root_mean_squared_error: 0.8445 - val_loss: 1.0640 - val_root_mean_squared_error: 1.2111
Epoch 3/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6160 - root_mean_squared_error: 0.8166 - val_loss: 1.0857 - val_root_mean_squared_error: 1.2481
Epoch 4/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6040 - root_mean_squared_error: 0.8001 - val_loss: 1.1296 - val_root_mean_squared_error: 1.3081
Epoch 5/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5933 - root_mean_squared_error: 0.7966 - val_loss: 1.0957 - val_root_mean_squared_error: 1.2570
Epoch 6/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━

In [473]:
test_x = scaledx[training_data_len-24:]
X_test = []

#rebuild window
for i in range(24, len(test_x)):
    X_test.append(test_x[i-24:i, :])

X_test = np.array(X_test)   # (samples_test, 24, n_features)


direction_scaled_pred = model.predict(X_test)        # scaled
direction_pred = scaler_y.inverse_transform(direction_scaled_pred)  # degrees


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


In [474]:
#Round to nearest direction
allowed_dirs = np.array([0, 22.5, 45, 67.5, 90, 112.5, 135, 157.5, 180, 202.5, 225, 247.5, 270, 292.5, 315, 337.5])   

In [475]:
print("direction_scaled_pred shape:", direction_scaled_pred.shape)
print("direction_pred shape:", direction_pred.shape)
print("first 10 direction_pred:", direction_pred[:10].ravel())


direction_scaled_pred shape: (273, 1)
direction_pred shape: (273, 1)
first 10 direction_pred: [135.4194  135.4194  135.4194  135.4194  135.4194  135.41942 135.4836
 223.3337  239.52205 240.49864]


In [476]:
print("scaledx shape:", scaledx.shape)
print("training_data_len:", training_data_len)
print("test_x shape:", test_x.shape)
print("X_test shape:", X_test.shape)

scaledx shape: (2736, 12)
training_data_len: 2463
test_x shape: (297, 12)
X_test shape: (273, 24, 12)


In [477]:
onshore_degrees = [180, 135, 157.5, 90, 67.5, 45, 22.5]


pred_deg = direction_pred.ravel()
# compute index of closest allowed direction for each prediction and save if onshore
idx = np.abs(pred_deg[:, None] - allowed_dirs[None, :]).argmin(axis=1)
onshore_pred_flag = np.isin(pred_deg, onshore_degrees).astype(int)
dir_pred_discrete = allowed_dirs[idx]

In [478]:
test_start = training_data_len
test_end   = training_data_len + direction_pred.shape[0]

test_df = cleancsv.iloc[test_start:test_end].copy()
test_df["Dir_pred"] = dir_pred_discrete.ravel()

test_df["Onshore_pred_flag"] = onshore_pred_flag

test_df.to_csv("CSV/predicted_direction_onshore.csv", index=False)


In [None]:
#BL y_true = cleancsv["Direction (A)"].iloc[test_start:test_end].values
#y_pred = dir_pred_discrete.ravel()

#print(confusion_matrix(y_true, y_pred))
#print(classification_report(y_true, y_pred, digits=3))

ValueError: continuous is not supported