In [1]:
from fastai.tabular.all import *
from itertools import product

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import pandas as pd, numpy as np, tensorflow as tf

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    return tf.reduce_mean(tf.abs((y_true-y_pred) / y_true)) * 100.0

In [3]:
def weighted_average(predictions, weights):
    return sum(pred * weight for pred, weight in zip(predictions, weights))

In [4]:
bLR = tf.keras.models.load_model('./saved_models/baselineLRWLandscape.keras')
cnn = tf.keras.models.load_model('./saved_models/cnn.keras')
rnn = tf.keras.models.load_model('./saved_models/rnn.keras')

In [5]:
#df = pd.read_feather('D:/Skóli/lokaverkefni_vel/data/merged-test1month-26-2-24.feather')
#df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-full-25ms-24hr-28-2-24.feather')
df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-w-landscape-full-25ms-24hr-28-2-24.feather')
df = df[df.f < df.fg]
df['gust_factor'] = df.fg / df.f

df = df.drop(['f', 'fg', 'fsdev', 'd', 'dsdev', 'longitude', 'latitude', 'X', 'Y', 'time', 'stod'], axis = 1)# + [f'Landscape_{i}' for i in range(70)], axis = 1)

df_unfolded = df.elevations.apply(pd.Series)
df = pd.concat([df, df_unfolded], axis = 1)

df = df.dropna()
df = df.reset_index(drop = True)

In [6]:
df

Unnamed: 0,ws_15,ws_250,ws_500,wd_15,wd_250,wd_500,p_15,p_250,p_500,t_15,t_250,t_500,Ri_01,Ri_12,N_01,N_12,gust_factor
0,19.6041,23.1354,23.5411,97.9485,99.3434,99.9549,98956.6580,96065.3307,93053.1580,272.2270,269.0631,266.6024,-0.593939,-1.137324,-0.000134,-0.000003,1.295181
1,14.4611,16.8599,17.1642,145.3825,146.3173,146.5913,100351.7109,97456.1563,94440.5234,275.0884,272.5074,270.0503,-0.422328,-1.799695,-0.000044,-0.000003,1.284157
2,8.5471,9.1812,9.1686,62.6103,65.3626,79.3139,100649.9705,97726.7943,94686.4939,273.2487,270.8995,268.9256,-1.177507,26685.778689,-0.000009,0.000068,1.283989
3,10.3157,11.1665,11.1894,88.1111,89.3584,90.2917,101001.3064,98070.3142,95018.0347,273.5012,270.9638,268.5380,-2.864259,233.822195,-0.000038,0.000002,1.317876
4,11.2918,17.3783,17.6068,116.0218,129.5856,136.2121,99682.9219,96849.4688,93917.3320,277.7801,277.3303,276.3435,0.412797,245.814617,0.000277,0.000205,1.354316
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18428,10.6554,11.5584,11.0643,6.4581,8.6411,12.4018,96755.1484,93969.3472,91068.2161,275.9923,272.9678,270.6434,-7.538465,4.194781,-0.000111,0.000016,1.302309
18429,25.4698,31.2753,33.5225,97.3362,98.8644,101.6907,99002.3281,96157.8359,93198.7578,276.0819,273.6194,271.7865,-0.042580,1.078334,-0.000026,0.000087,1.228657
18430,19.3720,22.8016,23.2493,41.1882,44.0706,46.2614,95774.5937,93026.4666,90165.3763,276.3821,273.9365,271.7808,-0.110178,12.661773,-0.000023,0.000041,1.283449
18431,23.5278,28.6951,29.9320,93.5311,95.0293,96.2047,97827.6385,94986.8069,92028.9293,273.8760,270.7487,268.4949,-0.264139,1.104732,-0.000128,0.000027,1.363419


In [7]:
y = df.gust_factor
X = df.drop(['gust_factor'], axis = 1)

# Changing the type of X,y so as to work with Tensorflow
X, y = X.values.astype(np.float32), y.values.astype(np.float32)

scaler = StandardScaler()

# Assuming 'X' is your feature matrix and 'y' is your target variable
# Replace 'X' and 'y' with your actual data

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

X_train = scaler.fit_transform(X_train)
X_val = scaler.fit_transform(X_val)
X_test = scaler.fit_transform(X_test)


In [8]:
y_pred_bLR = bLR.predict(X_test)
y_pred_cnn = cnn.predict(X_test)
y_pred_rnn = rnn.predict(X_test)

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_32" is incompatible with the layer: expected axis -1 of input shape to have value 5, but received input with shape (32, 16)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 16), dtype=float32)
  • training=False
  • mask=None

In [None]:
best_weights, best_metric = None, float('inf')

weight_combs = [combo for combo in product(range(101), repeat = 3) if sum(combo) == 100]

for weights in weight_combs:
    ensemble_predictions = weighted_average([y_pred_bLR, y_pred_cnn, y_pred_rnn], weights) / 100
    current_metric = mean_absolute_percentage_error(y_test, ensemble_predictions)

    if current_metric < best_metric:
        best_weights = weights
        best_metric = current_metric

In [None]:
print(f"Best Weights: {best_weights}")
print(f"Best Metric: {best_metric}%")

Best Weights: (26, 0, 74)
Best Metric: 7.422733306884766%


In [None]:
# Simple non weighted average prediction
y_pred = (y_pred_bLR + y_pred_cnn + y_pred_rnn) / 3
mape = mean_absolute_percentage_error(y_test, y_pred)

print(f"MAPE: {mape} %")

MAPE: 7.561738967895508 %
