In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import pandas as pd, numpy as np, tensorflow as tf

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    return tf.reduce_mean(tf.abs((y_true-y_pred) / y_true)) * 100.0

In [3]:
def weighted_average(predictions, weights):
    return sum(pred * weight for pred, weight in zip(predictions, weights))

In [4]:
bLR = tf.keras.models.load_model('./saved_models/baselineLR.keras')
rnn = tf.keras.models.load_model('./saved_models/rnn.keras')

In [16]:
#df = pd.read_feather('D:/Skóli/lokaverkefni_vel/data/merged-test1month-26-2-24.feather')
#df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-full-25ms-24hr-28-2-24.feather')
df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-full-W-Landscape-And-Station-Elevations-25ms-24hr-11-3-24.feather')
df = df[df.f < df.fg]
df['gust_factor'] = df.fg / df.f

df.columns

Index(['X', 'Y', 'time', 'ws_15', 'ws_250', 'ws_500', 'wd_15', 'wd_250',
       'wd_500', 'p_15', 'p_250', 'p_500', 't_15', 't_250', 't_500', 'stod',
       'f', 'fg', 'fsdev', 'd', 'dsdev', 'longitude', 'latitude', 'Ri_01',
       'Ri_12', 'N_01', 'N_12', 'landscape_points', 'elevations',
       'gust_factor'],
      dtype='object')

In [6]:
df_unfolded = df.elevations.apply(pd.Series)
df = pd.concat([df, df_unfolded], axis = 1)

In [7]:
from sklearn.decomposition import PCA

df_landscape_elevation = df.iloc[:, -70:]

df_landscape_elevation = (df_landscape_elevation - df_landscape_elevation.mean()) / df_landscape_elevation.std()

n_components = 5

pca = PCA(n_components=n_components)
compressed_features = pca.fit_transform(df_landscape_elevation)

compressed_df = pd.DataFrame(data = compressed_features, columns = ['PC' + str(i) for i in range(n_components)])

df  = pd.concat([df, compressed_df], axis = 1)

In [8]:
df = df.drop(['f', 'fg', 'fsdev', 'd', 'dsdev', 'longitude', 'latitude', 'X', 'Y', 'time', 'stod', 'landscape_points', 'elevations', 
              'ws_15', 'ws_250',	'ws_500', 'wd_15', 'wd_250', 'wd_500', 'p_15', 'p_250', 'p_500', 't_15', 't_250', 't_500', 'N_01', 'N_12'] + [i for i in range(70)], axis = 1)

df = df.dropna()
df = df.reset_index(drop = True)

In [9]:
y = df.gust_factor
X = df.drop(['gust_factor'], axis = 1)

# Changing the type of X,y so as to work with Tensorflow
X, y = X.values.astype(np.float32), y.values.astype(np.float32)

scaler = StandardScaler()

# Assuming 'X' is your feature matrix and 'y' is your target variable
# Replace 'X' and 'y' with your actual data

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

X_train = scaler.fit_transform(X_train)
X_val = scaler.fit_transform(X_val)
X_test = scaler.fit_transform(X_test)


In [10]:
y_pred_bLR = bLR.predict(X_test)
y_pred_rnn = rnn.predict(X_test)

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step


In [11]:
best_weights, best_metric = None, float('inf')

weight_combs = [(100 -i, i) for i in range(101)]
for weights in weight_combs:
    ensemble_predictions = weighted_average([y_pred_bLR, y_pred_rnn], weights) / 100
    current_metric = mean_absolute_percentage_error(y_test, ensemble_predictions)

    if current_metric < best_metric:
        best_weights = weights
        best_metric = current_metric

In [12]:
print(f"Best Weights: {best_weights}")
print(f"Best Metric: {best_metric}%")

Best Weights: (100, 0)
Best Metric: 7.886343955993652%


In [13]:
# Simple non weighted average prediction
y_pred = (y_pred_bLR + y_pred_rnn) / 2
mape = mean_absolute_percentage_error(y_test, y_pred)

print(f"MAPE: {mape} %")

MAPE: 7.969348907470703 %
