In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import pandas as pd, numpy as np, tensorflow as tf

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    return tf.reduce_mean(tf.abs((y_true-y_pred) / y_true)) * 100.0

In [3]:
def weighted_average(predictions, weights):
    return sum(pred * weight for pred, weight in zip(predictions, weights))

In [4]:
nn128 = tf.keras.models.load_model('./saved_models/nn-128-units.keras')
nn256 = tf.keras.models.load_model('./saved_models/nn-256-units.keras')

In [9]:
#df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-full-25ms-24hr-28-2-24.feather')
df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-full-W-Landscape-And-Station-Elevations-25ms-24hr-11-3-24.feather')
#df = pd.read_feather('D:\Skóli\lokaverkefni_vel\data\merged-full-W-Landscape-And-Station-Elevations-25ms-24hr-11-3-24.feather')
df = df[df.f < df.fg]
df['gust_factor'] = df.fg / df.f

df_unfolded = df.elevations.apply(pd.Series)

df = pd.concat([df, df_unfolded], axis = 1)

df = df.dropna()
df = df.reset_index(drop = True)

In [10]:
df.iloc[:, -70:] = df.iloc[:, -70:].sub(df.station_elevation, axis = 0)

In [11]:
from sklearn.decomposition import PCA

df_landscape_elevation = df.iloc[:, -70:]

df_landscape_elevation = (df_landscape_elevation - df_landscape_elevation.mean()) / df_landscape_elevation.std()

n_components = 5

pca = PCA(n_components=n_components)
compressed_features = pca.fit_transform(df_landscape_elevation)

compressed_df = pd.DataFrame(data = compressed_features, columns = ['PC' + str(i) for i in range(n_components)])

df  = pd.concat([df, compressed_df], axis = 1)

In [12]:
from math import sqrt, sin, cos, acos, pi

def cornerFromCenterLand(row):
    X, Y, d = row.X, row.Y, row.d
    inlandX, inlandY = 520000, 485000

    len_v1 = sqrt((X-inlandX)**2 + (Y-inlandY)**2)

    v1 = ((X - inlandX)/len_v1, (Y - inlandY)/ len_v1)

    outX, outY = X + cos(d * pi / 180), Y + sin(d * pi / 180)

    len_v2 = sqrt(outX**2 + outY**2)

    v2 = (outX / len_v2, outY / len_v2)

    return acos(np.dot(v1, v2))
    

In [13]:
df['relativeCorner'] = df.apply(cornerFromCenterLand, axis = 1)

In [14]:
y = df.gust_factor
X = df[['Ri_01', 'Ri_12', 'X', 'Y', 'station_elevation', 'relativeCorner'] + ['PC' + str(i) for i in range(n_components)]]

# Changing the type of X,y so as to work with Tensorflow
X, y = X.values.astype(np.float32), y.values.astype(np.float32)
scaler = StandardScaler()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
X_train = scaler.fit_transform(X_train)
X_val = scaler.fit_transform(X_val)
X_test = scaler.fit_transform(X_test)

In [28]:
y_pred_nn_128 = nn128.predict(X_train)
y_pred_nn_256 = nn256.predict(X_train)

[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [32]:
best_weights, best_metric = None, float('inf')

weight_combs = [(100 -i, i) for i in range(101)]
for weights in weight_combs:
    ensemble_predictions = weighted_average([y_pred_nn_128, y_pred_nn_256], weights) / 100
    current_metric = mean_absolute_percentage_error(y_test, ensemble_predictions)

    if current_metric < best_metric:
        best_weights = weights
        best_metric = current_metric

In [33]:
print(f"Best Weights: {best_weights}")
print(f"Best Metric: {best_metric}%")

Best Weights: (12, 88)
Best Metric: 8.906474113464355%


In [36]:
y_pred_nn128_test, y_pred_nn256_test = nn128.predict(X_test), nn256.predict(X_test)

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [37]:
best_weights, input_data = np.array(best_weights), np.array((y_pred_nn128_test, y_pred_nn256_test))

final_predictions = np.array(best_weights) * np.array((y_pred_nn128_test, y_pred_nn256_test)) / 100
mape = mean_absolute_percentage_error(y_test, final_predictions)
print(f'The MAPE is {mape}')

ValueError: operands could not be broadcast together with shapes (2256,) (2,2256,2) 