In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd, numpy as np, tensorflow as tf

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from skopt import BayesSearchCV
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam
from tensorflow.keras.regularizers import l2

from scikeras.wrappers import KerasRegressor

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of predictions.

    Evaluates ``mean(|(y_true - y_pred) / y_true|) * 100`` using TensorFlow ops.

    Args:
        y_true: Reference values; used as the denominator of the relative error.
        y_pred: Predicted values, combined element-wise with ``y_true``.

    Returns:
        The ``tf.reduce_mean`` of the absolute relative error, scaled by 100.

    NOTE(review): no guard against zeros in ``y_true`` — confirm the targets
    are always non-zero before using this as a metric.
    """
    relative_error = (y_true - y_pred) / y_true
    absolute_relative_error = tf.abs(relative_error)
    return tf.reduce_mean(absolute_relative_error) * 100.0

In [3]:
# Load the merged dataset.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df = pd.read_feather("E:/Skóli/HÍ/Vélaverkfræði Master Hí/Lokaverkefni/Data/merged-full-W-Landscape-And-Station-Elevations-2-sectors-25ms-24hr-18-3-24-stripped-with-klst.feather")

# Keep rows where f < fg. Additionally requiring f > 0 prevents a division by
# zero in the ratio below (an infinite target would survive dropna()).
# .copy() detaches the filtered frame so the column assignment that follows
# writes to its own data instead of a slice of the loaded frame.
df = df[(df.f > 0) & (df.f < df.fg)].copy()
df['gust_factor'] = df.fg / df.f

# Expand each entry of `elevations` into its own set of columns
# (presumably one list-like of elevations per row — TODO confirm).
df_unfolded = df.elevations.apply(pd.Series)

# df_unfolded shares df's index, so the column-wise concat lines rows up.
df = pd.concat([df, df_unfolded], axis = 1)

# Drop rows with any missing value and renumber the rows from 0.
df = df.dropna()
df = df.reset_index(drop = True)

In [4]:
# Number of unfolded elevation columns, derived from the label of the last
# column of df (presumably 0-based integer labels from the unfolding step).
# NOTE(review): only valid while the unfolded elevation columns are still the
# last columns of df — run before any further columns are appended.
n_elevations = df.columns[-1] + 1

In [5]:
# Subtract station_elevation row-wise from the last n_elevations columns, so
# those columns hold elevations relative to the station.
# NOTE(review): this overwrites df in place; re-running the cell subtracts
# station_elevation a second time — run exactly once after a fresh load.
df.iloc[:, -n_elevations:] = df.iloc[:, -n_elevations:].sub(df.station_elevation, axis = 0)

In [6]:
from sklearn.decomposition import PCA

# The elevation columns are the last n_elevations columns of df at this point.
df_landscape_elevation = df.iloc[:, -n_elevations:]

# Standardize every elevation column (zero mean, unit sample standard deviation).
# NOTE(review): the standardization and PCA below are fitted on all rows,
# i.e. before the train/test split performed later — the held-out rows
# influence the features. Consider fitting on the training rows only.
df_landscape_elevation = (df_landscape_elevation - df_landscape_elevation.mean()) / df_landscape_elevation.std()

n_components = 10

# Seed PCA: depending on the scikit-learn version and the data shape the
# default solver may be a randomized SVD, which would make the components
# (and every downstream result) differ between runs.
pca = PCA(n_components=n_components, random_state=42)
compressed_features = pca.fit_transform(df_landscape_elevation)

# Reuse df's index explicitly so the concat below aligns row-for-row even if
# df's index is not a fresh 0..n-1 range.
compressed_df = pd.DataFrame(
    data = compressed_features,
    index = df.index,
    columns = ['PC' + str(i) for i in range(n_components)],
)

df  = pd.concat([df, compressed_df], axis = 1)

In [7]:
from math import sqrt, sin, cos, acos, pi

def cornerFromCenterLand(row):
    """Angle between the centre-to-station vector and the direction ``d``.

    The first vector points from the hard-coded inland reference point
    (520000, 485000) to the station at (``row.X``, ``row.Y``). The second is
    the unit vector ``(cos d, sin d)`` with ``d`` converted from degrees.

    Args:
        row: Object exposing ``X``, ``Y`` and ``d`` attributes (e.g. a
            DataFrame row). ``d`` is interpreted as degrees.
            NOTE(review): ``d`` is used with the mathematical convention
            (counter-clockwise from the +X axis); if it is a meteorological
            wind direction (clockwise from north) it needs converting first.

    Returns:
        The angle in radians, in ``[0, pi]``.

    Raises:
        ZeroDivisionError: for plain Python numbers when the station
            coincides with the reference point (the angle is undefined).
    """
    X, Y, d = row.X, row.Y, row.d
    inlandX, inlandY = 520000, 485000

    # Unit vector from the inland reference point to the station.
    dx, dy = X - inlandX, Y - inlandY
    len_v1 = sqrt(dx**2 + dy**2)
    v1 = (dx / len_v1, dy / len_v1)

    # Unit direction vector for d. The previous code normalised the absolute
    # point (X + cos d, Y + sin d) instead, which is dominated by the station
    # coordinates and therefore almost independent of d.
    theta = d * pi / 180
    v2 = (cos(theta), sin(theta))

    cos_angle = v1[0] * v2[0] + v1[1] * v2[1]

    # Round-off can push the dot product of two unit vectors marginally outside
    # [-1, 1], where acos raises ValueError. Plain comparisons (rather than
    # min/max) leave a NaN untouched so it still propagates.
    if cos_angle > 1.0:
        cos_angle = 1.0
    elif cos_angle < -1.0:
        cos_angle = -1.0

    return acos(cos_angle)
    

In [8]:
# cornerFromCenterLand reads only X, Y and d, so apply it over that narrow
# slice: a row-wise apply over the full frame would build a wide, mixed-dtype
# Series for every record for no benefit.
df['relativeCorner'] = df[['X', 'Y', 'd']].apply(cornerFromCenterLand, axis = 1)

In [9]:
# Display the column labels of df to check which feature columns are present.
df.columns

Index([             'X',              'Y',           'time',          'ws_15',
               'ws_250',         'ws_500',          'wd_15',         'wd_250',
               'wd_500',           'p_15',
       ...
                  'PC1',            'PC2',            'PC3',            'PC4',
                  'PC5',            'PC6',            'PC7',            'PC8',
                  'PC9', 'relativeCorner'],
      dtype='object', length=180)

In [10]:
# Target: gust factor. Features: stability/stratification columns, station
# elevation, the relative angle and the PCA components of the landscape.
y = df.gust_factor
X = df[['Ri_01', 'Ri_12', 'N_01', 'N_12', 'station_elevation', 'relativeCorner'] + ['PC' + str(i) for i in range(n_components)]]

# Changing the type of X,y so as to work with Tensorflow
X, y = X.values.astype(np.float32), y.values.astype(np.float32)
scaler = StandardScaler()

# Split the data into training and testing sets, then carve a validation set
# out of the training portion (80/20, then 90/10 of the 80).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# Fit the scaler on the training data ONLY and reuse its mean/std for the
# validation and test sets. Calling fit_transform on each set would scale them
# with their own statistics, leaking information and making the three sets
# inconsistent with each other.
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [11]:
def build_search_space():
    """Return the hyperparameter candidates explored by the search.

    Returns:
        dict mapping parameter names (each prefixed with ``model__``) to the
        collection of candidate values for that parameter.
    """
    layer_counts = np.arange(4, 9)  # 4, 5, 6, 7, 8

    return {
        'model__n_layers': layer_counts,
        'model__n_units': [64, 128, 256, 512],
        'model__activation': ['relu', 'elu', 'softmax'],
        'model__penalty': [0, 0.01, 0.1, 1],
        'model__optimizer': ['adam', 'rmsprop', 'adamax'],
    }

In [42]:
def build_model(n_layers, n_units, activation, penalty, optimizer):
    """Build and compile a fully connected regression network.

    Layout: an input Dense + BatchNormalization pair, ``n_layers`` further
    Dense + BatchNormalization pairs, one ReLU Dense layer followed by
    Dropout(0.5), and a single linear output unit.

    Args:
        n_layers: Number of Dense/BatchNormalization pairs after the input pair.
        n_units: Width of every Dense layer except the output.
        activation: Activation of the input and repeated Dense layers.
        penalty: L2 kernel-regularization factor of every Dense layer except
            the output.
        optimizer: Optimizer passed to ``compile``.

    Returns:
        The compiled model, using mean squared error as the loss.

    Note:
        The input width is read from the global ``X_train``.
    """
    def hidden_dense(act, **extra):
        # Every hidden layer shares the same width and L2 penalty.
        return Dense(units = n_units, activation = act, kernel_regularizer=l2(penalty), **extra)

    n_features = X_train.shape[1]
    network = Sequential()

    # Input pair
    network.add(hidden_dense(activation, input_shape = (n_features, )))
    network.add(BatchNormalization())

    # Repeated hidden pairs
    pairs_added = 0
    while pairs_added < n_layers:
        network.add(hidden_dense(activation))
        network.add(BatchNormalization())
        pairs_added += 1

    # Final ReLU layer with dropout, then the regression output
    network.add(hidden_dense('relu'))
    network.add(Dropout(0.5))
    network.add(Dense(units = 1, activation = 'linear'))

    network.compile(optimizer = optimizer, loss = 'mean_squared_error')
    return network


In [43]:
# Wrap the Keras builder as a scikit-learn regressor. `model=` is the supported
# scikeras keyword; `build_fn=` is deprecated and slated to become an error.
model = KerasRegressor(model = build_model, verbose = 0, epochs = 200)
search_space = build_search_space()

# Shuffled, seeded 5-fold split so the folds are reproducible.
kfold = KFold(n_splits = 5, shuffle=True, random_state=42)

# Bayesian search over the space: 32 iterations, each evaluated on the 5 folds,
# using all available cores.
bayes = BayesSearchCV(estimator = model, search_spaces=search_space, cv = kfold, n_iter = 32, verbose = 2, random_state=42,n_jobs = -1)

In [44]:
# Run the hyperparameter search on the training split.
# NOTE(review): X_train was scaled before the K-fold split, so each fold's
# held-out part has influenced the scaling statistics — consider scaling
# inside a Pipeline.
bayes.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
