In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd, numpy as np, tensorflow as tf

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from skopt import BayesSearchCV
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam
from tensorflow.keras.regularizers import l2

from scikeras.wrappers import KerasRegressor

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of `y_pred` against `y_true`.

    The value is mean(|(y_true - y_pred) / y_true|) multiplied by 100.
    There is no guard against zero entries in `y_true`.
    """
    relative_error = (y_true - y_pred) / y_true
    absolute_relative_error = tf.abs(relative_error)
    return tf.reduce_mean(absolute_relative_error) * 100.0

In [3]:
# Location of the merged Feather file that feeds the rest of the notebook.
feather_path = "E:/Skóli/HÍ/Vélaverkfræði Master Hí/Lokaverkefni/Data/merged-full-W-Landscape-And-Station-Elevations-2-sectors-25ms-24hr-18-3-24-stripped-with-klst.feather"
df = pd.read_feather(feather_path)

#with open("E:/Skóli/HÍ/Vélaverkfræði Master Hí/Lokaverkefni/Data/merged-full-W-Landscape-And-Station-Elevations-2-sectors-25ms-24hr-18-3-24-stripped-with-klst.npy", 'wb') as f:
#    np.save(f, df.to_numpy(), allow_pickle=True)


In [4]:

# Keep only the rows where `f` is strictly smaller than `fg`, then store
# their ratio fg / f as the `gust_factor` column.
keep_rows = df.f < df.fg
df = df[keep_rows]
df['gust_factor'] = df.fg / df.f

# Expand every entry of the `elevations` column into its own columns.
df_unfolded = df.elevations.apply(pd.Series)

# Attach the expanded columns, drop rows with missing values and renumber
# the index from zero.
df = (
    pd.concat([df, df_unfolded], axis = 1)
    .dropna()
    .reset_index(drop = True)
)

In [5]:
# Number of columns produced by unfolding `elevations`.
# Taken from the unfolded frame itself instead of `df.columns[-1] + 1`: that
# expression only works while the last column label of `df` is an integer
# and raises a TypeError once string-labelled columns are appended to `df`
# (i.e. when this cell is re-run after the later cells).
n_elevations = df_unfolded.shape[1]

In [6]:
# Subtract each row's `station_elevation` from its trailing `n_elevations`
# columns.
# NOTE: this overwrites the columns in place, so running the cell twice
# subtracts the station elevation twice.
trailing_elevations = df.iloc[:, -n_elevations:]
df.iloc[:, -n_elevations:] = trailing_elevations.sub(df.station_elevation, axis = 0)

In [7]:
from sklearn.decomposition import PCA

# Trailing `n_elevations` columns of df: the block that is compressed by PCA.
df_landscape_elevation = df.iloc[:, -n_elevations:]

# Standardise every column to zero mean / unit standard deviation.
# A constant column has a standard deviation of 0, which would turn the
# whole column into NaN and make PCA fail; such columns are scaled by 1
# instead, so they become all zeros.
elevation_std = df_landscape_elevation.std().replace(0, 1)
df_landscape_elevation = (df_landscape_elevation - df_landscape_elevation.mean()) / elevation_std

n_components = 5

# Fixed seed so that the decomposition is reproducible when a randomised
# SVD solver is selected (same seed as the splits further down).
pca = PCA(n_components=n_components, random_state=42)
compressed_features = pca.fit_transform(df_landscape_elevation)

# Reuse df's index explicitly so that the column-wise concat below lines the
# components up with the rows they were computed from.
compressed_df = pd.DataFrame(
    data = compressed_features,
    index = df.index,
    columns = ['PC' + str(i) for i in range(n_components)],
)

df  = pd.concat([df, compressed_df], axis = 1)

In [8]:
from math import sqrt, sin, cos, acos, pi

def cornerFromCenterLand(row):
    """Angle between the inland-point-to-station direction and the direction `d`.

    Parameters
    ----------
    row : object with attributes ``X``, ``Y`` and ``d``
        ``X`` / ``Y`` are the station coordinates (same coordinate system as
        the hard-coded inland reference point) and ``d`` is a direction in
        degrees.

    Returns
    -------
    float
        Angle in radians, in [0, pi], between the unit vector pointing from
        the inland reference point to the station and the unit vector
        ``(cos d, sin d)``. ``nan`` when the station coincides with the
        reference point (the first direction is then undefined).
    """
    X, Y, d = row.X, row.Y, row.d
    inlandX, inlandY = 520000, 485000

    # Unit vector from the inland reference point towards the station.
    dx, dy = X - inlandX, Y - inlandY
    len_v1 = sqrt(dx**2 + dy**2)
    if len_v1 == 0:
        return float('nan')
    v1 = (dx / len_v1, dy / len_v1)

    # Unit vector of the direction `d`. The previous version normalised the
    # absolute point (X + cos d, Y + sin d) instead of the offset from the
    # station, so `d` had practically no influence on the result.
    # NOTE(review): `d` is treated as an angle measured from the +X axis
    # towards +Y, as in the original trigonometry; confirm this matches the
    # convention of the data (e.g. compass bearings would need converting).
    theta = d * pi / 180
    v2 = (cos(theta), sin(theta))

    # Clamp to acos' domain: rounding can push the dot product of two unit
    # vectors marginally outside [-1, 1]. The argument order lets NaN pass
    # through unchanged.
    cos_angle = v1[0] * v2[0] + v1[1] * v2[1]
    cos_angle = max(min(cos_angle, 1.0), -1.0)

    return acos(cos_angle)
    

In [9]:
# Evaluate cornerFromCenterLand for every row and store the result as the
# `relativeCorner` column.
relative_corner = df.apply(cornerFromCenterLand, axis = 1)
df['relativeCorner'] = relative_corner

In [10]:
# Show the column labels of df after the feature columns have been added.
df.columns

Index([             'X',              'Y',           'time',          'ws_15',
               'ws_250',         'ws_500',          'wd_15',         'wd_250',
               'wd_500',           'p_15',
       ...
                    136,              137,              138,              139,
                  'PC0',            'PC1',            'PC2',            'PC3',
                  'PC4', 'relativeCorner'],
      dtype='object', length=175)

In [11]:
# Target: gust factor. Features: the two `Ri_*` columns, the station
# elevation, the relative corner and the PCA components.
y = df.gust_factor
X = df[['Ri_01', 'Ri_12', 'station_elevation', 'relativeCorner'] + ['PC' + str(i) for i in range(n_components)]]

# Changing the type of X,y so as to work with Tensorflow
X, y = X.values.astype(np.float32), y.values.astype(np.float32)

# NOTE(review): only the first 100 samples are kept from here on; this looks
# like a debugging subset - confirm before a full training run.
X, y = X[:100], y[:100]
scaler = StandardScaler()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# Fit the scaler on the training set only and reuse its statistics for the
# validation and test sets. Calling fit_transform on each set (as before)
# scales every set with different statistics and leaves `scaler` fitted on
# the test data.
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [11]:
def build_search_space():
    """Return the hyperparameter search space for the Keras model builder.

    Keys carry the ``model__`` prefix; each value lists the candidate
    settings for that parameter.
    """
    layer_counts = np.arange(4, 9)
    unit_counts = [64, 128, 256, 512]
    activations = ['relu', 'elu', 'softmax']
    penalties = [0, 0.01, 0.1, 1]
    optimizers = ['adam', 'rmsprop', 'adamax']

    return {
        'model__n_layers': layer_counts,
        'model__n_units': unit_counts,
        'model__activation': activations,
        'model__penalty': penalties,
        'model__optimizer': optimizers,
    }

In [89]:
def build_search_space():
    """Return a single-candidate search space (one value per parameter).

    Contains the training settings ``epochs`` and ``batch_size`` together
    with the ``model__``-prefixed parameters of the model builder.
    """
    search_space = {}

    # Training settings.
    search_space['epochs'] = [100]
    search_space['batch_size'] = [128]

    # Model-builder parameters.
    search_space['model__n_layers'] = np.arange(4, 5)
    search_space['model__n_units'] = [64]
    search_space['model__activation'] = ['relu']
    search_space['model__penalty'] = [0.1]
    search_space['model__optimizer'] = ['adam']

    return search_space

In [98]:
def build_search_space():
    """Return a search space holding only the training settings.

    Both ``epochs`` and ``batch_size`` have a single candidate value.
    """
    return dict(epochs=[100], batch_size=[128])

In [49]:
# Hyperparameter grid: three candidate values for each of the three settings.
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
)

In [93]:
def build_model(n_layers, n_units, activation, penalty, optimizer):
    """Assemble and compile a fully connected Keras regression network.

    Layer order: an input Dense layer + BatchNormalization, then `n_layers`
    repetitions of Dense + BatchNormalization, then one more Dense layer
    followed by Dropout(0.5), and finally a single linear output unit.

    Parameters
    ----------
    n_layers : number of repeated Dense + BatchNormalization pairs.
    n_units : width of every hidden Dense layer.
    activation : activation of every hidden Dense layer.
    penalty : factor of the L2 kernel regulariser on every hidden Dense layer.
    optimizer : optimizer handed to ``model.compile``.

    The input width is read from the global ``X_train``; the loss is mean
    squared error.
    """
    def hidden_dense(**extra):
        # Every hidden layer shares width, activation and L2 penalty.
        return Dense(units = n_units, activation = activation, kernel_regularizer=l2(penalty), **extra)

    stack = [hidden_dense(input_shape = (X_train.shape[1], )), BatchNormalization()]

    for _ in range(n_layers):
        stack += [hidden_dense(), BatchNormalization()]

    stack += [hidden_dense(), Dropout(0.5), Dense(units = 1, activation = 'linear')]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(optimizer = optimizer, loss = 'mean_squared_error')

    return model


In [99]:
def build_model():#n_layers, n_units, activation, penalty, optimizer):
    """Assemble and compile the fixed-configuration regression network.

    Six hidden Dense layers of 256 ReLU units with an L2 kernel penalty of
    0.1: the first five are each followed by BatchNormalization, the sixth
    by Dropout(0.5). The output is a single linear unit. The input width is
    read from the global ``X_train``; the model is compiled with the 'adam'
    optimizer and mean squared error loss.
    """
    n_features = X_train.shape[1]

    model = Sequential()

    # Input block.
    model.add(Dense(units = 256, activation = 'relu', kernel_regularizer=l2(0.1), input_shape = (n_features, )))
    model.add(BatchNormalization())

    # Four further Dense + BatchNormalization blocks.
    remaining_blocks = 4
    while remaining_blocks > 0:
        model.add(Dense(units = 256, activation = 'relu', kernel_regularizer=l2(0.1)))
        model.add(BatchNormalization())
        remaining_blocks -= 1

    # Last hidden layer, regularised with dropout instead of batch norm.
    model.add(Dense(units = 256, activation = 'relu', kernel_regularizer=l2(0.1)))
    model.add(Dropout(0.5))

    # Linear regression head.
    model.add(Dense(units = 1, activation = 'linear'))

    model.compile(optimizer = 'adam', loss = 'mean_squared_error')

    return model

In [100]:
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV

# Wrap the Keras builder as a scikit-learn estimator. `model=` is the current
# SciKeras argument name; `build_fn=` is its deprecated alias.
# NOTE(review): the failure recorded for this cell ("Could not interpret
# metric identifier: loss") is raised inside scikeras itself - presumably an
# incompatibility between the installed scikeras and Keras versions; confirm
# the installed versions.
model = KerasRegressor(model = build_model, verbose = 0)
search_space = build_search_space()

#mape_scorer = make_scorer('mean_squared_error', greater_is_better=False)

# Two shuffled folds with a fixed seed.
kfold = KFold(n_splits = 2, shuffle=True, random_state=42)

#bayes = BayesSearchCV(estimator = model, search_spaces=search_space, scoring = mape_scorer, cv = kfold, n_iter = 1, verbose = 2, random_state=42,n_jobs = -1)
# Exhaustive search over `search_space`, scored with negated MAPE.
grid_search = GridSearchCV(estimator=model, param_grid=search_space, scoring = 'neg_mean_absolute_percentage_error', cv = kfold, n_jobs = -1)
grid_search.fit(X_train, y_train)

ValueError: 
All the 2 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\scikeras\wrappers.py", line 735, in fit
    self._fit(
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\scikeras\wrappers.py", line 900, in _fit
    self._fit_keras_model(
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\scikeras\wrappers.py", line 515, in _fit_keras_model
    raise e
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\scikeras\wrappers.py", line 510, in _fit_keras_model
    key = metric_name(key)
          ^^^^^^^^^^^^^^^^
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\scikeras\utils\__init__.py", line 109, in metric_name
    fn_or_cls = keras_metric_get(metric)
                ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Brynjar Geir\Documents\lokaverkefni_vel\lokaverkefni_vel\code\.venv\Lib\site-packages\keras\src\metrics\__init__.py", line 204, in get
    raise ValueError(f"Could not interpret metric identifier: {identifier}")
ValueError: Could not interpret metric identifier: loss


In [40]:
#bayes = BayesSearchCV(estimator = model, search_spaces=search_space, scoring = mape_scorer, cv = kfold, n_iter = 1, verbose = 2, random_state=42,n_jobs = -1)
# NOTE(review): `bayes` is only assigned in commented-out lines of this
# notebook, so this call presumably relies on an object left over from an
# earlier kernel state and fails with a NameError on a fresh kernel.
bayes.fit(X_train, y_train)

Fitting 2 folds for each of 1 candidates, totalling 2 fits


ValueError: Could not interpret metric identifier: loss