In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from math import cos, pi
from tqdm import tqdm
import pandas as pd, time
import pyarrow as pa
from datetime import datetime
import tensorflow as tf, pandas as pd

# Minute-resolution timestamp of when this cell ran (used as the default TAG below).
timestamp = f"{datetime.now():%Y-%m-%dT%H:%M}"
# Feather file that holds the input table, relative to the notebook's directory.
file_path = "../../../data/Combined/data_fg.feather"

In [2]:
import os, numpy as np
from tensorflow.keras.regularizers import l2

In [3]:
def initialize(env_var, default):
    """Return a setting, letting an environment variable override ``default``.

    Parameters
    ----------
    env_var : str
        Name of the environment variable to look up.
    default : bool, int, float, str or None
        Value used when the variable is unset or empty. Its type also decides
        how the environment string is converted.

    Returns
    -------
    The default, or the environment value converted to ``type(default)``.

    Raises
    ------
    ValueError
        If the environment string cannot be converted to the type of a
        non-bool default (e.g. ``int("abc")``).
    """
    env_value = os.getenv(env_var)
    if not env_value:
        # Unset or empty string: keep the default.
        return default
    if isinstance(default, bool):
        # bool("False") would be True, so booleans are parsed by hand.
        # Matching is case-insensitive so "true"/"TRUE"/"yes"/"on" are not
        # silently read as False; any other text is False.
        return env_value.strip().lower() in {"true", "1", "yes", "on"}
    if default is None:
        # NoneType cannot be called with an argument; hand back the raw string.
        return env_value
    return type(default)(env_value)

In [4]:
# Run configuration. Every constant takes the default given here unless the
# environment variable named in the first argument is set, in which case
# initialize() converts that string to the default's type.
# Note that LR is read from LEARNING_RATE; all other constants share their
# name with the environment variable.
# NOTE(review): within the cells shown here only NLAYERS is actually read
# (by the model builders); the training cells use literal epochs/batch size
# rather than MAX_EPOCHS/BATCH_SIZE. The other flags are presumably consumed
# elsewhere - confirm before removing any of them.
INCLUDE_DEM    = initialize('INCLUDE_DEM',    True)
KERAS_TUNER    = initialize('KERAS_TUNER',    False)
BS_OPTIMIZE    = initialize('BS_OPTIMIZE',    False)
CROSSVAL_FOLDS = initialize('CROSSVAL_FOLDS', 0)
PREPROCESS     = initialize('PREPROCESS',     False)
INCLUDE_XY     = initialize('INCLUDE_XY',     True)
# Presumably a wall-clock training budget in seconds - TODO confirm the unit.
MAX_TRAIN_TIME = initialize('MAX_TRAIN_TIME', 3600)
LR             = initialize('LEARNING_RATE',  0.004)
# Number of repeated Dense + BatchNormalization pairs in the model builders.
NLAYERS        = initialize('NLAYERS',        5)
MAX_EPOCHS     = initialize('MAX_EPOCHS',     500)
PENALTY        = initialize('PENALTY',        1e-4)  # 5e-4
BATCH_SIZE     = initialize('BATCH_SIZE',     16384)
PARAMS         = initialize('PARAMS',         'all')
PARAMS10       = initialize('PARAMS10',       True)
# Defaults to the minute-resolution timestamp computed in the first cell.
TAG            = initialize('TAG',            timestamp)
TESTING        = initialize('TESTING',        False)

In [5]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, used as the Keras training loss.

    Parameters
    ----------
    y_true : tensor
        Target values.
    y_pred : tensor
        Model predictions, broadcastable against ``y_true``.

    Returns
    -------
    Scalar tensor: ``100 * mean(|y_true - y_pred| / |y_true|)``.
    """
    # Clamp the denominator away from zero so a single zero target cannot
    # turn the whole batch loss into inf/NaN. For any |y_true| >= 1e-7 the
    # result is identical to the unguarded |(y_true - y_pred) / y_true|.
    denominator = tf.maximum(tf.abs(y_true), 1e-7)
    return tf.reduce_mean(tf.abs(y_true - y_pred) / denominator) * 100.0

In [7]:
def scale(X, scaler, max_col_to_scale):
    """Apply a fitted scaler to the leading columns and divide the rest by 1000.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        Feature matrix. A floating-point array is modified in place and the
        same object is returned. Any other dtype is first converted to a
        float copy, because writing standardised values into an integer
        array would truncate them and the in-place division would raise.
    scaler : object with a ``transform`` method
        Already-fitted transformer applied to ``X[:, :max_col_to_scale]``.
    max_col_to_scale : int
        Number of leading columns handed to ``scaler``.

    Returns
    -------
    numpy.ndarray
        The scaled matrix.
    """
    if not np.issubdtype(X.dtype, np.floating):
        X = X.astype(float)
    X[:, :max_col_to_scale] = scaler.transform(X[:, :max_col_to_scale])
    if max_col_to_scale < X.shape[1]:  # Change elevation to km
        X[:, max_col_to_scale:] /= 1000
    return X

In [8]:
def scale_train(X_train, X_val, max_col_to_scale):
    """Fit a StandardScaler on the training columns and scale both sets.

    The scaler is fitted on ``X_train[:, :max_col_to_scale]`` only; the
    training matrix and then the validation matrix are passed through
    ``scale`` with it.

    Returns
    -------
    (X_train, X_val, scaler) : the two scaled matrices and the fitted scaler.
    """
    fitted = StandardScaler().fit(X_train[:, :max_col_to_scale])
    scaled_train, scaled_val = (
        scale(part, fitted, max_col_to_scale) for part in (X_train, X_val)
    )
    return scaled_train, scaled_val, fitted

In [9]:
def splitNscale(X, y, max_col_to_scale, test_size=0.2, val_size=0.2, random_state=42):
    """Split into train/validation/test sets and scale the feature matrices.

    Parameters
    ----------
    X, y : array-like
        Features and targets with matching first dimension.
    max_col_to_scale : int
        Number of leading feature columns to standardise; see ``scale`` for
        how the remaining columns are treated.
    test_size : float, default 0.2
        Fraction of all rows held out as the test set.
    val_size : float, default 0.2
        Fraction of the rows remaining after the test split that become the
        validation set (0.2 of 0.8, i.e. 16 % of all rows by default).
    random_state : int, default 42
        Seed passed to both ``train_test_split`` calls.

    Returns
    -------
    X_train, y_train, X_val, y_val, X_test, y_test
        The scaler is fitted on the training rows only.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=val_size, random_state=random_state)
    # Reuse the shared fit-and-scale helper rather than repeating its steps.
    X_train, X_val, scaler = scale_train(X_train, X_val, max_col_to_scale)
    X_test = scale(X_test, scaler, max_col_to_scale)
    return X_train, y_train, X_val, y_val, X_test, y_test

In [10]:
def build_model(optimizer, penalty, n_features=None):
    """Build and compile the fully connected regression network.

    Architecture: Input -> Dense(96, relu) + BatchNormalization ->
    NLAYERS x [Dense(96, relu) + BatchNormalization] -> Dense(96, relu) ->
    Dropout(0.5) -> Dense(1, linear). Every hidden Dense kernel carries an
    L2 regulariser with factor ``penalty``.

    Parameters
    ----------
    optimizer : str or Keras optimizer
        Passed straight to ``model.compile``.
    penalty : float
        L2 regularisation factor for the hidden Dense kernels.
    n_features : int, optional
        Width of the input layer. When omitted it falls back to the global
        feature matrix ``X`` (``X.shape[1]``), as before; passing it
        explicitly removes the dependence on notebook state.

    Returns
    -------
    tf.keras.Sequential compiled with ``mean_absolute_percentage_error``.
    """
    if n_features is None:
        n_features = X.shape[1]
    n_units, n_layers, activation = 96, NLAYERS, 'relu'
    layers = tf.keras.layers

    def hidden():
        # A fresh layer (and regulariser instance) on every call.
        return layers.Dense(units=n_units, activation=activation,
                            kernel_regularizer=l2(penalty))

    model = tf.keras.Sequential()
    model.add(layers.Input(shape=(n_features,)))
    model.add(hidden())
    model.add(layers.BatchNormalization())
    for _ in range(n_layers):
        model.add(hidden())
        model.add(layers.BatchNormalization())
    model.add(hidden())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=1, activation='linear'))

    model.compile(optimizer=optimizer, loss=mean_absolute_percentage_error)
    return model

In [28]:
def build_model_wo(optimizer, penalty, n_features=None):
    """Build and compile the network for the reduced feature matrix ``X_wo``.

    Architecture: Input -> Dense(96, relu) + BatchNormalization ->
    NLAYERS x [Dense(96, relu) + BatchNormalization] -> Dense(96, relu) ->
    Dropout(0.5) -> Dense(1, linear). Every hidden Dense kernel carries an
    L2 regulariser with factor ``penalty``.

    Parameters
    ----------
    optimizer : str or Keras optimizer
        Passed straight to ``model.compile``.
    penalty : float
        L2 regularisation factor for the hidden Dense kernels.
    n_features : int, optional
        Width of the input layer. When omitted it falls back to the global
        ``X_wo`` (``X_wo.shape[1]``), as before. That global must already
        exist at call time; passing the width explicitly avoids the
        dependence on notebook state.

    Returns
    -------
    tf.keras.Sequential compiled with ``mean_absolute_percentage_error``.
    """
    if n_features is None:
        n_features = X_wo.shape[1]
    n_units, n_layers, activation = 96, NLAYERS, 'relu'
    layers = tf.keras.layers

    def hidden():
        # A fresh layer (and regulariser instance) on every call.
        return layers.Dense(units=n_units, activation=activation,
                            kernel_regularizer=l2(penalty))

    model = tf.keras.Sequential()
    model.add(layers.Input(shape=(n_features,)))
    model.add(hidden())
    model.add(layers.BatchNormalization())
    for _ in range(n_layers):
        model.add(hidden())
        model.add(layers.BatchNormalization())
    model.add(hidden())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=1, activation='linear'))

    model.compile(optimizer=optimizer, loss=mean_absolute_percentage_error)
    return model

In [11]:
# Load the table, discard the columns that are not model inputs, and keep
# only complete rows.
df = (
    pd.read_feather(file_path)
    .drop(columns=['stod', 'time', 'X', 'Y', 'from_center'])
    .dropna()
)
# Features are every remaining column except the target 'gust_factor'.
X = df.drop(columns='gust_factor').values
y = df['gust_factor'].values

In [15]:
# Move the last two feature columns to the front so that they fall inside the
# leading block of columns that gets standardised later.
# Rebuilt from df rather than from X itself so that re-running this cell gives
# the same column order instead of rotating the columns a second time.
X = np.roll(df.drop('gust_factor', axis=1).values, 2, axis=1)

In [16]:
# Sanity check: (rows, feature columns) of the reordered feature matrix.
X.shape

(2075753, 135)

In [18]:
# First ten column names of the cleaned frame (df still includes the target,
# so positions are shifted by one relative to the columns X was built from).
df.columns[:10]

Index(['gust_factor', 'ws_15', 'wd_15', 't_15', 'p_15', 'Ri', 'N_squared',
       'station_elevation', 'twd', 'elevation_0'],
      dtype='object')

In [19]:
# Full-feature network. build_model sizes its input layer from the global X,
# so X must already be in its final column layout when this cell runs.
# penalty is the L2 factor applied to the hidden Dense kernels.
# NOTE(review): 0.00012506 looks like a tuned value - record where it came from.
model = build_model(optimizer = 'adamax', penalty =  0.00012506)

In [20]:
# Train/validation/test split with scaling fitted on the training rows only.
# The first 10 columns (the two moved to the front plus the next eight) are
# standardised; every remaining column is divided by 1000 inside scale().
X_train, y_train, X_val, y_val, X_test, y_test = splitNscale(X, y, 10)

In [21]:
# Train the full-feature model with per-epoch validation, then score it on
# the held-out test set.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,  # MAX_EPOCHS
    batch_size=4096,
    verbose=1,
)
res = model.evaluate(X_test, y_test, batch_size=4096)

Epoch 1/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 43.9288 - val_loss: 15.6367
Epoch 2/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 20.6671 - val_loss: 14.9663
Epoch 3/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 19.2879 - val_loss: 14.5949
Epoch 4/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 18.3208 - val_loss: 14.3953
Epoch 5/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 17.4939 - val_loss: 14.1046
Epoch 6/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 16.5951 - val_loss: 13.7891
Epoch 7/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 15.8637 - val_loss: 13.6496
Epoch 8/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 15.1618 - val_loss: 13.3166
Epoch 9/

KeyboardInterrupt: 

In [29]:
# Comparison run without the two leading feature columns.
# X_wo has to exist before build_model_wo is called, because the builder
# sizes its input layer from X_wo; otherwise a fresh kernel raises NameError.
X_wo = X[:, 2:]
model = build_model_wo(optimizer = 'adamax', penalty =  0.00012506)
# With two columns removed, only the first 8 columns are standardised.
# Note: this rebinds model, history, res and the split arrays created by the
# full-feature run above.
X_train, y_train, X_val, y_val, X_test, y_test = splitNscale(X_wo, y, 8)
history = model.fit(X_train, y_train,
                     validation_data = (X_val, y_val),
                     epochs = 200,
                     batch_size = 2**12, verbose = 1)
res = model.evaluate(X_test, y_test, batch_size = 2**12)

Epoch 1/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - loss: 42.1216 - val_loss: 15.5228
Epoch 2/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 20.8286 - val_loss: 14.8933
Epoch 3/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 19.2471 - val_loss: 14.4968
Epoch 4/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 18.2573 - val_loss: 14.2887
Epoch 5/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 17.3802 - val_loss: 14.0544
Epoch 6/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 16.4716 - val_loss: 13.7234
Epoch 7/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 15.7038 - val_loss: 13.5824
Epoch 8/200
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 15.0513 - val_loss: 13.2846
Epoch 9/

KeyboardInterrupt: 

In [26]:
# Shapes of the reduced feature matrix and of its train/validation/test parts.
X_wo.shape, X_train.shape, X_val.shape, X_test.shape

((2075753, 133), (1328481, 133), (332121, 133), (415151, 133))