In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, RepeatVector, Dense, TimeDistributed
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import Adam
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [17]:
# Börja med att testa en av varje och sen utöka. Kolla best_params och försök att lägga värden som ligger i närheten av resultaten
param_dist = {
    'num_lstm_units': [32], # , 64, 128
    'dropout_rate': [0.1],  # , 0.2, 0.3
    'lr': [0.0001],  # , 0.001, 0.01
    'batch_size': [32],  # , 64
    'epochs': [10],  # , 20
    'activation': ["relu"],  # , "linear" , "sigmoid", "tanh", "softmax"
    # "optimizer": ["adam", "rmsprop", "nadam"],  # adagrad är bra när vi har lite data, adadelta passar för noisy gradients
    # "loss": ["mae", "mse", "binary cross-entropy"]
}

In [18]:
def create_dataset(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])

    return np.array(Xs), np.array(ys)

In [19]:
df = pd.read_csv("data/asset-data-export_O12QnL6kAl-640876dfe5066-1678276319.csv", delimiter=";", header=3, index_col="Date/Time") 

train_size = int(len(df) * .95)
test_size = len(df) - train_size

train, test = df.iloc[0:train_size], df.iloc[train_size:len(df)]

scaler = StandardScaler()
scaler = scaler.fit(train[["Water level, Nap (cm)"]])

train["Water level, Nap (cm)"] = scaler.transform(train[["Water level, Nap (cm)"]])
test["Water level, Nap (cm)"] = scaler.transform(test[["Water level, Nap (cm)"]])

TIME_STEPS = 30

X_train, y_train = create_dataset(
    train[["Water level, Nap (cm)"]], 
    train[["Water level, Nap (cm)"]], 
    TIME_STEPS)

X_test, y_test = create_dataset(
    test[["Water level, Nap (cm)"]], 
    test[["Water level, Nap (cm)"]], 
    TIME_STEPS)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train["Water level, Nap (cm)"] = scaler.transform(train[["Water level, Nap (cm)"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test["Water level, Nap (cm)"] = scaler.transform(test[["Water level, Nap (cm)"]])


In [20]:
def create_model(num_lstm_units=50, dropout_rate=0.2, lr=0.001, activation='relu'):
    model = Sequential()
    model.add(LSTM(num_lstm_units, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(dropout_rate))
    model.add(RepeatVector(n=X_train.shape[1]))
    model.add(LSTM(num_lstm_units, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(TimeDistributed(Dense(units=X_train.shape[2], activation=activation
    )))
    optimizer = Adam(lr=lr)
    model.compile(loss='mae', optimizer=optimizer)
    return model

In [21]:

model = KerasRegressor(build_fn=create_model, verbose=0)

random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist,
                                   cv=3, n_iter=10, n_jobs=-1, verbose=1)

  model = KerasRegressor(build_fn=create_model, verbose=0)


In [22]:
random_search_results = random_search.fit(X_train, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


: 

In [None]:
print("Best MAE score:", -random_search_results.best_score_)
print("Best hyperparameters:", random_search_results.best_params_)