In [1]:
import os

In [2]:
os.chdir("../")

In [3]:
os.chdir("src")

In [4]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import RandomizedSearchCV, train_test_split
# from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense

from anomaly_detection import *
from utils import *
from data_preparation import *
from constants import DATA_PATH, SEP_CSV

from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error

from sklearn.utils import check_random_state
from scipy.stats import randint as sp_randint

In [5]:
os.chdir("../")

In [6]:
def prepare_data(data, lookback):
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:i + lookback])
        y.append(data[i + lookback])
    X = np.array(X)
    y = np.array(y)
    return X, y

In [7]:
def create_model(lookback, threshold):
    model = Sequential()
    model.add(LSTM(50, input_shape=(10, 1)))  # Update the input_shape parameter
    model.add(Dense(1, activation='relu'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [8]:
def hyperparameter_tuning(c_FEATURE_1, c_FEATURE_2, c_FEATURE_3, c_FEATURE_4, lookback):
    # Split the data into training and testing sets
    df_prep = get_data()
    timeseries = get_filtered_timeseries(df_prep, c_FEATURE_1, c_FEATURE_2, c_FEATURE_3, c_FEATURE_4)
    scaler = MinMaxScaler(feature_range=(0, 1))
    normalized_data = scaler.fit_transform(timeseries.values)
    X, y = prepare_data(normalized_data, lookback)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the parameter grid
    param_dist = {
        'epochs': [10, 20],
        'lookback': [10, 20],
        'batch_size': [32, 64],
        'threshold': [0.1, 0.2]
    }

    # Create the model
    model = KerasRegressor(build_fn=create_model, lookback=lookback, threshold=0.2, verbose=0)

    # Perform random search
    random_search = RandomizedSearchCV(model, param_distributions=param_dist, scoring='neg_mean_squared_error', cv=3, n_iter=5)
    random_search.fit(X_train, y_train)

    # Get the best hyperparameters and MSE score
    best_hyperparameters = random_search.best_params_
    best_mse_score = -random_search.best_score_

    return best_hyperparameters, best_mse_score

In [10]:
best_hyperparameters, best_mse_score = hyperparameter_tuning('Belgien', 'MINI', 'Cabrio', '', lookback=10)

print("Best Hyperparameters:", best_hyperparameters)
print("Best mse_score:", best_mse_score)

Best Hyperparameters: {'threshold': 0.1, 'lookback': 20, 'epochs': 20, 'batch_size': 32}
Best mse_score: 0.046489269869879335


In [11]:
best_hyperparameters, best_mse_score = hyperparameter_tuning('Niederlande', 'PGO', 'Roadster', 'unspec.', lookback=10)

print("Best Hyperparameters:", best_hyperparameters)
print("Best mse_score:", best_mse_score)

Best Hyperparameters: {'threshold': 0.2, 'lookback': 10, 'epochs': 10, 'batch_size': 32}
Best mse_score: 0.078790028069067


In [12]:
best_hyperparameters, best_mse_score = hyperparameter_tuning('Belgien', 'Alfa Romeo', 'Coupe', '4C', lookback=10)

print("Best Hyperparameters:", best_hyperparameters)
print("Best mse_score:", best_mse_score)

Best Hyperparameters: {'threshold': 0.2, 'lookback': 20, 'epochs': 10, 'batch_size': 32}
Best mse_score: 0.04456632745333974
