# Kategorinen data ja puuttuvien arvojen käsittely

In [None]:
import pandas as pd 
import numpy as np 

from sklearn import preprocessing
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_predict
from sklearn.impute import SimpleImputer

import matplotlib.pyplot as plt
import os 

In [None]:
# Load the listings data, caching a local CSV copy after the first download.
if os.path.exists('abnd_listings.csv'):
    df = pd.read_csv('abnd_listings.csv')
else:
    df = pd.read_csv('https://raw.githubusercontent.com/InfoTUNI/joda2022/master/koodiesimerkit/data.csv')
    # index=False keeps the cached file column-identical to the download;
    # otherwise the row index is written out as well and comes back as an
    # extra unnamed column the next time the cache is read.
    df.to_csv('abnd_listings.csv', index=False)

df.info()

In [None]:
# Work on an independent copy of the three columns used by the first model.
df_no_missing = df.loc[:, ['host_response_time', 'host_response_rate', 'review_scores_rating']].copy()

print(df_no_missing.head())
# List the distinct values of the categorical response-time column.
print(df_no_missing['host_response_time'].unique())

In [None]:
# Strip the '%' characters from the response rate and parse the rest as numbers.
df_no_missing['host_response_rate'] = pd.to_numeric(
    df_no_missing['host_response_rate'].str.strip('%')
)

# DataFrame.info() writes its summary to stdout itself, so it has to be called;
# printing the attribute would only show the bound-method repr.
df_no_missing.info()
print('\n')
print(df_no_missing.dtypes, '\n')
print(df_no_missing.head(), '\n')

# Number of missing values per column.
null_counts = df_no_missing.isnull().sum()
print(f"Null values count: \n {null_counts}")

In [None]:
# Discard every row that has a missing value in any of the selected columns.
df_no_missing = df_no_missing.dropna(axis=0, how='any')
df_no_missing.info()

In [None]:
# Encode the host_response_time categories as integer labels.
le = preprocessing.LabelEncoder()
le.fit(df_no_missing['host_response_time'])
arr = le.transform(df_no_missing['host_response_time'])
df_no_missing['host_response_time'] = arr

In [None]:
# Show the encoded labels, then the first rows of the updated frame.
print(arr)
df_no_missing.head(n=5)

In [None]:
# Linear model predicting review_scores_rating from the remaining columns.
lr = linear_model.LinearRegression()
X = df_no_missing.drop(columns=['review_scores_rating'])
y = df_no_missing['review_scores_rating']


In [None]:
# Cross-validated predictions (10 folds) for every row.
predictions = cross_val_predict(lr, X, y, cv=10)

fig, ax = plt.subplots(figsize=(20, 10))

# Measured vs. predicted ratings; the dashed diagonal marks a perfect prediction.
ax.scatter(y, predictions, edgecolors=(0, 0, 0))
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', linewidth=4)
ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()

In [None]:
# Pairwise Pearson correlations between the three columns.
print(df_no_missing.corr(method='pearson'))


In [None]:
# Errors of the cross-validated predictions. scikit-learn's metric signature is
# (y_true, y_pred), so the measured ratings go first.
mse = mean_squared_error(y, predictions)
mae = mean_absolute_error(y, predictions)
print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

# Toinen lähestymistapa - Täytetään tyhjät arvot mediaanilla 

In [None]:
# Independent copy of all review-score columns, including the target column.
df_imp = df.loc[:, [
    'review_scores_accuracy',
    'review_scores_cleanliness',
    'review_scores_checkin',
    'review_scores_communication',
    'review_scores_location',
    'review_scores_value',
    'review_scores_rating',
]].copy()

# Missing values per column before imputation.
print(df_imp.isna().sum())


In [None]:
# Inspect the median of each selected column.
df_imp.median(axis=0)

In [None]:
# Replace each missing score with the median of its own column, then confirm
# that no missing values remain.
df_imp = df_imp.fillna(value=df_imp.median())
print(df_imp.isna().sum())

In [None]:
# Print a summary of the imputed frame.
df_imp.info()

# Ennustetaan lineaarisella mallilla arvostelut, kun tyhjät arvot on korvattu kyseisen sarakkeen mediaanilla

In [None]:
# Fit a linear model for review_scores_rating and predict a held-out split.
from sklearn.model_selection import train_test_split

lr = linear_model.LinearRegression()

X = df_imp.drop(columns=['review_scores_rating'])
y = df_imp['review_scores_rating']

# Separate training and test data: 20 % is held out, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

predictions = lr.fit(X_train, y_train).predict(X_test)

In [None]:
# Measured vs. predicted ratings on the test split; the dashed diagonal marks
# a perfect prediction.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(y_test, predictions, edgecolors=(0, 0, 0))
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', linewidth=4)
ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()

In [None]:
# Pearson correlation of every column with the target rating.
print(df_imp.corr(method='pearson')['review_scores_rating'])

In [None]:
# Errors of the linear model on the test split. scikit-learn's metric signature
# is (y_true, y_pred), so the measured ratings go first.
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

# Running table of every model's performance; the linear baseline was not
# timed, so its time is recorded as 0.0.
models_performance = pd.DataFrame({'Model Name': ['Linear Regression'], 'MSE': [mse], 'MAE': [mae], 'time': [0.0]})


# Käytetään satunnaista päätöspuumetsää ja optimoidaan hyperparametrit satunnaisella haulla 

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
import time

rf = RandomForestRegressor()

# Hyperparameter search space.
param_space = {
    'n_estimators': np.arange(1, 500, 10),
    'max_depth': np.arange(3, 11),
    'min_samples_split': np.arange(2, 11),
    'min_samples_leaf': np.arange(1, 6),
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False]
}

# Random search over the space with 5-fold cross-validation; n_iter=1 samples
# a single candidate.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_space,
    cv=5,
    n_jobs=-2,
    n_iter=1,
    verbose=0,
)

# Time the whole search.
start_time = time.time()
random_search.fit(X_train, y_train)
best_model = random_search.best_estimator_
end_time = time.time()
elapsed_time = end_time - start_time

# Predict the test split with the best estimator found.
predictions = best_model.predict(X_test)

# Measured vs. predicted ratings; the dashed diagonal marks a perfect prediction.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(y_test, predictions, edgecolors=(0, 0, 0))
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

# scikit-learn's metric signature is (y_true, y_pred).
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

# Record this model's performance.
models_performance = pd.concat([models_performance, pd.DataFrame({'Model Name': ['RandomForestRegressor'], 'MSE': [mse], 'MAE': [mae], 'time' : [elapsed_time]})], ignore_index=True)


# Testataan XGBoost-mallia ja etsitään sillekin hyperparametrit

In [None]:
import xgboost

# XGBoost regressor with a squared-error objective.
xgb = xgboost.XGBRegressor(objective ='reg:squarederror')

# Hyperparameter search space.
param_space = {
    'n_estimators': np.arange(1, 500, 10),
    'max_depth': np.arange(3, 11),
    'learning_rate': [0.1, 0.01, 0.001],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
    'gamma': [0, 1, 5],
    'reg_alpha': [0, 0.1, 0.5],
    'reg_lambda': [1, 1.5, 2]
}

# Random search over the space with 5-fold cross-validation; n_iter=1 samples
# a single candidate.
random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_space,
    cv=5,
    n_jobs=-2,
    n_iter=1,
    verbose=0,
)

# Time the whole search.
start_time = time.time()
random_search.fit(X_train, y_train)
best_model = random_search.best_estimator_
end_time = time.time()
elapsed_time = end_time - start_time

# Predict the test split with the best estimator found.
predictions = best_model.predict(X_test)

# Measured vs. predicted ratings; the dashed diagonal marks a perfect prediction.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(y_test, predictions, edgecolors=(0, 0, 0))
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

# scikit-learn's metric signature is (y_true, y_pred).
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

# Record the result under its own name so it is not confused with the
# random-forest row in the comparison table.
models_performance = pd.concat([models_performance, pd.DataFrame({'Model Name': ['XGBRegressor'], 'MSE': [mse], 'MAE': [mae], 'time' : [elapsed_time]})], ignore_index=True)

# Optimoidaan NN hyperparametrit ensin satunnaisella haulla

In [None]:
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
# Import KerasTuner under the `keras_tuner` name, matching the other tuner
# imports in this notebook; `kerastuner` is the legacy module name.
from keras_tuner import RandomSearch
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Collects the trained networks.
models = []

# Standardise the features; the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def build_model(hp):
    """Build and compile a two-hidden-layer regression network from *hp*.

    The tuner-controlled values are the unit counts of both hidden layers
    ('units_0', 'units_1') and the Adam learning rate ('learning_rate').
    The input width is taken from the global X_train_scaled.

    Args:
        hp: KerasTuner hyperparameter container used to sample the values.

    Returns:
        The compiled Keras model, with mean squared error as its loss.
    """
    n_features = X_train_scaled.shape[1]

    # Search space for the number of neurons in each hidden layer.
    hidden_0 = hp.Int('units_0', min_value=1, max_value=128, step=4)
    hidden_1 = hp.Int('units_1', min_value=1, max_value=32, step=4)
    step_size = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])

    model = keras.Sequential([
        keras.layers.Input(shape=(n_features,)),
        keras.layers.Dense(units=hidden_0, activation='relu'),
        keras.layers.Dense(units=hidden_1, activation='relu'),
        keras.layers.Dense(1, activation='linear'),
    ])
    model.compile(optimizer=keras.optimizers.Adam(step_size), loss='mse')
    return model

# Random search over the build_model space, ranked by validation loss.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=1,
    executions_per_trial=1,
    directory='NN_search',
    project_name='kt_random')

start_time = time.time()
# NOTE(review): the test split doubles as validation data here and in the
# final fit below, so model selection sees the test set — consider holding
# out a separate validation split.
tuner.search(X_train_scaled, y_train, epochs=10, validation_data=(X_test_scaled, y_test))

# Fetch the best hyperparameters and train a fresh network with them.
# (epochs is set to 1 below; raise it for a longer final training run.)
best_hyperparameters = tuner.get_best_hyperparameters()[0]
print("Parhaat hyperparametrit:\n", best_hyperparameters.values)

model_random = build_model(best_hyperparameters)

# Checkpoint only the weights of the epoch with the lowest validation loss.
checkpoint_filepath = './NN_search/kt_random/best_model_weights.weights.h5'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)

model_random.fit(X_train_scaled, y_train, epochs=1, validation_data=(X_test_scaled, y_test), callbacks=[model_checkpoint_callback])
# Restore the checkpointed weights before evaluating.
model_random.load_weights(checkpoint_filepath)
end_time = time.time()
elapsed_time = end_time - start_time
models.append(model_random)

predictions = model_random.predict(X_test_scaled)
# scikit-learn's metric signature is (y_true, y_pred).
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

# Record this model's performance.
models_performance = pd.concat([models_performance, pd.DataFrame({'Model Name': ['NN random search'], 'MSE': [mse], 'MAE': [mae], 'time' : [elapsed_time]})], ignore_index=True)





# Optimoidaan NN hyperparametrit käyttäen Bayesilaista optimointia

In [23]:
import tensorflow as tf
from keras_tuner import BayesianOptimization
from keras import regularizers, Sequential, layers, optimizers
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler 
from keras import initializers


# Standardise the features; fit on the training split only and reuse the
# fitted scaler for the test split.
# Alternatives left disabled: RobustScaler(), or feeding X_train / X_test unscaled.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


def build_model(hp):
    """Build and compile a regularised two-hidden-layer regression network.

    Tuner-controlled values: the shared kernel initializer; per hidden layer
    the unit count, activation, L1/L2 penalties and dropout rate; and the
    optimizer together with its learning rate. The input width is taken from
    the global X_train_scaled.

    Args:
        hp: KerasTuner hyperparameter container used to sample the values.

    Returns:
        The compiled Keras model, with mean squared error as its loss.
    """
    # Search bounds for the unit count of each hidden layer: (min, max, step).
    unit_ranges = [(1, 128, 4), (6, 64, 4)]

    initializer_classes = {
        'he_normal': initializers.HeNormal,
        'glorot_uniform': initializers.GlorotUniform,
        'lecun_normal': initializers.LecunNormal,
    }
    optimizer_classes = {
        'rmsprop': optimizers.RMSprop,
        'nadam': optimizers.Nadam,
    }

    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(X_train_scaled.shape[1],)))

    initializer_name = hp.Choice('initializer', values=['he_normal', 'glorot_uniform', 'lecun_normal', 'glorot_normal'])
    # Any name outside the table falls back to Glorot normal.
    initializer = initializer_classes.get(initializer_name, initializers.GlorotNormal)()

    for i, (low, high, step) in enumerate(unit_ranges):
        units = hp.Int(f'units_{i}', min_value=low, max_value=high, step=step)
        activation = hp.Choice(f'activation_{i}', values=['relu', 'linear', "selu"])
        l1_penalty = hp.Float(f'l1_reg_{i}', min_value=1e-6, max_value=1, sampling='log')
        l2_penalty = hp.Float(f'l2_reg_{i}', min_value=1e-6, max_value=1, sampling='log')
        model.add(layers.Dense(
            units=units,
            activation=activation,
            kernel_regularizer=regularizers.l1_l2(l1=l1_penalty, l2=l2_penalty),
            kernel_initializer=initializer,
        ))

        dropout_rate = hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.05)
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='linear'))

    optimizer_choice = hp.Choice('optimizer', values=['rmsprop', 'nadam', 'AdaMax'])
    learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4])

    # Any name outside the table falls back to Adamax.
    optimizer_class = optimizer_classes.get(optimizer_choice, optimizers.Adamax)
    optimizer = optimizer_class(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='mse')
    return model

# Initialize the Bayesian Optimization tuner


# Stop a run once the validation loss has not improved for 10 epochs.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=1,
)

# After 5 epochs without improvement, multiply the learning rate by 0.7,
# never going below 1e-6.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.7,
    min_lr=1e-6,
    verbose=1,
)

# Abort a run as soon as the loss becomes NaN.
terminate_on_nan = TerminateOnNaN()

# Run one Bayesian hyperparameter search per cross-validation fold and keep
# each fold's best network.
kf = KFold(n_splits=5)
models_bayes = []

# The counter is called `fold` so that the builtin round() is not shadowed.
for fold, (train_index, val_index) in enumerate(kf.split(X_train_scaled)):

    # overwrite=True makes every fold start from an empty project directory.
    tuner = BayesianOptimization(
        build_model,
        objective='val_loss',
        max_trials=420,
        executions_per_trial=2,
        directory='NN_search',
        project_name='kt_bayesian',
        overwrite=True,
        max_consecutive_failed_trials=100,
        max_retries_per_trial=0,
    )

    start_time = time.time()
    X_train_b, X_val_b = X_train_scaled[train_index], X_train_scaled[val_index]
    # KFold yields row positions, so pick the targets positionally instead of
    # rewriting y_train's index in place.
    y_train_b = y_train.iloc[train_index].to_numpy()
    y_val_b = y_train.iloc[val_index].to_numpy()

    # Batched, prefetching training pipeline for this fold.
    y_train_b = tf.data.Dataset.from_tensor_slices(y_train_b)
    X_train_b = tf.data.Dataset.from_tensor_slices(X_train_b)
    train_dataset = tf.data.Dataset.zip((X_train_b, y_train_b)).batch(256)
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    tuner.search(
        train_dataset,
        epochs=50,
        validation_data=(X_val_b, y_val_b),
        callbacks=[early_stopping_callback, reduce_lr_callback, terminate_on_nan],
        verbose=2
    )

    end_time = time.time()
    elapsed_time = end_time - start_time

    best_model = tuner.get_best_models(num_models=1)[0]
    models_bayes.append(best_model)

    predictions = best_model.predict(X_test_scaled)
    # scikit-learn's metric signature is (y_true, y_pred).
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)

    print(f"Elapsed time: {elapsed_time:.2f} seconds")
    print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

    # Record under the same lowercase 'time' column as every other model;
    # a differently-cased key would add a second, mostly-NaN column.
    models_performance = pd.concat([models_performance, pd.DataFrame({
        'Model Name': [f'Best BayesianOptimization Model {fold}'],
        'MSE': [mse],
        'MAE': [mae],
        'time': [elapsed_time]
    })], ignore_index=True)



KeyboardInterrupt: 

In [None]:
from keras_tuner import Hyperband
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import regularizers, Sequential, layers, optimizers
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras_tuner import Hyperband
from keras.callbacks import EarlyStopping, ModelCheckpoint,  ReduceLROnPlateau, TerminateOnNaN
from keras import regularizers, Sequential, layers, optimizers
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler 
from keras import initializers

# Scale the features with RobustScaler; fit on the training split only and
# reuse the fitted scaler for the test split.
# Alternative left disabled: StandardScaler().
scaler = RobustScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Collects the best hyperparameters of every fold.
best_hyperparameters = []


def build_model(hp):
    """Build and compile a regularised two-hidden-layer regression network.

    Tuner-controlled values: the shared kernel initializer; per hidden layer
    the unit count, activation, L1/L2 penalties and dropout rate; and the
    optimizer together with its learning rate. The input width is taken from
    the global X_train_scaled.

    Args:
        hp: KerasTuner hyperparameter container used to sample the values.

    Returns:
        The compiled Keras model, with mean squared error as its loss.
    """
    # Search bounds for the unit count of each hidden layer: (min, max, step).
    unit_ranges = [(1, 128, 4), (6, 32, 1)]

    initializer_classes = {
        'he_normal': initializers.HeNormal,
        'glorot_uniform': initializers.GlorotUniform,
        'lecun_normal': initializers.LecunNormal,
    }
    optimizer_classes = {
        'sgd': optimizers.SGD,
        'rmsprop': optimizers.RMSprop,
        'nadam': optimizers.Nadam,
    }

    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(X_train_scaled.shape[1],)))

    initializer_name = hp.Choice('initializer', values=['he_normal', 'glorot_uniform', 'lecun_normal', 'glorot_normal'])
    # Any name outside the table falls back to Glorot normal.
    initializer = initializer_classes.get(initializer_name, initializers.GlorotNormal)()

    for i, (low, high, step) in enumerate(unit_ranges):
        units = hp.Int(f'units_{i}', min_value=low, max_value=high, step=step)
        activation = hp.Choice(f'activation_{i}', values=['relu', 'linear', "selu"])
        l1_penalty = hp.Float(f'l1_reg_{i}', min_value=1e-6, max_value=1, sampling='log')
        l2_penalty = hp.Float(f'l2_reg_{i}', min_value=1e-6, max_value=1, sampling='log')
        model.add(layers.Dense(
            units=units,
            activation=activation,
            kernel_regularizer=regularizers.l1_l2(l1=l1_penalty, l2=l2_penalty),
            kernel_initializer=initializer,
        ))

        dropout_rate = hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.05)
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='linear'))

    optimizer_choice = hp.Choice('optimizer', values=['sgd', 'rmsprop', 'nadam', 'AdaMax'])
    learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4])

    # Any name outside the table falls back to Adamax.
    optimizer_class = optimizer_classes.get(optimizer_choice, optimizers.Adamax)
    optimizer = optimizer_class(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='mse')
    return model
    
# Run one Hyperband search per cross-validation fold and keep each fold's
# best network and hyperparameters.
kf = KFold(n_splits=5)
models_hyperband = []

# The counter is called `fold` so that the builtin round() is not shadowed.
for fold, (train_index, val_index) in enumerate(kf.split(X_train_scaled)):

    start_time = time.time()
    X_train_k, X_val_k = X_train_scaled[train_index], X_train_scaled[val_index]
    # KFold yields row positions, so pick the targets positionally instead of
    # rewriting y_train's index on every iteration.
    y_train_k = y_train.iloc[train_index].to_numpy()
    y_val_k = y_train.iloc[val_index].to_numpy()

    # Batched, prefetching training pipeline for this fold.
    y_train_k = tf.data.Dataset.from_tensor_slices(y_train_k)
    X_train_k = tf.data.Dataset.from_tensor_slices(X_train_k)
    train_dataset = tf.data.Dataset.zip((X_train_k, y_train_k)).batch(256)
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    # overwrite=True makes every fold start from an empty project directory.
    tuner = Hyperband(
        build_model,
        objective='val_loss',
        max_epochs=50,
        factor=3,
        directory='NN_search',
        project_name='kt_hyperband',
        executions_per_trial=1,
        overwrite=True,
        hyperband_iterations=1,
        max_consecutive_failed_trials=10
    )

    # NOTE(review): Hyperband assigns each trial its own epoch budget, so the
    # epochs=30 passed here is presumably overridden — confirm.
    tuner.search(
        train_dataset,
        validation_data=(X_val_k, y_val_k),
        epochs=30,
        callbacks=[
            ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=3, min_lr=1e-6),
            TerminateOnNaN(),
        ],
    )
    end_time = time.time()
    elapsed_time = end_time - start_time

    best_model = tuner.get_best_models(num_models=1)[0]
    best_hyperparameters.append(tuner.get_best_hyperparameters(num_trials=1)[0])
    predictions = best_model.predict(X_test_scaled)
    # scikit-learn's metric signature is (y_true, y_pred).
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    models_hyperband.append(best_model)

    print(f"Round: {fold}")
    print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

    # Record this fold's performance.
    models_performance = pd.concat([models_performance, pd.DataFrame({'Model Name': [f'Hyperband model {fold}'], 'MSE': [mse], 'MAE': [mae], 'time' : [elapsed_time]})], ignore_index=True)


# Otetaan neuroverkosta viimeinen kerros pois, koulutetaan XGBoost näin saaduilla ominaisuusvektoreilla ja toteutetaan hyperparametrien satunnaishaku

In [None]:
import xgboost
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras.models import Model

# Collect the feature vectors of every Bayesian-tuned network first.
if not models_bayes:
    raise ValueError("models_bayes is empty - run the Bayesian optimisation cell first")

# The Bayesian-tuned networks were trained on StandardScaler output, whereas
# X_train_scaled / X_test_scaled are reassigned with RobustScaler output by the
# Hyperband cell. Rebuild inputs with the scaling those networks expect.
nn_scaler = preprocessing.StandardScaler().fit(X_train)
X_train_nn = nn_scaler.transform(X_train)
X_test_nn = nn_scaler.transform(X_test)

X_train_features_list = []
X_test_features_list = []

for model in models_bayes:
    # Sub-model ending at the layer just before the output layer.
    feature_extractor = Model(inputs=model.inputs, outputs=model.layers[-2].output)
    X_train_features = feature_extractor.predict(X_train_nn)
    X_test_features = feature_extractor.predict(X_test_nn)

    X_train_features_list.append(X_train_features)
    X_test_features_list.append(X_test_features)

# Concatenate the per-model feature vectors column-wise.
X_train_combined = np.concatenate(X_train_features_list, axis=1)
X_test_combined = np.concatenate(X_test_features_list, axis=1)

# Train an XGBoost model on the combined features, with a random
# hyperparameter search.
xgb = xgboost.XGBRegressor(objective ='reg:squarederror')
param_space = {
    'n_estimators': np.arange(1, 500, 20),
    'max_depth': np.arange(2, 11),
    'learning_rate': [0.1, 0.01, 0.001],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
    'gamma': [0, 1, 5],
    'reg_alpha': [0, 0.1, 0.5],
    'reg_lambda': [1, 1.5, 2]
}

random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_space,
    cv=5,
    n_jobs=-2,
    n_iter=1000,
    verbose=1
)

# Time the whole search.
start_time = time.time()
random_search.fit(X_train_combined, y_train)
best_model = random_search.best_estimator_
end_time = time.time()
elapsed_time = end_time - start_time

predictions = best_model.predict(X_test_combined)
print(f"Train shape : {X_train_combined.shape} Test shape : {X_test_combined.shape}")
print(f"Train shape : {y_train.shape} Test shape : {y_test.shape}")

# Measured vs. predicted ratings; the dashed diagonal marks a perfect prediction.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(y_test, predictions, edgecolors=(0, 0, 0))
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

# scikit-learn's metric signature is (y_true, y_pred).
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print(f"Mean squared error: {mse:.2f}\nMean absolute error: {mae:.2f}")

# Record this model's performance.
models_performance = pd.concat([models_performance, pd.DataFrame({'Model Name': ['XGBoost with NN features'], 'MSE': [mse], 'MAE': [mae], 'time' : [elapsed_time]})], ignore_index=True)

# NOTE(review): best_hyperparameters is not assigned by the Bayesian cell; it
# holds whatever an earlier cell stored under that name, so it may not
# describe the networks used for the features above.
print(f"Best NN model hyperparameters: {best_hyperparameters}")


In [None]:


import seaborn as sns

# Bar chart comparing the MSE of every recorded model.
plt.figure(figsize=(10, 6))
sns.barplot(data=models_performance, x='Model Name', y='MSE')
plt.title('MSE Comparison among Different Models')
plt.xlabel('Model Name')
plt.ylabel('Mean Squared Error (MSE)')
plt.xticks(rotation=45)
plt.show()

# Full performance table, followed by each stored hyperparameter set.
print(models_performance)
separator = "-" * 50
for i, hp in enumerate(best_hyperparameters):
    print(f"Best hyperparameters for model {i+1}:")
    for key in hp.values:
        print(f"{key}: {hp.get(key)}")
    print(separator)
