In [None]:
import os
import re
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.optimizers import Adam, AdamW, Nadam, Adamax, SGD, RMSprop
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, Normalization, BatchNormalization, Activation
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.inspection import permutation_importance
from sklearn.base import BaseEstimator, RegressorMixin

print("Python version:", sys.version)
print("TensorFlow version:", tf.__version__)
print("Available GPUs:", tf.config.list_physical_devices('GPU'))

In [None]:
# Single seed reused for the train/test splits, the tuner and permutation importance.
SEED = 28112025

# Seeds Python's `random`, NumPy and TensorFlow in one call, so no source of
# randomness is left unseeded (previously only NumPy and TensorFlow were seeded).
tf.keras.utils.set_random_seed(SEED)

# Tolerance, in grams, within which a prediction is counted as accurate.
ERROR_MARGIN_G = 2

# Hyperparameter-search budget (passed to search_best_model).
BATCH_SIZE = 256
MAX_TRIALS = 20
EPOCHS = 20

In [None]:
# read_csv already returns a DataFrame; wrapping it in pd.DataFrame() was redundant.
df = pd.read_csv('3d_print_miniatures_base.csv')

In [None]:
df.describe()

In [None]:
df.head()

In [None]:
print(f"--- ARTISTS ({df['artist'].nunique()})---")
print(sorted(df['artist'].unique()))

print()
print(f"--- MINIS ({df['mini'].nunique()})---")
minis = sorted(df['mini'].unique())
print(minis[:10])

In [None]:
plt.figure()
df["artist"].value_counts().plot(kind="bar")
plt.xlabel("Artist")
plt.ylabel("Number of Samples")
plt.title("Distribution of Artists")
plt.tight_layout()
plt.show()

In [None]:
parts_per_mini = df.groupby("mini").size()

avg_parts = parts_per_mini.mean()
max_parts = parts_per_mini.max()
min_parts = parts_per_mini.min()

print("Average parts per mini:", avg_parts)
print("Max parts for a mini:", max_parts)
print("Min parts for a mini:", min_parts)

In [None]:
def is_nan_columns(df):
    """Report missing values and return the rows that contain any.

    Prints the per-column NaN counts (only columns with at least one NaN)
    when the frame has missing values.

    Returns:
        The subset of rows with at least one NaN, or None when the frame
        has no missing values at all.
    """
    null_mask = df.isnull()

    # Guard clause: nothing to report for a fully populated frame.
    if not null_mask.values.any():
        return None

    print("Dataset contains NaN values.")

    per_column = null_mask.sum()
    print("NaN counts in each column:")
    print(per_column[per_column > 0])

    print()

    return df[null_mask.any(axis=1)]

df_nans = is_nan_columns(df)

In [None]:
# Rows without a target value cannot be used for training or evaluation.
df = df.dropna(subset=['weight'])

In [None]:
def preprocess_df(df, features_to_keep, label):
    """Derive engineered features and return only the requested columns.

    The input frame is not modified: all work happens on a copy, and the
    returned frame is itself an independent copy (so adding columns to it
    later does not raise SettingWithCopyWarning).

    Args:
        df: Raw frame. Must contain the columns 'kb', 'volume', 'mass',
            'surface_area', 'bbox_x', 'bbox_y', 'bbox_z', 'bbox_area',
            'weight' and 'scale'.
        features_to_keep: Column names to keep as model inputs.
        label: Name of the target column, appended as the last column.

    Returns:
        A new DataFrame with columns ``[*features_to_keep, label]``.
    """
    # Work on a copy so the caller's frame keeps its original columns and values.
    df = df.copy()

    # NOTE: astype(int) raises if 'kb' contains NaN.
    df['kb'] = df['kb'].astype(int)
    # Round volume UP to one decimal place.
    df['volume'] = np.ceil(df['volume'] * 10) / 10

    # Rescaled copies of volume and mass (factor 1e-3 * 1.1).
    # NOTE(review): the physical meaning of the 1.1 factor is not visible here — confirm.
    df['volume_g'] = df['volume'] * 1e-3 * 1.1
    df['mass_g'] = df['mass'] * 1e-3 * 1.1

    # Interaction features
    df['volume_mass_interaction'] = df['volume_g'] * df['mass_g']
    df['surface_volume_ratio'] = df['surface_area'] / df['volume']
    df['bbox_volume_ratio'] = df['bbox_area'] / df['volume']

    # Ratios against mass
    df['surface_mass_ratio'] = df['surface_area'] / df['mass']
    df['bbox_mass_ratio'] = df['bbox_area'] / df['mass']

    # Rounding happens last so the ratios above are computed from unrounded inputs.
    rounded_cols = [
        'surface_area', 'bbox_x', 'bbox_y', 'bbox_z', 'bbox_area', 'weight', 'scale',
        'volume', 'volume_g', 'mass', 'mass_g', 'volume_mass_interaction',
        'surface_volume_ratio', 'surface_mass_ratio',
    ]
    for col in rounded_cols:
        df[col] = df[col].round(1)

    return df[[*features_to_keep, label]].copy()

features = [
    'artist',
    'volume',
    'mass',
    'scale',
    'bbox_x',
    'bbox_y',
    'bbox_z',
    'bbox_area',
    'surface_area',
    'volume_mass_interaction',
    'surface_volume_ratio',
    'surface_mass_ratio'
]

label = 'weight'

df = preprocess_df(df, features, label)

In [None]:
df.head()

In [None]:
def plot_feature_distribution_with_bounds(df, feature="volume", q1=0.25, q3=0.75, factor=1.5, bins=100,):
    """Histogram of one column with dashed lines at its IQR-based bounds.

    The bounds are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR`` where Q1/Q3
    are the ``q1``/``q3`` quantiles of ``df[feature]``. The quantiles, the IQR
    and both bounds are also printed with three decimals.
    """
    values = df[feature]
    Q1, Q3 = values.quantile(q1), values.quantile(q3)
    IQR = Q3 - Q1
    lower_bound = Q1 - factor * IQR
    upper_bound = Q3 + factor * IQR

    plt.figure()
    plt.hist(values, bins=bins)
    for bound in (lower_bound, upper_bound):
        plt.axvline(bound, linestyle="--")
    plt.xlabel(feature)
    plt.ylabel("count")
    plt.title(f"{feature} distribution with IQR bounds")
    plt.tight_layout()
    plt.show()

    print(f"{feature} Q1={Q1:.3f}, Q3={Q3:.3f}, IQR={IQR:.3f}")
    print(f"Bounds: [{lower_bound:.3f}, {upper_bound:.3f}]")

In [None]:
plot_feature_distribution_with_bounds(df)

In [None]:
# One independent copy of the rows per artist, keyed by the artist value.
artists = df['artist'].unique()
artist_datasets = {artist: df[df['artist'] == artist].copy() for artist in artists}

# NOTE(review): the literal keys 0, 1, 2 assume the 'artist' column holds exactly
# these integer codes — confirm against the CSV; any other encoding raises KeyError.
# NOTE(review): these copies are taken from `df` as it is at this point, so the
# outlier filter applied to `df` in a later cell is NOT applied to df_1/df_2/df_3.
df_1 = artist_datasets[0]
df_2  = artist_datasets[1]
df_3  = artist_datasets[2]

In [None]:
def remove_outliers(df, quantile_1=0.25, quantile_3=0.85, factor=1.5, columns=None):
    """Drop rows that exceed an upper quantile-based fence in any checked column.

    For every checked column the fence is ``Q_hi + factor * (Q_hi - Q_lo)``,
    with ``Q_lo``/``Q_hi`` the ``quantile_1``/``quantile_3`` quantiles. Only
    the upper side is filtered; unusually small values are kept.

    Args:
        df: Input frame (not modified).
        quantile_1: Lower quantile used for the spread.
        quantile_3: Upper quantile used for the spread and as the fence base.
        factor: Multiplier applied to the spread (default 1.5, as before).
        columns: Optional iterable of column names to check. By default all
            numeric columns are checked, so non-numeric columns no longer
            break the quantile computation; pass an explicit list to exclude
            e.g. categorical id columns.

    Returns:
        The rows of ``df`` (all columns) that are within the fence in every
        checked column.
    """
    if columns is None:
        checked = df.select_dtypes(include='number')
    else:
        checked = df[list(columns)]

    # Nothing to compare against: every row is trivially kept.
    if checked.shape[1] == 0:
        return df

    lower_q = checked.quantile(quantile_1)
    upper_q = checked.quantile(quantile_3)
    upper_fence = upper_q + factor * (upper_q - lower_q)

    # DataFrame > Series aligns the Series index with the frame's columns.
    is_outlier = (checked > upper_fence).any(axis=1)

    return df[~is_outlier]


print('Shapes before removing outliers')
print(df.shape)

df = remove_outliers(df)

print('\nShapes after removing outliers')
print(df.shape)

In [None]:
print(df.shape, df_1.shape, df_2.shape, df_3.shape)

In [None]:
def split_dataset(df, target, test_size, seed, stratified=False, stratified_column=None, num_bins=4):
    """Split a frame into train/test features and target, optionally stratified.

    When ``stratified`` is true and ``stratified_column`` is given, that column
    is cut into ``num_bins`` quantile bins (duplicate edges dropped) and the
    split is stratified on those bins.

    The input frame is not modified: the bins live in a standalone Series
    instead of being written into ``df`` as a temporary column.

    Args:
        df: Frame containing the features and the target column.
        target: Name of the target column.
        test_size: Passed through to ``train_test_split``.
        seed: Random state for the split.
        stratified: Whether to stratify.
        stratified_column: Continuous column to bin for stratification.
        num_bins: Number of quantile bins.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    stratify_col = None
    if stratified and stratified_column:
        # Same index as df, so train_test_split lines it up row by row with X and y.
        stratify_col = pd.qcut(df[stratified_column], q=num_bins, duplicates='drop')

    X = df.drop(target, axis=1)
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed, stratify=stratify_col
    )

    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = split_dataset(df, 'weight', 0.2, SEED, stratified=True, stratified_column='volume')
X_train_1, X_test_1, y_train_1, y_test_1 = split_dataset(df_1, 'weight', 0.2, SEED, stratified=True, stratified_column='volume')
X_train_2, X_test_2, y_train_2, y_test_2 = split_dataset(df_2, 'weight', 0.2, SEED, stratified=True, stratified_column='volume')
X_train_3, X_test_3, y_train_3, y_test_3 = split_dataset(df_3, 'weight', 0.2, SEED, stratified=True, stratified_column='volume')

In [None]:
X_train = np.array(X_train, dtype=np.float32)
X_train_1 = np.array(X_train_1, dtype=np.float32)
X_train_2 = np.array(X_train_2, dtype=np.float32)
X_train_3 = np.array(X_train_3, dtype=np.float32)

In [None]:
def build_and_compile_model(hp, n_features, normalizer):
    """Build and compile a dense regression network from tuner hyperparameters.

    Architecture: Input -> ``normalizer`` -> initial Dense/Activation
    (optionally BatchNormalization) -> ``num_dense_layers`` hidden stages
    (Dense, Activation, optional BatchNormalization before or after the
    activation, optional Dropout) -> Dense(1).

    The hyperparameters are requested from ``hp`` in a fixed order; keep that
    order when editing, since it defines the order in which they are
    registered with the tuner.

    Args:
        hp: Keras Tuner hyperparameter container.
        n_features: Number of input features.
        normalizer: Normalization layer placed right after the input.

    Returns:
        A model compiled with MSE loss and a MeanAbsoluteError metric.
    """
    activation_choices = ['relu', 'leaky_relu', 'elu', 'tanh', 'selu', 'linear', 'swish', 'mish']
    unit_range = dict(min_value=32, max_value=1024, step=32)
    log_range = dict(min_value=1e-6, max_value=1e-2, sampling='log')

    net = Sequential()
    net.add(Input(shape=(n_features,)))
    net.add(normalizer)

    first_units = hp.Int('init_units', **unit_range)

    reg_kind = hp.Choice('regularizer', values=['none', 'l1', 'l2', 'both'])
    l1_str = hp.Float('l1_str', **log_range)
    l2_str = hp.Float('l2_str', **log_range)

    # Lazy factories so only the selected regularizer object is ever created;
    # anything not listed ('both') falls back to the combined penalty.
    regularizer_factories = {
        'none': lambda: None,
        'l1': lambda: l1(l1_str),
        'l2': lambda: l2(l2_str),
    }
    make_regularizer = regularizer_factories.get(reg_kind, lambda: l1_l2(l1=l1_str, l2=l2_str))
    kernel_regularizer = make_regularizer()

    net.add(Dense(first_units, kernel_regularizer=kernel_regularizer))
    net.add(Activation(hp.Choice('init_activation', values=activation_choices)))

    if hp.Choice('init_batch_normalization', values=[True, False]):
        net.add(BatchNormalization())

    depth = hp.Int('num_dense_layers', min_value=1, max_value=8, step=1)
    # All layer widths are requested up front, before any per-layer options.
    layer_widths = [hp.Int(f'units_{i}', **unit_range) for i in range(depth)]

    for i, width in enumerate(layer_widths):
        use_bn = hp.Choice(f'batch_normalization_{i}', values=[True, False])
        bn_position = hp.Choice(f'batch_normalization_position_{i}', values=['before', 'after'])

        net.add(Dense(width, kernel_regularizer=kernel_regularizer))

        if use_bn and bn_position == 'before':
            net.add(BatchNormalization())

        net.add(Activation(hp.Choice(f'activation_{i}', values=activation_choices)))

        if use_bn and bn_position == 'after':
            net.add(BatchNormalization())

        drop_rate = hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.75, step=0.05)
        if drop_rate > 0:
            net.add(Dropout(drop_rate))

    net.add(Dense(1))

    opt_name = hp.Choice('optimizer', values=['adam', 'adamw', 'nadam', 'adamax', 'sgd', 'rmsprop'])
    learning_rate = hp.Float('learning_rate', **log_range)

    # Any name not in the table (i.e. 'sgd') uses SGD.
    optimizer_classes = {
        'adam': Adam,
        'adamw': AdamW,
        'nadam': Nadam,
        'adamax': Adamax,
        'rmsprop': RMSprop,
    }
    optimizer = optimizer_classes.get(opt_name, SGD)(learning_rate=learning_rate)

    net.compile(loss='mean_squared_error', optimizer=optimizer, metrics=[MeanAbsoluteError()])

    return net

def search_best_model(project_name, x, y, seed, epochs=50, max_trials=10, split=0.2, batch_size=None, show_model=True):
    """Run a Bayesian hyperparameter search and return the best configuration.

    A Normalization layer is adapted on ``x`` and handed to every candidate
    model. Trials are scored on validation MAE, with early stopping and
    learning-rate reduction on plateau.

    Args:
        project_name: Sub-directory name under 'bayesian_optimization'.
        x, y: Training features and targets.
        seed: Seed for the tuner.
        epochs: Maximum epochs per trial.
        max_trials: Number of configurations to try.
        split: Fraction of the data used for validation during the search.
        batch_size: Batch size passed to the search.
        show_model: Print the summary of the returned model.

    Returns:
        ``(best_hps, model)`` where ``model`` is built from ``best_hps`` via
        ``tuner.hypermodel.build``.
    """
    monitored = 'val_mean_absolute_error'
    n_features = x.shape[1]

    normalizer = Normalization(axis=-1)
    normalizer.adapt(x)

    def hypermodel(hp):
        return build_and_compile_model(hp, n_features, normalizer)

    tuner = kt.BayesianOptimization(
        hypermodel,
        objective=kt.Objective(monitored, direction="min"),
        max_trials=max_trials,
        directory='bayesian_optimization',
        project_name=project_name,
        seed=seed
    )

    stop_early = EarlyStopping(monitor=monitored, min_delta=0.01, patience=10,
                               verbose=0, mode='min', restore_best_weights=True)
    shrink_lr = ReduceLROnPlateau(monitor=monitored, factor=0.2, patience=5,
                                  min_lr=1e-8, mode='min', verbose=0)

    tuner.search(
        x, y,
        epochs=epochs,
        validation_split=split,
        batch_size=batch_size,
        callbacks=[stop_early, shrink_lr]
    )

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_model_from_tuner = tuner.hypermodel.build(best_hps)

    if show_model:
        print("Best Model from Tuner:")
        best_model_from_tuner.summary()

    return best_hps, best_model_from_tuner

def show_best_hps(hps):
    """Print every hyperparameter name in ``hps.values`` with its value from ``hps.get``."""
    print("Best Hyperparameters:")
    for name in hps.values:
        value = hps.get(name)
        print(f"{name}: {value}")

In [None]:
best_hps, best_model = search_best_model('All_Artists', X_train, y_train, epochs=EPOCHS, max_trials=MAX_TRIALS, seed=SEED, batch_size=BATCH_SIZE)
show_best_hps(best_hps)

In [None]:
best_hps_1, best_model_1 = search_best_model('Artist_0', X_train_1, y_train_1, epochs=EPOCHS, max_trials=MAX_TRIALS, seed=SEED, batch_size=BATCH_SIZE)
show_best_hps(best_hps_1)

In [None]:
best_hps_2, best_model_2 = search_best_model('Artist_1', X_train_2, y_train_2, epochs=EPOCHS, max_trials=MAX_TRIALS, seed=SEED, batch_size=BATCH_SIZE)
show_best_hps(best_hps_2)

In [None]:
best_hps_3, best_model_3 = search_best_model('Artist_2', X_train_3, y_train_3, epochs=EPOCHS, max_trials=MAX_TRIALS, seed=SEED, batch_size=BATCH_SIZE)
show_best_hps(best_hps_3)

In [None]:
%%time

history = best_model.fit(
    X_train, y_train,
    validation_split=0.3,
    epochs=100,
    callbacks=[
        EarlyStopping(monitor='val_mean_absolute_error', min_delta=0.01, patience=10, mode='min', restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_mean_absolute_error', factor=0.2, patience=5, min_lr=1e-8, mode='min')
    ]
)

In [None]:
%%time

history_1 = best_model_1.fit(
    X_train_1, y_train_1,
    validation_split=0.3,
    epochs=100,
    callbacks=[
        EarlyStopping(monitor='val_mean_absolute_error', min_delta=0.01, patience=10, mode='min', restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_mean_absolute_error', factor=0.2, patience=5, min_lr=1e-8, mode='min')
    ]
)

In [None]:
%%time

history_2 = best_model_2.fit(
    X_train_2, y_train_2,
    validation_split=0.3,
    epochs=100,
    callbacks=[
        EarlyStopping(monitor='val_mean_absolute_error', min_delta=0.01, patience=10, mode='min', restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_mean_absolute_error', factor=0.2, patience=5, min_lr=1e-8, mode='min')
    ]
)

In [None]:
%%time

history_3 = best_model_3.fit(
    X_train_3, y_train_3,
    validation_split=0.3,
    epochs=100,
    callbacks=[
        EarlyStopping(monitor='val_mean_absolute_error', min_delta=0.01, patience=10, mode='min', restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_mean_absolute_error', factor=0.2, patience=5, min_lr=1e-8, mode='min')
    ]
)

In [None]:
def plot_loss(history, artist=None):
    """Plot training and validation MAE per epoch from a Keras History object.

    The y-axis is fixed to [0, 10], so larger errors are clipped from view.
    """
    curves = history.history
    for metric_name in ('mean_absolute_error', 'val_mean_absolute_error'):
        plt.plot(curves[metric_name], label=metric_name)

    plt.ylim([0, 10])
    plt.title(f"Artist: {artist}")
    plt.xlabel('Epoch')
    plt.ylabel('Error [MAE]')
    plt.legend()
    plt.grid(True)
    plt.show()

plot_loss(history, 'All')
plot_loss(history_1, '0')
plot_loss(history_2, '1')
plot_loss(history_3, '2')

In [None]:
print('Best All_Artists Model: ', best_model.evaluate(X_test, y_test, verbose=0))
print('Best Artist_0 Model: ', best_model_1.evaluate(X_test_1, y_test_1, verbose=0))
print('Best Artist_1 Model: ', best_model_2.evaluate(X_test_2, y_test_2, verbose=0))
print('Best Artist_2 Model: ', best_model_3.evaluate(X_test_3, y_test_3, verbose=0))

In [None]:
predictions = best_model.predict(X_test).flatten()
predictions_1 = best_model_1.predict(X_test_1).flatten()
predictions_2 = best_model_2.predict(X_test_2).flatten()
predictions_3 = best_model_3.predict(X_test_3).flatten()

In [None]:
def plot_residuals(y, predictions, error_margin, artist=None):
    """Scatter residuals (``y - predictions``) against the predicted values.

    Dashed grey lines mark +/- ``error_margin`` and a dashed red line marks
    zero. The y-limits always include the margin lines (with 10% headroom)
    as well as the most extreme residuals.
    """
    residuals = y - predictions.flatten()
    margin_style = dict(color='lightgrey', linestyle='--', alpha=0.8)
    headroom = error_margin * 1.1

    plt.figure(figsize=(10, 6))

    # Only the upper margin line carries a legend entry; it stands for both.
    plt.axhline(y=error_margin, label=f'+/- {error_margin} error margin', **margin_style)
    plt.axhline(y=-error_margin, **margin_style)

    plt.scatter(predictions, residuals, alpha=0.5, label='Residuals')
    plt.axhline(y=0, color='r', linestyle='--', label='Zero Residual')

    # Widen the view just enough to keep the margin lines visible.
    lowest = min(residuals.min(), -headroom)
    highest = max(residuals.max(), headroom)
    plt.ylim(lowest, highest)

    plt.title(f'Residuals vs. Predicted Values - Artist: {artist}')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.legend()
    plt.show()

plot_residuals(y_test, predictions, ERROR_MARGIN_G, artist='All')
plot_residuals(y_test_1, predictions_1, ERROR_MARGIN_G, artist='0')
plot_residuals(y_test_2, predictions_2, ERROR_MARGIN_G, artist='1')
plot_residuals(y_test_3, predictions_3, ERROR_MARGIN_G, artist='2')

In [None]:
def plot_error_margin(y, predictions, error_margin, artist=None):
    """Scatter true values against predictions, coloured by error class.

    Green: absolute error within ``error_margin`` grams. Blue: the true value
    exceeds the prediction by more than the margin (underestimation). Red:
    the prediction exceeds the true value by more than the margin
    (overestimation). A dashed diagonal spanning the range of ``y`` is drawn
    for reference.
    """
    signed_error = y - predictions
    within_margin = np.abs(signed_error) <= error_margin
    too_low = signed_error > error_margin
    too_high = (predictions - y) > error_margin

    groups = (
        (within_margin, 'green', f'Accurate within {error_margin} grams'),
        (too_low, 'blue', 'Underestimations'),
        (too_high, 'red', 'Overestimations'),
    )

    plt.figure(figsize=(10, 6))
    for mask, colour, legend_text in groups:
        plt.scatter(predictions[mask], y[mask], color=colour, alpha=0.5, label=legend_text)

    plt.xlabel('Predictions')
    plt.ylabel('True Values')
    plt.title(f'Prediction Accuracy with {error_margin} Grams Error Margin - Artist: {artist}')
    plt.legend()

    # Diagonal line for reference
    value_range = [y.min(), y.max()]
    plt.plot(value_range, value_range, 'k--', lw=2)
    plt.show()

plot_error_margin(y_test, predictions, ERROR_MARGIN_G, artist='All')
plot_error_margin(y_test_1, predictions_1, ERROR_MARGIN_G, artist='0')
plot_error_margin(y_test_2, predictions_2, ERROR_MARGIN_G, artist='1')
plot_error_margin(y_test_3, predictions_3, ERROR_MARGIN_G, artist='2')

In [None]:
def plot_error_distribution(y_true, y_pred, artist=None):
    """Bar chart of how many prediction errors fall into each error range.

    Errors are ``y_pred - y_true``. Every finite error is counted in exactly
    one bin:

    * the centre bin 'Within +/- 1' is closed on both sides, [-1, 1];
    * bins below it are [lower, upper) so -1 belongs to the centre bin only;
    * bins above it are (lower, upper] so +1 belongs to the centre bin only.

    (Previously an error of exactly -1 was counted twice and an error of
    exactly +1 was not counted at all.) Empty bins are omitted from the plot.
    """
    differences = np.asarray(y_pred - y_true, dtype=float)

    boundaries = [(-np.inf, -15), (-15, -10), (-10, -5), (-5, -4), (-4, -3), (-3, -2), (-2, -1), (-1, 1),
                  (1, 2), (2, 3), (3, 4), (4, 5), (5, 10), (10, 15), (15, np.inf)]
    labels = ['<-15', '-15 to -10', '-10 to -5', '-5 to -4', '-4 to -3', '-3 to -2', '-2 to -1',
              'Within +/- 1',
              '1 to 2', '2 to 3', '3 to 4', '4 to 5', '5 to 10', '10 to 15', '>15']

    # Locate the centre bin by label instead of relying on a magic index.
    centre = labels.index('Within +/- 1')
    counts = np.zeros(len(labels))

    for i, (lower, upper) in enumerate(boundaries):
        if i == centre:
            in_bin = (differences >= lower) & (differences <= upper)
        elif i < centre:
            in_bin = (differences >= lower) & (differences < upper)
        else:
            in_bin = (differences > lower) & (differences <= upper)
        counts[i] = np.sum(in_bin)

    non_zero_counts = counts > 0
    filtered_labels = np.array(labels)[non_zero_counts]
    filtered_counts = counts[non_zero_counts]

    plt.figure(figsize=(14, 8))
    plt.bar(filtered_labels, filtered_counts, color='skyblue')
    plt.xlabel('Error Range (grams)')
    plt.ylabel('Count')
    plt.title(f'Prediction Error Distribution - Artist: {artist}')
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--')
    plt.show()

plot_error_distribution(y_test, predictions, artist='All')
plot_error_distribution(y_test_1, predictions_1, artist='0')
plot_error_distribution(y_test_2, predictions_2, artist='1')
plot_error_distribution(y_test_3, predictions_3, artist='2')

In [None]:
df_comparison = pd.DataFrame({'y_test':y_test, 'y_pred': predictions})
df_1_comparison = pd.DataFrame({'y_test':y_test_1, 'y_pred': predictions_1})
df_2_comparison = pd.DataFrame({'y_test':y_test_2, 'y_pred': predictions_2})
df_3_comparison = pd.DataFrame({'y_test':y_test_3, 'y_pred': predictions_3})

In [None]:
def get_results(model_name, predictions, y, error_margin=2, num_predictors=None):
    """Print regression metrics for a model and return its hit rate.

    Printed metrics: percentage of predictions within ``error_margin`` of the
    true value, RMSE, MAE, R-squared and adjusted R-squared.

    Args:
        model_name: Label used in the report header.
        predictions: Predicted values, same length as ``y``.
        y: True values.
        error_margin: Absolute tolerance for a prediction to count as correct.
        num_predictors: Number of model inputs, used for adjusted R-squared.

    Returns:
        Percentage (0-100) of predictions within ``error_margin``.

    Raises:
        ValueError: If ``num_predictors`` is None.
    """
    if num_predictors is None:
        raise ValueError("num_predictors must be provided to calculate adjusted R-squared.")

    n = len(y)
    p = num_predictors

    absolute_diff = np.abs(predictions - y)
    correct_predictions = np.sum(absolute_diff <= error_margin) / n * 100

    # mean_squared_error returns the MSE; take the square root so the value
    # matches the "RMSE" label it is printed under.
    rmse = np.sqrt(mean_squared_error(y, predictions))
    mae = mean_absolute_error(y, predictions)
    r_squared = r2_score(y, predictions)

    # Adjusted R-squared is undefined when there are no residual degrees of freedom.
    residual_dof = n - p - 1
    if residual_dof > 0:
        adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / residual_dof
    else:
        adjusted_r_squared = float('nan')

    print(f'++++++++ Results for model {model_name} ++++++++\n')
    print(f"Percentage of predictions within {error_margin} grams of the actual values: {correct_predictions}%")
    print(f"RMSE: {rmse}")
    print(f"MAE: {mae}")
    print(f"R-squared: {r_squared}")
    print(f"Adjusted R-squared: {adjusted_r_squared}")

    return correct_predictions

class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    """Minimal scikit-learn estimator facade around a compiled Keras model.

    It exists so scikit-learn utilities that call ``estimator.score`` (such as
    ``permutation_importance``) can be used with a Keras model. ``score``
    returns the negated Keras loss so that higher is better, as scikit-learn
    expects.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        """Fit the wrapped model silently and return ``self``."""
        self.model.fit(X, y, verbose=0)
        # scikit-learn's estimator convention: fit() returns the estimator so
        # calls can be chained and meta-estimators can use the result.
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X`` unchanged."""
        return self.model.predict(X)

    def score(self, X, y):
        """Return the negated loss of the wrapped model on ``(X, y)``."""
        results = self.model.evaluate(X, y, verbose=0)
        # Assumes evaluate() returns a sequence whose first element is the
        # loss, i.e. the model was compiled with at least one metric.
        loss = results[0]
        return -loss

def plot_permutation_importance(model, X_test, y_test, seed, artist=None):
    """Plot the permutation importance of each feature as a horizontal bar chart.

    The Keras model is wrapped in ``KerasRegressorWrapper`` so scikit-learn's
    ``permutation_importance`` can score it (10 repeats per feature). Bars are
    sorted by mean importance.

    Args:
        model: Compiled Keras model.
        X_test: Feature DataFrame; its column names label the bars.
        y_test: True target values.
        seed: Random state for the permutations.
        artist: Label shown in the title.
    """
    wrapped_model = KerasRegressorWrapper(model)
    result = permutation_importance(wrapped_model, X_test, y_test, n_repeats=10, random_state=seed, n_jobs=1)

    sorted_idx = result.importances_mean.argsort()

    # Start a fresh figure so the bars never land on a previously open one.
    plt.figure()
    plt.barh(X_test.columns[sorted_idx], result.importances_mean[sorted_idx])
    # The title previously read "Prediction Error Distribution" (copy-paste slip).
    plt.title(f'Permutation Importance - Artist: {artist}')
    plt.xlabel("Permutation Importance")
    plt.show()

def calculate_differences(y_test, y_pred):
    """Print summary statistics of the prediction errors ``y_pred - y_test``.

    The average is taken over absolute errors, while the minimum and maximum
    are the signed extremes (most negative and most positive error).
    """
    signed_errors = y_pred - y_test

    max_difference = np.max(signed_errors)
    min_difference = np.min(signed_errors)
    avg_difference = np.mean(np.abs(signed_errors))

    print(f'Average difference: {avg_difference}, Minimum difference: {min_difference}, Maximum difference: {max_difference}')

def save_model(model, file_path, accuracy, min_accuracy=80):
    """Save ``model`` to ``file_path`` if it is accurate enough and the file is new.

    Args:
        model: Object exposing ``save(path)``.
        file_path: Destination path; an existing file is never overwritten.
        accuracy: Score of the model, compared against ``min_accuracy``.
        min_accuracy: Threshold the accuracy must strictly exceed
            (default 80, the previously hard-coded value).

    Returns:
        True if the model was saved, False if it was skipped.
    """
    if not accuracy > min_accuracy:
        print(f'Model not saved: accuracy {accuracy} does not exceed {min_accuracy}')
        return False

    if os.path.exists(file_path):
        print(f'Model not saved: {file_path} already exists')
        return False

    model.save(file_path)
    print(f'Model saved as {file_path}')
    return True

In [None]:
# Report labels follow the naming used for the tuner projects and the
# evaluation printout ('All_Artists', 'Artist_0', 'Artist_1', 'Artist_2'):
# df_1 / df_2 / df_3 hold artists 0 / 1 / 2 respectively.
# The saved-model file names are intentionally left unchanged.
accuracy = get_results('All_Artists', predictions, y_test, error_margin=ERROR_MARGIN_G, num_predictors=best_model.input_shape[1])
calculate_differences(y_test, predictions)
save_model(best_model, f'model_all-{round(accuracy, 3)}.keras', accuracy)
plot_permutation_importance(best_model, X_test, y_test, SEED, artist='All')

accuracy_1 = get_results('Artist_0', predictions_1, y_test_1, error_margin=ERROR_MARGIN_G, num_predictors=best_model_1.input_shape[1])
calculate_differences(y_test_1, predictions_1)
save_model(best_model_1, f'model_1-{round(accuracy_1, 3)}.keras', accuracy_1)
plot_permutation_importance(best_model_1, X_test_1, y_test_1, SEED, artist='0')

accuracy_2 = get_results('Artist_1', predictions_2, y_test_2, error_margin=ERROR_MARGIN_G, num_predictors=best_model_2.input_shape[1])
calculate_differences(y_test_2, predictions_2)
save_model(best_model_2, f'model_2-{round(accuracy_2, 3)}.keras', accuracy_2)
plot_permutation_importance(best_model_2, X_test_2, y_test_2, SEED, artist='1')

accuracy_3 = get_results('Artist_2', predictions_3, y_test_3, error_margin=ERROR_MARGIN_G, num_predictors=best_model_3.input_shape[1])
calculate_differences(y_test_3, predictions_3)
save_model(best_model_3, f'model_3-{round(accuracy_3, 3)}.keras', accuracy_3)
plot_permutation_importance(best_model_3, X_test_3, y_test_3, SEED, artist='2')