In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import os 
import seaborn as sns
from sklearn.model_selection import train_test_split
import pickle
import tensorflow as tf
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler,  RobustScaler
import pickle
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout
from keras import regularizers, layers, optimizers, initializers
from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

from tensorflow.keras.applications import EfficientNetV2M
import numpy as np
import gc

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Input, Concatenate, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Select the GPU allocator through the TF_GPU_ALLOCATOR environment variable and echo it back.
# NOTE(review): this runs after `import tensorflow` -- confirm the variable is still honoured at that point.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
print(f'Current GPU allocator: {os.getenv("TF_GPU_ALLOCATOR")}')

# Turn on memory growth for every visible GPU; with no GPU the loop body never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        print(f'Setting memory growth for {gpu}')
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the problem and carry on with the default memory behaviour.
    print(err)

    

In [None]:
# Study identifier: used below to find the saved model under ./NN_search and to name
# the log directory, the checkpoint file and the results row.
study_name = '419_stdminmax_lrred_images_3'

In [None]:

# Regression target columns; the model predicts one value per column, in this order.
mean_columns = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']

In [None]:
# Load the tabular train/test data (paths are relative to the notebook's working directory).
train_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')

In [None]:
# Remove every column whose name ends in '_sd' from the training frame.
sd_columns = [name for name in train_df.columns if name.endswith('_sd')]
train_df = train_df.drop(columns=sd_columns)

In [None]:
# Directories holding the JPEG files; each image is named '<id>.jpeg'.
train_images_path = './data/train_images/'
test_images_path = './data/test_images/'

# Attach the full image path of every row as a new 'image_path' column.
train_df['image_path'] = [os.path.join(train_images_path, f'{image_id}.jpeg') for image_id in train_df['id']]
test_df['image_path'] = [os.path.join(test_images_path, f'{image_id}.jpeg') for image_id in test_df['id']]

In [None]:
# Trim outliers target by target: keep rows inside the [0.5 %, 98.5 %] quantile range.
# The quantiles are recomputed on the already-filtered frame, so the targets are trimmed sequentially.
for column in mean_columns:
    values = train_df[column]
    lower_quantile = values.quantile(0.005)
    upper_quantile = values.quantile(0.985)
    train_df = train_df[values.between(lower_quantile, upper_quantile)]

In [None]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified splitter with a fixed seed so the fold assignment is repeatable.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for i, trait in enumerate(mean_columns):

    # Determine the bin edges dynamically based on the distribution of traits
    # (six percentile edges from 0 to 100).
    bin_edges = np.percentile(train_df[trait], np.linspace(0, 100, 5 + 1))
    # NOTE(review): the last edge equals the column maximum, so with np.digitize's default
    # right=False the maximum value presumably lands in an extra bin index (6) -- confirm this is acceptable.
    train_df[f"bin_{i}"] = np.digitize(train_df[trait], bin_edges)

# Concatenate the bins into a final bin
# (one string label per row, built from the per-target bin indices).
train_df["final_bin"] = (
    train_df[[f"bin_{i}" for i in range(len(mean_columns))]]
    .astype(str)
    .agg("".join, axis=1)
)

# Perform the stratified split using final bin
# The index is reset first so the positional indices returned by skf.split match the .loc labels.
train_df = train_df.reset_index(drop=True)
for fold, (train_idx, valid_idx) in enumerate(skf.split(train_df, train_df["final_bin"])):
    # Each row is tagged with the fold in which it belongs to the validation part.
    train_df.loc[valid_idx, "fold"] = fold

In [None]:
# Independent snapshots of the fold-annotated frame: one kept untouched, one used for
# the distribution plots, one as the source of the split below.
train_original = train_df.copy()
train_plot = train_df.copy()
sample_df = train_df.copy()

# Fold 3 is held out for validation; every other row is used for training.
is_valid_fold = sample_df['fold'] == 3
train_df = sample_df[~is_valid_fold]
valid_df = sample_df[is_valid_fold]
print(f"# Num Train: {len(train_df)} | Num Valid: {len(valid_df)}")

In [None]:
import glob

# Locate every saved model of this study and keep the one whose file name
# carries the highest validation R2 score.
directory_path = './NN_search'
pattern = f"{directory_path}/{study_name}*.h5"

files = glob.glob(pattern)

max_r2_score = float('-inf')
best_model = None

# Walk through every file and look for the largest r2_score_inv.
for file in files:
    # The score is read from the token that follows 'best_val_' in the file name.
    try:
        value = float(file.split('best_val')[1].split('_')[1])
    except (IndexError, ValueError):
        # A file that matches the glob but not the naming scheme must not abort the scan.
        print(f"Skipping {file}: no score found after 'best_val' in the file name")
        continue
    if value > max_r2_score:
        max_r2_score = value
        best_model = file


# Print the largest r2_score_inv found and the corresponding file.
print(f"Best R2-score: {max_r2_score:.5f}")
if best_model:
    print(f"Best model: {best_model}")
else:
    print("No best model found")
    # Stop here with a clear message instead of failing later on a '-inf' pickle path.
    raise FileNotFoundError(f"No model file matching {pattern!r} with a parsable score was found")

# The transform/scaler pickles are named after the study and the score formatted to 5 decimals.
best_log_transforms_name =  f'./NN_search/{study_name}_{max_r2_score:.5f}_best_log_transforms.pickle'
best_scalers_name = f'./NN_search/{study_name}_{max_r2_score:.5f}_best_scalers.pickle'

# NOTE: pickle.load can execute arbitrary code -- only open files produced by your own search run.
print(f'Opening log transforms from {best_log_transforms_name}')
with open(best_log_transforms_name, 'rb') as f:
    log_transforms = pickle.load(f)

print(f'Opening scalers from {best_scalers_name}')
with open(best_scalers_name, 'rb') as f:
    scaler_transforms = pickle.load(f)

def r2_score_tf(y_true, y_pred):
    """Batch-wise R2 metric averaged over the output columns.

    R2 is computed per column by reducing over axis 0, NaN entries are replaced
    by zero, negative values are clipped to zero, and the mean over columns is
    returned. If any operation raises, ``float('-inf')`` is returned instead.
    """
    try:
        residual_sq = tf.reduce_sum(tf.square(y_true - y_pred), axis=0)
        centered = y_true - tf.reduce_mean(y_true, axis=0)
        total_sq = tf.reduce_sum(tf.square(centered), axis=0)
        # epsilon keeps the division finite when a column is constant within the batch
        per_column = 1 - residual_sq / (total_sq + tf.keras.backend.epsilon())
        per_column = tf.where(tf.math.is_nan(per_column), tf.zeros_like(per_column), per_column)
        return tf.reduce_mean(tf.maximum(per_column, 0.0))
    except Exception:
        return float('-inf')
    
# The saved model references the custom metric by name, so it has to be supplied when loading.
custom_objects = {"r2_score_tf": r2_score_tf}

# Load the best model found above (best_model is the .h5 path selected by score).
nas_model  = tf.keras.models.load_model(best_model, custom_objects=custom_objects)

nas_model.summary()
        

In [None]:

# Count the individual weights of the loaded model, split by trainability.
trainable_count_nas = sum(tf.size(weight).numpy() for weight in nas_model.trainable_weights)
non_trainable_count_nas = sum(tf.size(weight).numpy() for weight in nas_model.non_trainable_weights)
print(f"Total parameters nas: {trainable_count_nas + non_trainable_count_nas:,}")
print(f"Trainable parameters nas: {trainable_count_nas:,}")
print(f"Non-trainable parameters nas: {non_trainable_count_nas:,}")

In [None]:
# Show the per-target scalers and log/root transforms that were loaded from the pickles.
print(f'Scaler are: {scaler_transforms}')
print(f'Log transforms are: {log_transforms}')

In [None]:
def augment_image(img):
    """Apply a chain of random augmentations to one image tensor and return it.

    In order: horizontal flip, rotation by a random multiple of 90 degrees,
    brightness, hue, saturation and contrast jitter.

    NOTE(review): the callers pass the float output of tf.image.resize on a decoded
    JPEG, presumably in the 0-255 range; confirm the brightness/hue/saturation ops
    behave as intended on that range (max_delta=0.2 would be tiny on a 0-255 scale).
    """
    img = tf.image.random_flip_left_right(img)
    # k is drawn from {0, 1, 2, 3} (maxval is exclusive)
    img = tf.image.rot90(img, k=tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))
    img = tf.image.random_brightness(img, max_delta=0.2)
    img = tf.image.random_hue(img, max_delta=0.1)
    img = tf.image.random_saturation(img, lower=0.8, upper=1.2)
    img = tf.image.random_contrast(img, lower=0.8, upper=1.2)

    # Disabled experiment: random crop followed by a resize back to 480x480.
    # crop_size = tf.random.uniform(shape=[], minval=25, maxval=150, dtype=tf.int32)
    # img = tf.image.random_crop(img, size=[crop_size, crop_size, 3])
    # img = tf.image.resize(img, [480, 480])

    return img


def process_image(file_path):
    """Read a JPEG file, decode it to 3 channels, resize to 480x480 and augment it."""
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (480, 480))
    # The result is returned without any further cast after the augmentation.
    return augment_image(resized)

def process_image_valid(file_path):
    """Read a JPEG file, decode it to 3 channels and resize to 480x480 (no augmentation)."""
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, (480, 480))



# Define your dataset processing function
def process_path_train(file_path, targets):
    """Map (path, targets) to ((image, image), targets) using the augmented loader.

    The same image tensor is passed twice, once for each of the model's two image inputs.
    """
    augmented = process_image(file_path)
    model_inputs = (augmented, augmented)
    return model_inputs, targets


def process_path_valid(file_path, targets):
    """Map (path, targets) to ((image, image), targets) using the non-augmented loader.

    The same image tensor is passed twice, once for each of the model's two image inputs.
    """
    plain = process_image_valid(file_path)
    model_inputs = (plain, plain)
    return model_inputs, targets


In [None]:
y_train = train_df[mean_columns]
y_valid = valid_df[mean_columns]


def _transform_targets(targets):
    """Return a copy of `targets` with the stored log/root transform and then the scaler applied per column."""
    transformed = targets.copy()

    for target, log_base in log_transforms.items():
        if log_base is None:
            # No transform configured: the column is carried over unchanged.
            transformed[target] = targets[target]
        elif log_base == 'sqrt':
            transformed[target] = np.sqrt(targets[target])
        elif log_base == 'cbrt':
            transformed[target] = np.cbrt(targets[target])
        else:
            # Any other value is a numeric base: log_base(x) = ln(x) / ln(base)
            transformed[target] = np.log(targets[target]) / np.log(log_base)

    for target, scaler in scaler_transforms.items():
        if scaler is not None:
            # The scalers work on 2-D input, so the column is reshaped and flattened back.
            column_2d = transformed[target].values.reshape(-1, 1)
            transformed[target] = scaler.transform(column_2d).flatten()

    return transformed


y_train_transformed = _transform_targets(y_train)
y_valid_transformed = _transform_targets(y_valid)


In [None]:
BUFFER_SIZE = 50000
EPOCHS = 8
BATCH_SIZE = 64

# From here on these names hold arrays of file paths (they previously held the directory strings).
train_images_path = train_df['image_path'].values
valid_images_path = valid_df['image_path'].values

# Training pipeline: shuffle the (path, targets) pairs, load + augment, batch, prefetch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images_path, y_train_transformed.values))
    .shuffle(BUFFER_SIZE)
    .map(process_path_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# Validation pipeline: same steps without shuffling and without augmentation.
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((valid_images_path, y_valid_transformed.values))
    .map(process_path_valid, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# Total number of optimizer steps over the whole run (batches per epoch * epochs).
len_train = len(train_dataset) * EPOCHS

print(f'LR schedule steps: {len_train}')




In [None]:
#####  TWO-BRANCH (AVG / MAX POOLING) MODEL ON IMAGES #####



# Reset Keras' global state and run a garbage-collection pass before building the model.
tf.keras.backend.clear_session()
gc.collect()



# These two imports repeat the ones in the first cell.
import tensorflow as tf
import numpy as np

def r2_score_tf(y_true, y_pred):
    """Batch-wise R2 metric averaged over the output columns.

    This repeats the definition given in the model-loading cell. R2 is computed
    per column over axis 0, NaN entries become zero, negative values are clipped
    to zero and the mean over columns is returned; on any exception the function
    returns ``float('-inf')``.
    """

    try: 
        ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=0)
        ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true, axis=0)), axis=0)
        # epsilon keeps the division finite when ss_tot is zero
        r2 = 1 - ss_res/(ss_tot + tf.keras.backend.epsilon())
        r2 = tf.where(tf.math.is_nan(r2), tf.zeros_like(r2), r2) 
        return tf.reduce_mean(tf.maximum(r2, 0.0))
    except Exception as e:
        # print(f'Error in r2_score_tf: {e}')
        return float('-inf')


# Set up the inputs: one 480x480 RGB input per branch.
image_input_avg = Input(shape=(480, 480, 3), name='image_input_avg')
image_input_max = Input(shape=(480, 480, 3), name='image_input_max')

# Create the base EfficientNetV2M models: ImageNet weights, no classification head,
# one with global average pooling and one with global max pooling.
eff_avg_base = EfficientNetV2M(weights='imagenet', include_top=False, pooling='avg', input_tensor=image_input_avg)
eff_max_base = EfficientNetV2M(weights='imagenet', include_top=False, pooling='max', input_tensor=image_input_max)

# Rename the layers of a model so the two backbones do not share layer names.
def clone_and_rename(model, prefix):
    """Prefix the name of every layer of `model` with ``prefix + '_'`` and return `model`.

    Despite the function name no copy is made: the layers are renamed in place
    through their private ``_name`` attribute and the very same model object is returned.
    """
    for layer in model.layers:
        layer._name = '_'.join((prefix, layer.name))
    return model

eff_avg_cloned = clone_and_rename(eff_avg_base, 'eff_avg')
eff_max_cloned = clone_and_rename(eff_max_base, 'eff_max')

# Wrap the renamed backbones in new Model instances.
model_avg = Model(inputs=image_input_avg, outputs=eff_avg_cloned.output, name='model_avg')
model_max = Model(inputs=image_input_max, outputs=eff_max_cloned.output, name='model_max')

# Freeze every layer except the last 34 of each backbone.
model_avg.trainable = True
for layer in model_avg.layers[:-34]:
    layer.trainable = False
model_max.trainable = True
for layer in model_max.layers[:-34]:
    layer.trainable = False

# Feed both pooled outputs into the previously loaded model (nas_model must already exist).
nas_output = nas_model([model_avg.output, model_max.output])

# Build the final model: two image inputs -> backbones -> loaded head.
finetune_model = Model(inputs=[image_input_avg, image_input_max], outputs=nas_output, name='finetune_model')
# finetune_model.summary()

# Cosine decay from 5e-4 down to alpha * 5e-4 over len_train steps.
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    5e-4,    
    alpha=0.05,
    name="CosineDecay",
    decay_steps=len_train
)

# Compile with the learning-rate schedule.
# NOTE(review): `optimizers` comes from the standalone `keras` import while the model is built
# with `tensorflow.keras` -- confirm the two are compatible in this environment.
finetune_model.compile(optimizer=optimizers.RMSprop(learning_rate=lr_schedule), loss='mae', metrics=['mse', 'mae', 'mape', r2_score_tf])






In [None]:
# Weight counts of the assembled model, split by trainability.
trainable_count = sum(tf.size(weight).numpy() for weight in finetune_model.trainable_weights)
non_trainable_count = sum(tf.size(weight).numpy() for weight in finetune_model.non_trainable_weights)
print(f"Total parameters: {trainable_count + non_trainable_count:,}")
print(f"Trainable parameters: {trainable_count:,}")
print(f"Non-trainable parameters: {non_trainable_count:,}")

# Backbone share = whole model minus the counts measured earlier on the loaded head.
print(f'Total parameters from EfficientNetV2M: { (trainable_count + non_trainable_count) - (trainable_count_nas + non_trainable_count_nas):,}')
print(f'Trainable from EfficientNetV2M: {trainable_count - trainable_count_nas:,}')
print(f'Non trainable from EfficientNetV2M: {non_trainable_count - non_trainable_count_nas:,}')

# Head share, repeated for side-by-side comparison.
print(f'Trainable fron NAS: {trainable_count_nas:,}')
print(f'Non trainable from NAS: {non_trainable_count_nas:,}')
print(F'Total parameters from NAS: {trainable_count_nas + non_trainable_count_nas:,}')

In [None]:
class TrainImageLoggingCallback(tf.keras.callbacks.Callback):
    """Write a small sample of training images to TensorBoard after every epoch.

    Args:
        log_dir: directory the image summaries are written to.
        dataset: batched dataset whose elements unpack as ``((image, image), targets)``.
        num_images: number of images sampled from `dataset` per epoch.
    """

    def __init__(self, log_dir, dataset, num_images=10):
        super().__init__()
        self.log_dir = log_dir
        self.writer = tf.summary.create_file_writer(log_dir)
        self.num_images = num_images
        # Un-batch, keep only a few images and re-batch them into a single tensor so that
        # they are written as ONE summary (separate writes with the same tag and step
        # would compete for the same slot in TensorBoard).
        self.dataset = dataset.unbatch().take(num_images).batch(max(num_images, 1))

    def on_epoch_end(self, epoch, logs=None):
        """Log the sampled images under the tag 'Training Data Augmentation' at step `epoch`."""
        with self.writer.as_default():
            for (images, _), _ in self.dataset:
                # Both model inputs carry the same picture, so only the first one is logged.
                # Clip BEFORE casting: the cast must use the clipped tensor, otherwise
                # out-of-range values produced by the augmentation wrap around in uint8.
                images = tf.clip_by_value(images, 0, 255)
                images = tf.cast(images, tf.uint8)
                tf.summary.image(
                    "Training Data Augmentation",
                    images,
                    step=epoch,
                    max_outputs=max(self.num_images, 1),
                )
        self.writer.flush()
    


In [None]:
# Create (if needed) the per-study log directory used by both TensorBoard callbacks.
print(f'Creating logs for trial: {study_name} in ./logs/trial_{study_name}')
log_folder = f"./logs/trial_{study_name}"
os.makedirs(log_folder, exist_ok=True)

# Set the log directory; histograms are computed every epoch and metrics are written once per epoch.
tensorboard_callback = TensorBoard(log_dir=log_folder, histogram_freq=1, update_freq='epoch')

In [20]:


callbacks = [
    # Keep only the weights of the epoch with the lowest validation MAE.
    tf.keras.callbacks.ModelCheckpoint(filepath=f'./NN_search/testifinetus_{study_name}.h5', monitor='val_mae', save_best_only=True, save_weights_only=True, mode = 'min',  verbose = 1),
    tensorboard_callback,
    # Logs a few augmented training images per epoch into the same log folder.
    TrainImageLoggingCallback(log_folder, train_dataset)    
]

# Fine-tune for EPOCHS epochs; the learning-rate schedule was sized for exactly this many steps.
history = finetune_model.fit(train_dataset, validation_data=valid_dataset, epochs=EPOCHS, verbose=1, callbacks=callbacks)




KeyboardInterrupt: 

In [None]:
# Restore the best weights written by the ModelCheckpoint callback during training.
finetune_model.load_weights(f'./NN_search/testifinetus_{study_name}.h5')
# finetune_model.save(f'./NN_search/koe', save_format='tf') # TODO still a problem here, but it does not matter. The model already exists and can be used.


In [None]:
# Rebuild the plotting frame from the untouched copy so that re-running this cell
# does not apply the transforms on top of already transformed values.
train_plot = train_original.copy()

# Step 1: per-target log / root transform (None means "leave the column as it is").
for target, log_base in log_transforms.items():
    if log_base is None:
        continue
    if log_base == 'sqrt':
        train_plot[target] = np.sqrt(train_plot[target])
    elif log_base == 'cbrt':
        train_plot[target] = np.cbrt(train_plot[target])
    else:
        # Any other value is a numeric base: log_base(x) = ln(x) / ln(base)
        train_plot[target] = np.log(train_plot[target]) / np.log(log_base)

# Step 2: per-target scaling, applied exactly once. The original cell repeated this
# line twice (a copy-paste leftover), which scaled the already scaled values again.
for target, scaler in scaler_transforms.items():
    if scaler is not None:
        train_plot[target] = scaler.transform(train_plot[target].values.reshape(-1, 1)).flatten()

In [None]:
# Show the transforms and scalers that were just applied to the plotting frame.
print(log_transforms)
print(scaler_transforms)

In [None]:
# Summary statistics of the targets in their original units.
train_original[mean_columns].describe()

In [None]:
# Summary statistics of the targets after the log/root transform and scaling.
train_plot[mean_columns].describe()

In [None]:
def plot_data(df, columns_names, n_cols=6):
    """Draw one KDE plot per column on a grid and show the figure.

    Args:
        df: DataFrame holding the columns to plot.
        columns_names: names of the columns to plot, one subplot each.
        n_cols: number of subplots per row (default 6, the original layout).
    """
    # Ceiling division: a partially filled last row still needs a row of its own.
    n_rows = -(-len(columns_names) // n_cols)

    # 3 inches per row keeps the original 15x3 size for a single row and stops
    # multi-row grids from being squashed into the same height.
    plt.figure(figsize=(15, 3 * max(n_rows, 1)))

    for i, col in enumerate(columns_names):
        plt.subplot(n_rows, n_cols, i + 1)
        sns.kdeplot(df[col], bw_adjust=0.5, fill=False, color='blue')
        plt.title(f'Distribution of {col}')
        plt.xlabel('Value')
        plt.ylabel('Density')

    plt.tight_layout()
    plt.show()
    


In [None]:
# Target distributions in their original units.
plot_data(train_original, mean_columns)

In [None]:
# Target distributions after the log/root transform and scaling.
plot_data(train_plot, mean_columns)

In [None]:
# Results of all fine-tuning runs are accumulated in one pickled DataFrame, one row per study.
results_training_name = './data/results_finetune_images.pickle'

if os.path.exists(results_training_name):
    results_training = pd.read_pickle(results_training_name)
else:
    # Every entry needs its own comma: a missing one silently fuses two adjacent
    # string literals into a single (wrong) column name.
    columns = [
        'Train R2', 'Train MSE', 'Train MAE', 'Train MAPE',
        'Valid R2', 'Valid MSE', 'Valid MAE', 'Valid MAPE',
        'Train preds Desc', 'Valid preds Desc', 'Test preds Desc',
        'Original data Desc', 'Kaggle R2', 'Scalers', 'Log/Pot transforms',
    ]
    results_training = pd.DataFrame(columns = columns)
    results_training.index.name = 'Study name'

# Row label under which this run is stored.
study_name_result = f'{study_name}_finetuned'

# Create the placeholder row under the SAME label the later cells write to; using the
# bare study name would leave an all-None row behind that breaks the results display.
if study_name_result not in results_training.index:
    results_training.loc[study_name_result] = [None]*len(results_training.columns)

In [None]:


# Rebuild both datasets for evaluation: no shuffling and no augmentation, so the
# prediction order matches the row order of y_train / y_valid.
train_images_path = train_df['image_path'].values
valid_images_path = valid_df['image_path'].values

BATCH_SIZE = 64

train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images_path, y_train_transformed.values))
    .map(process_path_valid, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

valid_dataset = (
    tf.data.Dataset.from_tensor_slices((valid_images_path, y_valid_transformed.values))
    .map(process_path_valid, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)





In [None]:

## TRAINING DATA TEST
tf.keras.backend.clear_session()
gc.collect()

# Predictions come out in the transformed (scaled) target space.
train_pred = finetune_model.predict(train_dataset, verbose=1)

# Step 1: undo the scaling, column by column (column i belongs to mean_columns[i]).
for i, target in enumerate(mean_columns):
    print(f'Scaler transforming target : {target} with scaler : {scaler_transforms[target]}')
    scaler = scaler_transforms[target]
    if scaler is not None:
        train_pred[:, i] = scaler.inverse_transform(train_pred[:, i].reshape(-1, 1)).flatten()


# Step 2: undo the log / root transform (the forward order was transform first, scaling second).
for i, target in enumerate(mean_columns):
    print(f'Logpot transforming target : {target}, log transform : {log_transforms[target]}')
    log_base = log_transforms[target]
    if log_base is not None and log_base != 'sqrt' and log_base != 'cbrt':
        # numeric base: inverse of log_base(x) is base ** x
        train_pred[:, i] = np.power(log_base, train_pred[:, i])
    elif log_base == 'sqrt':
        train_pred[:, i] = np.square(train_pred[:, i])
    elif log_base == 'cbrt':
        train_pred[:, i] = np.power(train_pred[:, i], 3)

# Metrics are computed in original units against the untransformed targets.
R2_train = r2_score(y_train, train_pred)
MSE_train = mean_squared_error(y_train, train_pred)
MAE_train = mean_absolute_error(y_train, train_pred)
MAPE_train = mean_absolute_percentage_error(y_train, train_pred)

print(f'Train scores:\nR2 : {R2_train:.5f}, MSE : {MSE_train:.5f}, MAE : {MAE_train:.5f}, MAPE : {MAPE_train:.5f}')
results_training.at[study_name_result, 'Train R2'] = R2_train
results_training.at[study_name_result, 'Train MSE'] = MSE_train
results_training.at[study_name_result, 'Train MAE'] = MAE_train
results_training.at[study_name_result, 'Train MAPE'] = MAPE_train

# Store the describe() table of the predictions as a JSON string in the results row.
trainining_preds_desc = pd.DataFrame(train_pred, columns = mean_columns).describe().to_json()
results_training.at[study_name_result, 'Train preds Desc'] = trainining_preds_desc

In [None]:

## VALIDATION DATA TEST

tf.keras.backend.clear_session()
gc.collect()

# Predictions come out in the transformed (scaled) target space.
valid_pred = finetune_model.predict(valid_dataset, verbose=1)

# Undo the scaling first; column i belongs to mean_columns[i].
for i, target in enumerate(mean_columns):
    print(f'Scaler transforming target : {target} with scaler : {scaler_transforms[target]}')
    scaler = scaler_transforms[target]
    if scaler is None:
        continue
    valid_pred[:, i] = scaler.inverse_transform(valid_pred[:, i].reshape(-1, 1)).flatten()

# Then undo the log / root transform.
for i, target in enumerate(mean_columns):
    log_base = log_transforms[target]
    if log_base is None:
        continue
    column = valid_pred[:, i]
    if log_base == 'sqrt':
        valid_pred[:, i] = np.square(column)
    elif log_base == 'cbrt':
        valid_pred[:, i] = np.power(column, 3)
    else:
        # numeric base: inverse of log_base(x) is base ** x
        valid_pred[:, i] = np.power(log_base, column)

# Metrics in original units against the untransformed validation targets.
R2_valid = r2_score(y_valid, valid_pred)
MSE_valid = mean_squared_error(y_valid, valid_pred)
MAE_valid = mean_absolute_error(y_valid, valid_pred)
MAPE_valid = mean_absolute_percentage_error(y_valid, valid_pred)

print(f'Valid scores:\nR2 : {R2_valid:.5f}, MSE : {MSE_valid:.5f}, MAE : {MAE_valid:.5f}, MAPE : {MAPE_valid:.5f}')
for metric_name, metric_value in (
    ('Valid R2', R2_valid),
    ('Valid MSE', MSE_valid),
    ('Valid MAE', MAE_valid),
    ('Valid MAPE', MAPE_valid),
):
    results_training.at[study_name_result, metric_name] = metric_value

# Store the describe() table of the predictions as a JSON string in the results row.
valid_preds_desc = pd.DataFrame(valid_pred, columns = mean_columns).describe().to_json()
results_training.at[study_name_result, 'Valid preds Desc'] = valid_preds_desc



In [None]:
# TEST DATA

BATCH_SIZE = 64

test_df_copy = test_df.copy()
submission_df = test_df_copy[['id']].copy()


def _process_path_test(file_path):
    """Load one test image without augmentation and duplicate it for the two model inputs.

    The test set has no targets, so the training mapper (which expects a
    (path, targets) pair and applies random augmentation) cannot be used here.
    The result is a 1-tuple wrapping the input pair so that Keras treats the
    pair as the inputs `x`, not as an `(x, y)` tuple.
    """
    img = process_image_valid(file_path)
    return ((img, img),)


test_images_path = test_df_copy['image_path'].values
test_dataset = tf.data.Dataset.from_tensor_slices(test_images_path)
test_dataset = test_dataset.map(_process_path_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

tf.keras.backend.clear_session()
gc.collect()

# Predictions come out in the transformed (scaled) target space.
predictions = finetune_model.predict(test_dataset, verbose=1)

# Step 1: undo the scaling, column by column (column i belongs to mean_columns[i]).
for i, target in enumerate(mean_columns):
    print(f'Scaler transforming target : {target} with scaler : {scaler_transforms[target]}')
    scaler = scaler_transforms[target]
    if scaler is not None:
        predictions[:, i] = scaler.inverse_transform(predictions[:, i].reshape(-1, 1)).flatten()


# Step 2: undo the log / root transform.
for i, target in enumerate(mean_columns):
    print(f'Logpot transforming target: : {target}, log transform : {log_transforms[target]}')
    log_base = log_transforms[target]
    if log_base is not None and log_base != 'sqrt' and log_base != 'cbrt':
        # numeric base: inverse of log_base(x) is base ** x
        predictions[:, i] = np.power(log_base, predictions[:, i])
    elif log_base == 'sqrt':
        predictions[:, i] = np.square(predictions[:, i])
    elif log_base == 'cbrt':
        predictions[:, i] = np.power(predictions[:, i], 3)


In [None]:

# Submission column names, listed in the same order as mean_columns, so that
# prediction column i is written under the matching name.
target_columns = ['X4', 'X11', 'X18', 'X50', 'X26', 'X3112']

submission_df[target_columns] = predictions


In [None]:

# Store the describe() table of the test predictions as a JSON string in the results row.
test_preds_desc = submission_df[target_columns].describe().to_json()
results_training.at[study_name_result, 'Test preds Desc'] = test_preds_desc 

# Same for the original training targets, as a reference to compare the predictions against.
original_data_desc = train_original[mean_columns].describe().to_json()
results_training.at[study_name_result, 'Original data Desc'] = original_data_desc



In [None]:
print(f'{str(log_transforms.items())}')
print(f'{str(scaler_transforms.items())}')

# Keep a textual record of the transforms used for this run.
# Note the two formats differ: the scalers are stored as the dict's string form,
# the log transforms as the string form of its items() view.
results_training.at[study_name_result, 'Scalers'] = f'{scaler_transforms}'
results_training.at[study_name_result, 'Log/Pot transforms'] = f'{str(log_transforms.items())}'



In [None]:
from io import StringIO

# The Kaggle score of this run is (re)set to None here.
# NOTE(review): presumably filled in by hand after submitting -- confirm.
results_training.at[study_name_result, 'Kaggle R2'] = None


def _display_description(title, payload):
    """Print `title` and render the JSON-encoded describe() table stored in `payload`.

    Cells that hold no JSON string (None/NaN, e.g. rows of unfinished runs) are
    reported as unavailable and do not abort the whole listing.
    """
    print(title)
    if isinstance(payload, str):
        # Wrap the literal in StringIO: passing raw JSON text to read_json is deprecated.
        display(pd.read_json(StringIO(payload)))
    else:
        print("(not available)")


for index, row in results_training.iterrows():
    print(f"Study Name: {index}")
    print(f'Kaggle R2: {row["Kaggle R2"]}')
    print(f"Train R2: {row['Train R2']}, Train MSE: {row['Train MSE']}, Train MAE : {row['Train MAE']}, Train MAPE: {row['Train MAPE']}")
    print(f'Valid R2: {row["Valid R2"]}, Valid MSE: {row["Valid MSE"]}, Valid MAE: {row["Valid MAE"]}, Valid MAPE: {row["Valid MAPE"]}')
    print("-" * 50)
    # row.get tolerates result files that lack one of the description columns.
    _display_description("Train preds Description:", row.get('Train preds Desc'))
    _display_description("Valid preds Description:", row.get('Valid preds Desc'))
    _display_description("Test preds Description:", row.get('Test preds Desc'))
    _display_description("Original data Description:", row.get('Original data Desc'))
    

In [None]:
# Quick look at the first rows of the submission frame.
submission_df.head()

In [None]:
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
submission_df.info()

# Write the submission file without the index column.
submission_df.to_csv('./data/submission.csv', index=False)

In [None]:
# Persist the accumulated results table back to the pickle it was loaded from.
results_training.to_pickle(results_training_name)