# Predictions statistics

Analysis of the predictions made for a certain datasplit (usually test).

**Note:** To run this notebook, you must first compute and save the predictions on a test dataset **with ground-truth values**. See the Datasplit section in the [3.0 notebook](./3.0-Computing_predictions.ipynb#Predicting-a-datasplit-txt-file) for reference.


In [None]:
import os
import json
import numpy as np

from imgclas.data_utils import load_image
from imgclas import paths, plot_utils
from imgclas import test_utils
import sklearn.metrics as metrics

import warnings
warnings.filterwarnings("ignore")

# Score the saved predictions of every per-epoch checkpoint on SPLIT_NAME and
# collect one MAE per epoch in `model_mae` (index 0 -> epoch 1).

# User parameters to set
TIMESTAMP = '2024-01-28_213117'       # timestamp of the model
SPLIT_NAME = 'test'                   # dataset split to predict
FOLD = ''                             # suffix appended to the predictions directory path
N_EPOCHS = 20                         # number of per-epoch checkpoints to score
FALLBACK_MAE = 40                     # placeholder MAE stored when an epoch cannot be scored

# Set the timestamp (does not change between epochs, so it is set once)
paths.timestamp = TIMESTAMP

# NOTE(review): 'final_model.h5' is not scored here; only the N_EPOCHS
# 'epoch-XX.hdf5' checkpoints are. Confirm whether the final model should be
# included before extending the loop, since that would add an extra point
# to `model_mae`.
model_mae = []
for i in range(1, N_EPOCHS + 1):
    MODEL_NAME = f'epoch-{i:02d}.hdf5'  # model to use to make the prediction
    print(f'Executing... {MODEL_NAME}')

    # Load back the predictions and compute the regression metrics
    try:
        pred_path = os.path.join(paths.get_predictions_dir() + FOLD,
                                 '{}+{}.json'.format(MODEL_NAME, SPLIT_NAME))
        with open(pred_path) as f:
            pred_dict = json.load(f)

        y, yhat = np.array(pred_dict['true_value']), np.array(pred_dict['pred_value'])

        mae = metrics.mean_absolute_error(y, yhat)
        # Only `mae` is collected below; the remaining metrics are not used in
        # this cell but stay available in the notebook namespace.
        mse = metrics.mean_squared_error(y, yhat)
        rmse = np.sqrt(mse)  # or mse**(0.5)
        r2 = metrics.r2_score(y, yhat)
    except (OSError, KeyError, TypeError, ValueError) as err:
        # Best effort: a missing/unreadable predictions file or malformed
        # content must not abort the sweep, but it should be visible.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        print(f'  Could not score {MODEL_NAME} ({type(err).__name__}: {err}); '
              f'using placeholder MAE {FALLBACK_MAE}')
        mae = FALLBACK_MAE

    model_mae.append(mae)
    


In [None]:

import os
import json
from datetime import timedelta

import matplotlib.pylab as plt
import numpy as np

from imgclas import paths, plot_utils

# User parameters to set
TIMESTAMP = '2024-01-28_213117'             # timestamp of the model

# Point the paths module at the selected run
paths.timestamp = TIMESTAMP

# Read the training statistics stored for this run
stats_path = os.path.join(paths.get_stats_dir(), 'stats.json')
with open(stats_path) as stats_file:
    stats = json.load(stats_file)

# Use the per-epoch MAE list (`model_mae`) for both validation curves;
# the two keys reference the same list object
stats['val_mean_absolute_error'] = stats['val_loss'] = model_mae

# Read the training configuration stored for this run
conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
with open(conf_path) as conf_file:
    conf = json.load(conf_file)

# Switch the validation flag on in the loaded configuration before plotting
conf['training'].update(use_validation=True)
print(conf)

# Draw the training plots
plot_utils.training_plots(conf, stats)

# Report the regularisation setting, the total training time, the training
# MAE history and the gap between the last validation and training MAE
tr_seconds = int(stats['training time (s)'])
print('l2_reg:', conf['training']['l2_reg'])
print('Total training time: {}'.format(timedelta(seconds=tr_seconds)))
train_mae = stats['mean_absolute_error']
print(train_mae)
print('Difference of MAE:', stats['val_mean_absolute_error'][-1] - train_mae[-1])


