# models_evaluation.py
# Import
import os
import pandas as pd
from IPython import display
from sklearn.metrics import classification_report

# My import
import plot_functions
import constants as const
import utils.general_functions as general


# ******************************************** #
# ************* MODEL EVALUATION ************* #
# ******************************************** #
# Print info about the hyperparameter search
def get_hyperparameters_search_info(model_name, best_hyperparameters):
    """
    Print information about the optimal hyperparameters found during the tuning process,
    and store them in a .csv file.
    :param model_name: The name of the model.
    :param best_hyperparameters: The best hyperparameters found by the tuner.
    """
    # Print info.
    print("\n> The hyperparameter search is complete!"
          "\n- The optimal number of units in the densely-connected layer is: {}"
          .format(best_hyperparameters.get("units")) +  # Dense layer units
          "\n- The optimal dropout rate is: {}"
          .format(best_hyperparameters.get("dropout_rate")) +  # Dropout rate
          "\n- The optimal learning rate for the optimizer is: {}"
          .format(best_hyperparameters.get("learning_rate")))  # Learning rate

    # Collect the best hyperparameters in a dictionary
    model_best_hp_dict = {
        "Model": model_name,
        "Units": best_hyperparameters["units"],
        "Dropout Rate": best_hyperparameters["dropout_rate"],
        "Learning Rate": best_hyperparameters["learning_rate"]
    }

    # Turn it into a dataframe
    df = pd.DataFrame(data=[model_best_hp_dict])

    # Save data to a csv file
    file_path = os.path.join(const.DATA_PATH, f"{model_name}_best_hyperparameters.csv")
    df.to_csv(file_path, index=False, float_format="%.4f")
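
# Example usage (illustrative sketch, not part of the original pipeline): `tuner` is assumed to be
# a keras_tuner tuner whose search has already completed.
#   best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
#   get_hyperparameters_search_info(model_name="CNN", best_hyperparameters=best_hp)
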
# Collect data about the search
def collect_hyperparameters_tuning_data(model_name, tuner):
    """
    Collect the hyperparameter tuning data of a model and save it as a CSV file.
    :param model_name: (string) The name of the model being tuned.
        Supported options: 'MLP', 'CNN', 'MobileNet'.
    :param tuner: The hyperparameter tuner object.
    :return: None
    """
    # If not already present, create a folder to store the data
    general.makedir(dirpath=const.DATA_PATH)

    # Models: MLP, CNN, MobileNet
    keras_model_trials = []
    keras_model_units = []
    keras_model_dropout_rate = []
    keras_model_learning_rates = []
    keras_model_score = []

    # Collect the hyperparameters and the score of every trial run by the tuner
    for trial in tuner.oracle.trials.values():
        keras_model_trials.append(int(trial.trial_id) + 1)
        keras_model_units.append(trial.hyperparameters["units"])
        keras_model_dropout_rate.append(trial.hyperparameters["dropout_rate"])
        keras_model_learning_rates.append(trial.hyperparameters["learning_rate"])
        keras_model_score.append(trial.score)

    # Define a dataframe
    df = pd.DataFrame(list(zip(keras_model_trials, keras_model_units, keras_model_dropout_rate,
                               keras_model_learning_rates, keras_model_score)),
                      columns=["Trial", "Units", "Dropout Rate", "Learning Rate", "Validation Accuracy"])

    # Sort the dataframe by trial values
    df.sort_values(by=["Trial"], ascending=True, inplace=True)

    # Save data to a csv file
    file_path = os.path.join(const.DATA_PATH, f"{model_name}_hyperparameter_tuning_data.csv")
    df.to_csv(file_path, index=False, float_format="%.4f")
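
# Example usage (illustrative sketch): meant to be called right after `tuner.search(...)` has finished,
# e.g.:
#   collect_hyperparameters_tuning_data(model_name="MLP", tuner=tuner)
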
# Test Loss and Accuracy metrics
def test_accuracy_loss_model(model, model_name, x_test, y_test):
    """
    Compute a simple evaluation of the model,
    printing the loss and accuracy metrics for the test set.
    :param model: Model in input.
    :param model_name: Name of the model.
    :param x_test: Input values of the test dataset.
    :param y_test: Target values of the test dataset.
    :return data_list: evaluation metrics dictionary
    """
    # Collect evaluation metrics
    test_score = model.evaluate(x=x_test, y=y_test, verbose=0)

    # Loss and Accuracy for the test set
    test_loss = test_score[0]
    test_accuracy = test_score[1]

    # Print evaluation info. about the model
    print("- Test Loss: {:.4f}".format(test_loss))
    print("- Test Accuracy: {:.4f} %".format(test_accuracy * 100))

    # Collect data
    data_list = {
        "Model": model_name,
        "Loss": test_loss,
        "Accuracy (%)": test_accuracy * 100
    }

    # Return dictionary
    return data_list
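
# Example usage (illustrative sketch; `model`, `x_test` and `y_test` are assumed to come from the
# project's training and preprocessing steps):
#   metrics = test_accuracy_loss_model(model=model, model_name="CNN", x_test=x_test, y_test=y_test)
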
# Classification report of the model
def compute_evaluation_metrics(model, model_name, x_test, y_test):
    """
    Compute the classification report of the model in input.
    :param model: Model in input.
    :param model_name: Name of the model.
    :param x_test: Input values of the test dataset.
    :param y_test: Target values of the test dataset.
    :return: The classification report of the model as a dataframe.
    """
    # Predict the test set labels
    predict = model.predict(x=x_test, verbose=0)

    # Convert the predictions to binary classes (0 or 1)
    y_pred = (predict > 0.5).astype("int32")

    # Compute the report
    clf_report = classification_report(y_true=y_test, y_pred=y_pred, target_names=const.CLASS_LIST, digits=2,
                                        output_dict=True)

    # Reshape the "accuracy" entry so that the dataframe matches the layout of the standard printed report
    clf_report.update({"accuracy": {"precision": None, "recall": None, "f1-score": clf_report["accuracy"],
                                    "support": clf_report.get("macro avg")["support"]}})
    df = pd.DataFrame(data=clf_report).transpose()

    # Print the report
    print("\n> Classification Report:")
    display.display(df)

    # Save the report
    general.makedir(dirpath=const.DATA_PATH)
    file_path = os.path.join(const.DATA_PATH, f"{model_name}_classification_report.csv")
    df.to_csv(file_path, index=True, index_label="index", float_format="%.3f")

    return df
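
# Example usage (illustrative sketch, assuming a binary classifier with a single sigmoid output):
#   report_df = compute_evaluation_metrics(model=model, model_name="MobileNet", x_test=x_test, y_test=y_test)
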
# Model evaluation with extraction of metrics and information plots
def evaluate_model(model, model_name, x_test, y_test, show_plot=True, save_plot=True):
    """
    Evaluate the performance of the model on the test set.
    :param model: The model in input.
    :param model_name: Name of the model.
    :param x_test: Input values of the test dataset.
    :param y_test: Target values of the test dataset.
    :param show_plot: If True, displays the plots on the screen.
        Default is True.
    :param save_plot: If True, saves the plots.
        Default is True.
    :return data: Dictionary with the evaluation metrics of the model.
    """
    # Evaluate the model
    print("\n> " + model_name + " Model Evaluation:")

    # Compute a simple evaluation report on the model performance
    data = test_accuracy_loss_model(model=model, model_name=model_name, x_test=x_test, y_test=y_test)

    # Compute the classification report of the model
    compute_evaluation_metrics(model=model, model_name=model_name, x_test=x_test, y_test=y_test)

    # Plot the Confusion Matrix
    plot_functions.plot_confusion_matrix(model=model, model_name=model_name, x_test=x_test, y_test=y_test,
                                         show_on_screen=show_plot, store_in_folder=save_plot)

    # Plot a representation of the predictions
    plot_functions.plot_model_predictions_evaluation(model=model, model_name=model_name, class_list=const.CLASS_LIST,
                                                     x_test=x_test, y_test=y_test, show_on_screen=show_plot,
                                                     store_in_folder=save_plot)

    # Plot a visual representation of the classification model predicting classes
    plot_functions.plot_visual_prediction(model=model, model_name=model_name, x_test=x_test, y_test=y_test,
                                          show_on_screen=show_plot, store_in_folder=save_plot)

    # Print a separator line for cleaner output
    print("__________________________________________________________________________________________")

    # Return dictionary
    return data
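

# Example usage (illustrative sketch, not part of the original module): the model path and the
# .npy test files below are hypothetical placeholders; adapt them to the actual training pipeline.
if __name__ == "__main__":
    import numpy as np
    from tensorflow import keras

    # Load a previously trained model and the test split (hypothetical file names)
    trained_model = keras.models.load_model("results/CNN_model.keras")
    x_test = np.load("results/x_test.npy")
    y_test = np.load("results/y_test.npy")

    # Run the full evaluation: metrics, classification report and plots
    evaluation_data = evaluate_model(model=trained_model, model_name="CNN",
                                     x_test=x_test, y_test=y_test,
                                     show_plot=False, save_plot=True)
    print(evaluation_data)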