-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
77 lines (61 loc) · 2.9 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from tpot import TPOTRegressor
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import make_scorer
from config import parsed_train_path, parsed_test_path, checkpoint_folder, our_log_path, \
custom_regressor_config_dict, generations, population_size, max_eval_time_mins, max_time_mins, n_jobs
import numpy as np
import logging
from pprint import pformat
def my_custom_accuracy(y_true, y_pred):
    """Return the negated root mean squared log error of the predictions.

    Both inputs are mapped through ``np.expm1`` before the error is
    computed (so they are presumably log1p-transformed values -- confirm
    against the data preparation code). The result is negated so that a
    larger return value means a better fit.

    Args:
        y_true: Reference target values.
        y_pred: Predicted target values.

    Returns:
        ``-sqrt(mean_squared_log_error(expm1(y_true), expm1(y_pred)))``.
    """
    # NOTE: negative values are not clipped here; they are handed to
    # mean_squared_log_error as-is after the expm1 transform.
    actual = np.expm1(y_true)
    predicted = np.expm1(y_pred)
    rmsle = np.sqrt(mean_squared_log_error(actual, predicted))
    return -rmsle
def create_and_configer_logger(log_name, level=logging.DEBUG):
    """Configure the root logger to log to a file and to the console.

    The function is safe to call more than once: the console handler is
    attached to the root logger only if a handler created by a previous call
    is not already present, so console output is never duplicated.

    Args:
        log_name: Path of the log file passed to ``logging.basicConfig``.
        level: Level passed to ``logging.basicConfig`` for the root logger.
            The console handler always uses ``logging.INFO``.

    Returns:
        The root logger.
    """
    # Name used to recognise the console handler installed by this function.
    console_handler_name = 'create_and_configer_logger.console'

    # Set up logging to file. basicConfig does nothing when the root logger
    # already has handlers, so only the first effective call configures it.
    logging.basicConfig(
        filename=log_name,
        level=level,
        format='\n[%(asctime)s - %(levelname)s] {%(pathname)s:%(lineno)d} -\n %(message)s\n',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    root_logger = logging.getLogger()

    # Set up logging to console, unless an earlier call already did so.
    already_attached = any(
        handler.name == console_handler_name for handler in root_logger.handlers
    )
    if not already_attached:
        console = logging.StreamHandler()
        console.set_name(console_handler_name)
        console.setLevel(logging.INFO)
        # A simpler, single-line format for console use.
        console.setFormatter(
            logging.Formatter('[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s')
        )
        root_logger.addHandler(console)

    return root_logger
def _load_split(path):
    """Read the three arrays stored back-to-back in the file at *path*.

    Returns:
        A ``(data, label, log_label)`` tuple, in the order the arrays appear
        in the file.
    """
    with open(path, 'rb') as f:
        # Each np.load call consumes the next array from the open handle, so
        # the call order must match the order in which the arrays were saved.
        data = np.load(f)
        label = np.load(f)
        log_label = np.load(f)
    return data, label, log_label


if __name__ == '__main__':
    logger = create_and_configer_logger(log_name=our_log_path, level=logging.INFO)

    # The raw labels are loaded (they sit between the data and the log labels
    # in the files) but only the log labels are used for fitting and scoring.
    parsed_train_data, parsed_train_label, parsed_train_log_label = _load_split(parsed_train_path)
    parsed_test_data, parsed_test_label, parsed_test_log_label = _load_split(parsed_test_path)
    logger.info("Finished loading data")

    logger.info(f"Run params: {generations=}, {population_size=}, {max_eval_time_mins=}, {max_time_mins=}, "
                f"{n_jobs=}, {custom_regressor_config_dict=}")

    # my_custom_accuracy returns a negated error, hence greater_is_better=True.
    my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True)

    tpot = TPOTRegressor(generations=generations,
                         population_size=population_size,
                         max_eval_time_mins=max_eval_time_mins,
                         max_time_mins=max_time_mins,
                         verbosity=3,
                         n_jobs=n_jobs,
                         scoring=my_custom_scorer,
                         random_state=1,
                         periodic_checkpoint_folder=checkpoint_folder,
                         config_dict=custom_regressor_config_dict)

    tpot.fit(parsed_train_data, parsed_train_log_label)
    logger.info("Finished fitting the model")
    logger.info(f"The best pipeline \n {tpot.fitted_pipeline_}")
    # The score is negated before logging so it is reported as a loss.
    logger.info(f"Loss on test data {-tpot.score(parsed_test_data, parsed_test_log_label)}")
    logger.info(f"Trials \n {pformat(tpot.evaluated_individuals_)}")

    tpot.export('best_model.py')