In [None]:
import pickle 
import os
import sys
from pathlib import Path

import mlflow
import pandas as pd

# The project package (Air_Quality_temperature_predictor) is imported from the
# directory one level above the kernel's working directory.
air_quality_temperature_predictor_path = '../'
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if air_quality_temperature_predictor_path not in sys.path:
    sys.path.append(air_quality_temperature_predictor_path)

from Air_Quality_temperature_predictor.utils import split_stratified
from Air_Quality_temperature_predictor.data_preprocessing import DataTransformer
from Air_Quality_temperature_predictor.model_test import test_model_cv

In [73]:
# Load every pickled model found in the project's models directory into
# `models`, keyed by the file name without its extension.
models_path = Path('../Air_Quality_temperature_predictor/models')

# Keep only regular, non-hidden files: sub-directories (for example
# .ipynb_checkpoints) and dotfiles would otherwise fail in open()/pickle.load.
# Sorting makes the load order independent of the filesystem's listing order.
model_files = sorted(
    entry.name
    for entry in models_path.iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)
# Path.stem strips only the final suffix, so a name such as 'x_v1.2.pkl' keeps
# its version ('x_v1.2') instead of collapsing to 'x_v1' and silently
# overwriting another entry in `models`.
model_names = [Path(model_file).stem for model_file in model_files]
models = {}

for model_name, model_file in zip(model_names, model_files):
    model_path = models_path / model_file
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load model files that were produced by this project.
    with open(model_path, 'rb') as f:
        models[model_name] = pickle.load(f)
print(f"Loaded models: {models}")
    

Loaded models: {'DecisionTree_model_v1': Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('numerical',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(missing_values=-200)),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['CO(GT)', 'PT08.S1(CO)',
                                                   'NMHC(GT)', 'C6H6(GT)',
                                                   'PT08.S2(NMHC)', 'NOx(GT)',
                                                   'PT08.S3(NOx)', 'NO2(GT)',
                                                   'PT08.S4(NO2)',
                                                   'PT08.S5(O3)', 'RH', 'AH']),
                                                 ('categorica

In [74]:
# Read the AirQualityUCI spreadsheet from the data folder one level up.
dataset_file = '../data/AirQualityUCI.xlsx'
dataframe = pd.read_excel(dataset_file)

In [75]:
# Split the full dataset into a training part and a test part using the
# project's split_stratified helper.
# NOTE(review): the stratification criterion and the split ratio are decided
# inside split_stratified and are not visible here — confirm before relying on them.
train_data, test_data = split_stratified(dataframe)

(9357, 16)


In [76]:
# Separate the features from the prediction target for both splits.
# The target column label is named once instead of being repeated four times.
target_column = 'T'
# `columns=` already selects the column axis, so the extra `axis=1` the call
# used to carry was redundant and has been dropped; the result is unchanged.
X_train, y_train = train_data.drop(columns=[target_column]), train_data[target_column]
X_test, y_test = test_data.drop(columns=[target_column]), test_data[target_column]

In [None]:
# Send all runs to the MLflow tracking server on localhost and group them
# under a single experiment.
mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment('Air Quality Temperature Predictor (V1)')

# One MLflow run per loaded model, named after the model's key in `models`.
for model_name, model in models.items():
    # Parameters reported by the entry stored under 'model' in the loaded object.
    model_params = model['model'].get_params()
    # NOTE(review): test_model_cv is a project helper; presumably it returns an
    # array of cross-validation scores on the data passed in — confirm.
    scores = test_model_cv(model, X_test, y_test)
    mean, std = scores.mean(), scores.std()

    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(model_params)
        # Same two metrics, logged in the same order: mean first, then spread.
        for metric_name, metric_value in (('mean_score', mean), ('std_score', std)):
            mlflow.log_metric(metric_name, metric_value)
        mlflow.sklearn.log_model(model, model_name)