In [1]:
import logging

from modelhub.core.predictions.ai_models.xgboost import XGBoostRegressorModel
from modelhub.core.predictions.ai_models.xgboost_classifier import \
	XGBoostClassifierModel
from modelhub.core.predictions.trainer import Trainer


def _retrain_models() -> None:
	"""
	Build the regressor and classifier wrappers and train them via ``Trainer``.

	Progress is reported with ``print``. This function itself does not persist
	anything; the original note says a pickle ends up in Azure Blob Storage,
	which presumably happens inside ``Trainer.train`` (confirm).
	"""
	print("Queue is working")
	# IsolationForestModel was commented out in the original and is not trained.
	models_to_train = [XGBoostRegressorModel(), XGBoostClassifierModel()]

	# Logging smoke test: emits a single DEBUG record on the "My logs" logger.
	debug_log = logging.getLogger("My logs")
	debug_log.setLevel(logging.DEBUG)
	debug_log.debug("TEST")

	model_trainer = Trainer(models_to_train)
	print("Created models, training...")
	model_trainer.train()
	print("Models have been retrained")


# Run the retraining as soon as this cell executes; the function returns None,
# so everything shown below the cell comes from print/log output.
_retrain_models()

Queue is working
Created models, training...
Querying all data from CosmosDB
Query complete
Saving model
Uploaded XGBoostRegressorModel_2023-12-16.pkl successfully!
Model saved at  /tmp/models/XGBoostRegressorModel_2023-12-16.pkl
Shapes: (100000, 28), (100000,)
Index(['timestamp', 'segment_id', 'heavy', 'car', 'v85', 'latitude',
       'longitude', 'altitude', 'distance_km', 'current.temp_c', 'current.uv',
       'current.gust_kph', 'current.air_quality.co', 'current.air_quality.no2',
       'current.air_quality.o3', 'current.air_quality.so2',
       'current.air_quality.pm2_5', 'current.air_quality.pm10', 'p1', 'p2',
       'total_traffic', 'dayofweek', 'hour', 'co_aqi', 'no2_aqi', 'o3_aqi',
       'so2_aqi', 'pm25_aqi', 'pm10_aqi', 'aqi'],
      dtype='object')
Shapes: (80000, 29), (20000, 29)
Saving model
Uploaded XGBoostClassifierModel_2023-12-16.pkl successfully!
Model saved at  /tmp/models/XGBoostClassifierModel_2023-12-16.pkl
Models have been retrained


In [2]:
from modelhub.services.cosmosdb import CosmosDBAccessPoint

# Fetch the dataset through the CosmosDB access point and keep it in `df`,
# which the preprocessing cell further down passes to `preprocess`.
cosmos = CosmosDBAccessPoint()
df = cosmos.query_all()

Querying all data from CosmosDB
Query complete


In [3]:
from modelhub.core.preprocessor.regressor_preprocessor import RegressorPreprocessor

# Run the regressor preprocessing on the queried frame. The result is not bound
# to a name; it is left as the cell's last expression so the notebook shows it.
preprocessor = RegressorPreprocessor()
preprocessor.preprocess(df)

(                     current.temp_c  current.uv  current.gust_kph  \
 timestamp                                                           
 2023-11-20 12:00:00       10.603448    2.017241         27.186207   
 2023-11-20 13:00:00       10.601556    2.302701         28.062324   
 2023-11-20 14:00:00        9.925475    2.116246         26.612991   
 2023-11-20 15:00:00        9.220963    2.023698         27.345479   
 2023-11-20 16:00:00        9.124434    1.324544         26.014620   
 ...                             ...         ...               ...   
 2023-11-24 07:00:00       10.157746    2.269014         24.131972   
 2023-11-24 08:00:00       10.157746    2.269014         24.131972   
 2023-11-24 09:00:00       10.157746    2.269014         24.131972   
 2023-11-24 10:00:00       10.157746    2.269014         24.131972   
 2023-11-24 11:00:00       10.157746    2.269014         24.131972   
 
                      current.air_quality.co  current.air_quality.no2  \
 timestamp     

In [4]:
# First, import the necessary modules
from modelhub.core.predictions.predictor import Predictor
from modelhub.services.busservice import BusServiceAccessPoint

# Shared instances for the cells below: `bus_service_ap` is read by `_predict()`;
# `predictor` is only referenced from commented-out code in the visible cells.
predictor = Predictor()
bus_service_ap = BusServiceAccessPoint()


# Define the asynchronous function
async def _predict(n: int = 50) -> list:
	"""
	Receive a batch of aggregated messages from the bus service access point.

	Despite the name, no model is invoked here and nothing is sent back: the
	prediction and publishing steps are currently commented out below this
	function, so it only fetches the messages.

	Args:
		n: Batch size forwarded to ``bus_service_ap.receive_messages``.
			Defaults to 50, the value that was previously hard-coded.

	Returns:
		Whatever ``receive_messages`` returns; in the recorded run this was a
		list of JSON-encoded strings, one per message.
	"""
	print("Predicting...")
	# Relies on the module-level `bus_service_ap` created earlier in this cell.
	return await bus_service_ap.receive_messages(n = n)


# predictions = predictor.predict_batch(aggregated_messages)
# formatted_predictions = format_predictions_for_queue(
# 	predictions,
# 	aggregated_messages,
# )
# return formatted_predictions


# print("Predictions have been sent to Azure Service Bus")
# return predictions

# Top-level `await` works here because the notebook kernel runs cells inside an
# event loop. `results` is kept as a global: later cells decode it with json.loads.
results = await _predict()
results


Predicting...


['{\n  "segment_id": 9000005210,\n  "heavy": 6.004454343,\n  "car": 72.0311804009,\n  "v85": 31.0,\n  "coordinates": [\n    [\n      [\n        4.3127545959,\n        50.9075677025\n      ],\n      [\n        4.3117234959,\n        50.9080365025\n      ],\n      [\n        4.3114749959,\n        50.9081425025\n      ],\n      [\n        4.3110873959,\n        50.9083202025\n      ],\n      [\n        4.3109518959,\n        50.9083933025\n      ]\n    ]\n  ],\n  "timestamp": 1701954000000,\n  "latitude": 50.902,\n  "longitude": 4.314,\n  "altitude": "52.8",\n  "sensor_type": {\n    "id": 9,\n    "name": "DHT22",\n    "manufacturer": "various"\n  },\n  "distance_km": 0.6252291097,\n  "current.last_updated": "2023-12-07 14:00",\n  "current.temp_c": 5.0,\n  "current.uv": 3.0,\n  "current.gust_kph": 26.8,\n  "current.air_quality.co": 303.8,\n  "current.air_quality.no2": 17.5,\n  "current.air_quality.o3": 43.6,\n  "current.air_quality.so2": 2.8,\n  "current.air_quality.pm2_5": 5.1,\n  "curre

In [8]:
 from modelhub.core.preprocessor.classifier_preprocessor import ClassifierPreprocessor
import pandas as pd
import json

aggregated_messages = [json.loads(result) for result in results]
aggregated_messages = pd.DataFrame(aggregated_messages)
aggregated_messages = ClassifierPreprocessor().preprocess_prediction(
	aggregated_messages)

In [6]:
import pickle
import os
from modelhub.services.blobstorage import BlobStorageAccessPoint

# Download the stored model files; the recorded output shows them landing in
# ./models relative to the current working directory.
BlobStorageAccessPoint().pull_models()

models_dir = os.path.join(os.getcwd(), "models")

# Load every file in the models directory, closing each handle as soon as it has
# been read (the previous one-liner left the files open until garbage collection).
# The list keeps os.listdir's order, which is arbitrary.
# SECURITY: pickle.load can execute arbitrary code from the file.
# NOTE(review): assumes the blob container only holds pickles written by our own
# training job — confirm.
models = []
for model_filename in os.listdir(models_dir):
	with open(os.path.join(models_dir, model_filename), "rb") as model_file:
		models.append(pickle.load(model_file))

models

XGBoostClassifierModel_2023-12-16.pkl
Downloading XGBoostClassifierModel_2023-12-16.pkl to /Users/fifi/Repositories/School/airene/modelhub/modelhub/notebooks/models/XGBoostClassifierModel_2023-12-16.pkl
XGBoostRegressorModel_2023-12-16.pkl
Downloading XGBoostRegressorModel_2023-12-16.pkl to /Users/fifi/Repositories/School/airene/modelhub/modelhub/notebooks/models/XGBoostRegressorModel_2023-12-16.pkl


[XGBRegressor(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...),
 XGBClassifier(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, device=None, early_stopping_rounds=None,
               enable_categorical=Fals

In [10]:
# Select the classifier by type instead of by position: `models` is built from
# `os.listdir`, whose ordering is arbitrary, so index 1 is not guaranteed to be
# the classifier on every machine or filesystem.
model = next(
	(candidate for candidate in models if type(candidate).__name__ == "XGBClassifier"),
	None,
)
if model is None:
	raise LookupError("No XGBClassifier found among the loaded models")

# `aggregated_messages` must be the classifier-preprocessed frame at this point.
model.predict(aggregated_messages)

array([1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 0, 0, 0])

In [None]:
# NOTE(review): this cell has no execution count, so it appears not to have been run.
# It rebinds `aggregated_messages` to the raw `results` (JSON strings in the
# recorded run), replacing the preprocessed frame built earlier; re-running the
# classifier cell after this one would therefore feed it different input.
# NOTE(review): the regressor wrapper is freshly constructed here; whether its
# `predict` loads a trained model or accepts raw messages is not visible — confirm.
aggregated_messages = results
xgbr = XGBoostRegressorModel()
xgbr.predict(aggregated_messages)
# predictions = predictor.predict_batch(aggregated_messages)
# predictions
# formatted_predictions = format_predictions_for_queue(
# 	predictions,
# 	aggregated_messages,
# )

In [None]:
# NOTE(review): in the visible cells `predictions` is only assigned inside
# commented-out code, so this lookup likely raises NameError on a fresh kernel —
# confirm, and restore the `predictor.predict_batch(...)` call or drop this cell.
predictions['classifier']