In [7]:
import numpy as np
import pandas as pd
import logging
import mlflow
from minio import Minio
from app.pubsub import publish
from predicting_forest_fires.delta.setup import read_from_delta, get_spark_session
from predicting_forest_fires.data.custom import read_csv_from_minio, preprocess_inference_set, transform_and_validate_redis_data, load_trained_model_by_name_and_version, model_prediction
from predicting_forest_fires.config.config import MINIO_ROOT_USER, MINIO_ROOT_PASSWORD, MINIO_BUCKET, MINIO_OBJECT_NAME, MINIO_URL

In [8]:
# Configure the root logger to emit INFO-and-above records so the progress
# messages logged by the later cells show up in the cell output.
# Gotcha: basicConfig() does nothing if the root logger already has handlers
# (e.g. when this cell is re-run in the same kernel).
logging.basicConfig(level=logging.INFO)
# Logger named after the current module; used by the cells that follow.
logger = logging.getLogger(__name__)

In [9]:
# Single raw observation used as the sample inference request; it is handed
# unchanged to transform_and_validate_redis_data(redis_data=...).
# NOTE(review): the field names look like the forest-fires dataset columns --
# confirm they match the schema the model was trained on.
data = dict(
    X=5, Y=6,
    month="aug", day="tue",
    DMC=150.5, FFMC=90.2, DC=300.7, ISI=15.4,
    temp=20.3, RH=50, wind=3.4, rain=0.2,
)

In [5]:
# End-to-end inference walk-through for the sample payload:
# validate -> preprocess -> load model -> predict -> publish.
# Every stage logs its outcome so a failure can be located from the cell output.
logger.info(f"Starting prediction task with data: {data}")

# Stage 1: validation of the raw payload.
validated_data = transform_and_validate_redis_data(redis_data=data)
logger.info(f"Data validated successfully: {validated_data}")

# Stage 2: feature preprocessing of the validated payload.
processed_data = preprocess_inference_set(data=validated_data)
logger.info(f"Data processed successfully: {processed_data}")

# Stage 3: fetch the trained model by name and version.
# NOTE(review): the recorded run failed at this step because the host 'mlflow'
# could not be resolved -- presumably that hostname only exists inside the
# project's container network; confirm the tracking URI for local runs.
model = load_trained_model_by_name_and_version(model_name="Forest Fire Random Forest Classifier", version=2)
logger.info("Model loaded successfully")

# Stage 4: run the model on the processed features.
prediction = model_prediction(estimator=model, X_test=processed_data)

# Turn each predicted value into a label (0 -> "No", anything else -> "Yes")
# and concatenate the labels with no separator.
results = "".join("No" if pred == 0 else "Yes" for pred in prediction)
logger.info(f"Prediction results generated: {results}")

# Stage 5: publish the label string on the "results" channel.
publish(channel="results", message=results)
logger.info("Prediction results published to Redis channel 'results'")

# Last expression of the cell: display the label string.
results

INFO:__main__:Starting prediction task with data: {'X': 5, 'Y': 6, 'month': 'aug', 'day': 'tue', 'DMC': 150.5, 'FFMC': 90.2, 'DC': 300.7, 'ISI': 15.4, 'temp': 20.3, 'RH': 50, 'wind': 3.4, 'rain': 0.2}
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row[column] = (
INFO:__main__:Data validated successfully:    X  Y month  day    DMC  FFMC     DC   ISI  temp  RH  wind  rain
0  5  6   aug  tue  150.5  90.2  300.7  15.4  20.3  50   3.4   0.2
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  processed_data.drop(columns=["area"], inplace=True, errors="ignore")
INFO:__main__:Data processed successfully:   grid_zone_5_6 month_dec grid_zone_9_9 grid_zone_3_6 grid_zone_6_3  \
0      

Data from 'forest_fires_classification.csv':




MlflowException: API request to http://mlflow:5000/api/2.0/mlflow/model-versions/get-download-uri failed with exception HTTPConnectionPool(host='mlflow', port=5000): Max retries exceeded with url: /api/2.0/mlflow/model-versions/get-download-uri?name=Forest+Fire+Random+Forest+Classifier&version=2 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x28c40fcb0>: Failed to resolve 'mlflow' ([Errno 8] nodename nor servname provided, or not known)"))

In [10]:
# Load the registered model directly through MLflow's pyfunc flavor instead of
# going through the project helper.
#
# The registry name and version live in named constants and are interpolated
# into the URI. Nesting double quotes inside a double-quoted f-string only
# parses on Python 3.12+ (PEP 701) and is a SyntaxError on older interpreters,
# so the literals are kept out of the replacement fields.
MODEL_NAME = "Forest Fire Random Forest Classifier"
MODEL_VERSION = 2
model_uri = f"models:/{MODEL_NAME}/{MODEL_VERSION}"

# Needs a reachable MLflow tracking/registry server; raises MlflowException
# when the server cannot be contacted.
model = mlflow.pyfunc.load_model(model_uri)
model



MlflowException: API request to http://mlflow:5000/api/2.0/mlflow/model-versions/get-download-uri failed with exception HTTPConnectionPool(host='mlflow', port=5000): Max retries exceeded with url: /api/2.0/mlflow/model-versions/get-download-uri?name=Forest+Fire+Random+Forest+Classifier&version=2 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x28d0f3d70>: Failed to resolve 'mlflow' ([Errno 8] nodename nor servname provided, or not known)"))