In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import mlflow
import mlflow.sklearn

# Feature matrices for the pre-split regression data set.
X_train_reg, X_test_reg = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/reg/X_train_reg.csv",
        "../data/processed/reg/X_test_reg.csv",
    )
)

# Targets: keep only the temperature column, still as a one-column DataFrame.
y_train_reg, y_test_reg = (
    pd.read_csv(csv_path)[["temperature"]]
    for csv_path in (
        "../data/processed/reg/y_train_reg.csv",
        "../data/processed/reg/y_test_reg.csv",
    )
)

In [2]:
# Every candidate is built with the same tree count and the same seed.
N_ESTIMATORS = 100
RANDOM_STATE = 42

# Candidate regressors keyed by the name used for the MLflow run.
models = dict(
    RandomForest=RandomForestRegressor(
        n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE
    ),
    GradientBoosting=GradientBoostingRegressor(
        n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE
    ),
    XGBoost=XGBRegressor(
        n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE, verbosity=0
    ),
)


In [3]:
import mlflow
import mlflow.sklearn
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score
import logging
import os
# Location of the MLflow tracking store. The MLFLOW_TRACKING_URI environment
# variable, when set, takes precedence so the notebook is not tied to one
# machine; otherwise the original local file store is used.
mlflow.set_tracking_uri(
    os.environ.get(
        "MLFLOW_TRACKING_URI",
        "file:///C:/Users/ABHINAV/Desktop/Prog/Projects/AeolusAI/mlruns",
    )
)

In [4]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import logging

# Temperature regression: cross-validate and fit every candidate in `models`,
# log each one to MLflow as its own run, then register the run with the
# highest test R2 as "TemperatureRegressor".

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("AeolusAI")

# Define experiment
mlflow.set_experiment("temperature_regression")

# Flatten targets to 1-D. np.asarray accepts both the one-column DataFrame
# loaded from CSV and an already-flattened ndarray, so re-running this cell
# does not fail on the second pass (an ndarray has no `.values`).
y_train_reg = np.asarray(y_train_reg).ravel()
y_test_reg = np.asarray(y_test_reg).ravel()

best_model = None
best_score = -np.inf
best_name = ""
best_run_id = ""

for name, model in models.items():
    try:
        with mlflow.start_run(run_name=name) as run:
            logger.info(f"🚀 Training {name}...")

            # A single cross_validate call scores both metrics on the same
            # fold fits instead of refitting every fold once per metric.
            cv_scores = cross_validate(
                model,
                X_train_reg,
                y_train_reg,
                cv=2,
                scoring=("neg_mean_absolute_error", "r2"),
            )
            cv_mae = -cv_scores["test_neg_mean_absolute_error"].mean()
            cv_r2 = cv_scores["test_r2"].mean()

            # Fits the estimator object stored in `models` in place.
            model.fit(X_train_reg, y_train_reg)
            y_pred = model.predict(X_test_reg)
            mae = mean_absolute_error(y_test_reg, y_pred)
            rmse = root_mean_squared_error(y_test_reg, y_pred)
            r2 = r2_score(y_test_reg, y_pred)

            logger.info(f"📊 MAE: {mae:.3f} | RMSE: {rmse:.3f} | R2: {r2:.3f}")

            # Log to MLflow
            mlflow.set_tags({"developer": "Abhinav", "stage": "dev"})
            mlflow.log_param("model_name", name)
            mlflow.log_metrics(
                {
                    "cv_mae": cv_mae,
                    "cv_r2": cv_r2,
                    "test_mae": mae,
                    "test_rmse": rmse,
                    "test_r2": r2,
                }
            )

            mlflow.sklearn.log_model(model, "model")

            # NOTE(review): the winner is chosen on the held-out test R2, so
            # its reported test metrics are optimistically biased; consider
            # selecting on cv_r2 instead.
            if r2 > best_score:
                best_score = r2
                best_model = model
                best_name = name
                best_run_id = run.info.run_id

    except Exception as e:
        # Best effort: a failing candidate must not stop the others, but keep
        # the traceback in the log so the failure can be diagnosed.
        logger.exception(f"❌ Error training model {name}: {str(e)}")

# Explicit None check: scikit-learn ensembles implement __len__, so plain
# truthiness is not a reliable "a model was selected" test.
if best_model is not None:
    logger.info(f"🏅 Best model: {best_name} with R2: {best_score:.3f}")
    model_uri = f"runs:/{best_run_id}/model"
    model_version = mlflow.register_model(model_uri=model_uri, name="TemperatureRegressor")

    logger.info(f"📦 Registered model version: {model_version.version}")


INFO:AeolusAI:🚀 Training RandomForest...
INFO:AeolusAI:📊 MAE: 1.305 | RMSE: 1.892 | R2: 0.961
INFO:AeolusAI:🚀 Training GradientBoosting...
INFO:AeolusAI:📊 MAE: 2.460 | RMSE: 3.284 | R2: 0.882
INFO:AeolusAI:🚀 Training XGBoost...
INFO:AeolusAI:📊 MAE: 1.489 | RMSE: 2.046 | R2: 0.954
INFO:AeolusAI:🏅 Best model: RandomForest with R2: 0.961
Registered model 'TemperatureRegressor' already exists. Creating a new version of this model...
Created version '3' of model 'TemperatureRegressor'.
INFO:AeolusAI:📦 Registered model version: 3


In [5]:
# Feature matrices for the precipitation task.
X_train_precip, X_test_precip = map(
    pd.read_csv,
    (
        "../data/processed/reg/X_train_reg.csv",
        "../data/processed/reg/X_test_reg.csv",
    ),
)

# Targets: keep only the precipitation column, still as a one-column DataFrame.
y_train_precip, y_test_precip = (
    target_frame[["precipitation"]]
    for target_frame in map(
        pd.read_csv,
        (
            "../data/processed/reg/y_train_reg.csv",
            "../data/processed/reg/y_test_reg.csv",
        ),
    )
)

In [6]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score
import numpy as np
import logging

# Precipitation regression: cross-validate and fit every candidate in
# `models`, log each one to MLflow as its own run, then register the run with
# the highest test R2 as "PrecipitationRegressor".

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("AeolusAI")

# Define experiment
mlflow.set_experiment("precipitation_regression")

# Flatten targets to 1-D. np.asarray handles both the one-column DataFrame
# loaded from CSV and an already-flattened ndarray, so the cell can be re-run.
y_train_precip = np.asarray(y_train_precip).ravel()
y_test_precip = np.asarray(y_test_precip).ravel()

best_model = None
best_score = -np.inf
best_name = ""
best_run_id = ""

for name, model in models.items():
    try:
        with mlflow.start_run(run_name=name) as run:
            logger.info(f"🌧️ Training {name} for precipitation...")

            # A single cross_validate call scores both metrics on the same
            # fold fits instead of refitting every fold once per metric.
            cv_scores = cross_validate(
                model,
                X_train_precip,
                y_train_precip,
                cv=2,
                scoring=("neg_mean_absolute_error", "r2"),
            )
            cv_mae = -cv_scores["test_neg_mean_absolute_error"].mean()
            cv_r2 = cv_scores["test_r2"].mean()

            # Fits the estimator object stored in `models` in place, replacing
            # any earlier fit of that same object.
            model.fit(X_train_precip, y_train_precip)
            y_pred = model.predict(X_test_precip)
            mae = mean_absolute_error(y_test_precip, y_pred)
            rmse = root_mean_squared_error(y_test_precip, y_pred)
            r2 = r2_score(y_test_precip, y_pred)

            logger.info(f"📊 MAE: {mae:.3f} | RMSE: {rmse:.3f} | R2: {r2:.3f}")

            # Log to MLflow
            mlflow.set_tags({"developer": "Abhinav", "stage": "dev"})
            mlflow.log_param("model_name", name)
            mlflow.log_metrics(
                {
                    "cv_mae": cv_mae,
                    "cv_r2": cv_r2,
                    "test_mae": mae,
                    "test_rmse": rmse,
                    "test_r2": r2,
                }
            )

            mlflow.sklearn.log_model(model, "model")

            # NOTE(review): the winner is chosen on the held-out test R2, so
            # its reported test metrics are optimistically biased; consider
            # selecting on cv_r2 instead.
            if r2 > best_score:
                best_score = r2
                best_model = model
                best_name = name
                best_run_id = run.info.run_id

    except Exception as e:
        # Best effort: a failing candidate must not stop the others, but keep
        # the traceback in the log so the failure can be diagnosed.
        logger.exception(f"❌ Error training model {name}: {str(e)}")

# Explicit None check: scikit-learn ensembles implement __len__, so plain
# truthiness is not a reliable "a model was selected" test.
if best_model is not None:
    logger.info(f"🏅 Best precipitation model: {best_name} with R2: {best_score:.3f}")
    model_uri = f"runs:/{best_run_id}/model"
    model_version = mlflow.register_model(model_uri=model_uri, name="PrecipitationRegressor")

    logger.info(f"📦 Registered model version: {model_version.version}")


INFO:AeolusAI:🌧️ Training RandomForest for precipitation...
INFO:AeolusAI:📊 MAE: 0.175 | RMSE: 0.531 | R2: 0.184
INFO:AeolusAI:🌧️ Training GradientBoosting for precipitation...
INFO:AeolusAI:📊 MAE: 0.181 | RMSE: 0.555 | R2: 0.110
INFO:AeolusAI:🌧️ Training XGBoost for precipitation...
INFO:AeolusAI:📊 MAE: 0.183 | RMSE: 0.558 | R2: 0.101
INFO:AeolusAI:🏅 Best precipitation model: RandomForest with R2: 0.184
Registered model 'PrecipitationRegressor' already exists. Creating a new version of this model...
Created version '2' of model 'PrecipitationRegressor'.
INFO:AeolusAI:📦 Registered model version: 2
