In [1]:
# !pip install -r requirements.txt

In [22]:
import snowflake.snowpark.functions as F
from snowflake.snowpark.session import Session
from snowflake.snowpark.types import StructType, StructField, FloatType
from snowflake.snowpark import Session
import os
import json
import pandas as pd
import numpy as np

In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import xgboost as xgb

In [30]:
connection_parameters = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "schema": "SENSOR",
    "database": "THINGSBOARD",
    "role": os.getenv("SNOWFLAKE_ROLE"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
}

session = Session.builder.configs(connection_parameters).create()

In [31]:
co_df = session.table("co_table_predication")

In [32]:
co_df.show(5)

--------------------------------------------
|"TS"                 |"CO"                |
--------------------------------------------
|2024-01-01 12:00:00  |4.379166666666667   |
|2024-01-02 12:00:00  |4.270833333333335   |
|2024-01-03 12:00:00  |4.179166666666668   |
|2024-01-04 12:00:00  |4.2250000000000005  |
|2024-01-05 12:00:00  |4.175               |
--------------------------------------------



In [33]:
from snowflake.snowpark.functions import col, date_part, dayofweek, iff


# Add time features to existing DataFrame 
co_df = co_df.withColumn('day', date_part('day', col('TS'))) \
    .withColumn('month', date_part('month', col('TS'))) \
    .withColumn('day_of_week', dayofweek(col('TS'))) 

co_df.show(5)

------------------------------------------------------------------------------
|"TS"                 |"CO"                |"DAY"  |"MONTH"  |"DAY_OF_WEEK"  |
------------------------------------------------------------------------------
|2024-01-01 12:00:00  |4.379166666666667   |1      |1        |1              |
|2024-01-02 12:00:00  |4.270833333333335   |2      |1        |2              |
|2024-01-03 12:00:00  |4.179166666666668   |3      |1        |3              |
|2024-01-04 12:00:00  |4.2250000000000005  |4      |1        |4              |
|2024-01-05 12:00:00  |4.175               |5      |1        |5              |
------------------------------------------------------------------------------



In [34]:
df =co_df.toPandas()

In [35]:
df.head()

Unnamed: 0,TS,CO,DAY,MONTH,DAY_OF_WEEK
0,2024-01-01 12:00:00,4.379167,1,1,1
1,2024-01-02 12:00:00,4.270833,2,1,2
2,2024-01-03 12:00:00,4.179167,3,1,3
3,2024-01-04 12:00:00,4.225,4,1,4
4,2024-01-05 12:00:00,4.175,5,1,5


In [41]:
df = df.sort_values('TS').reset_index(drop=True)

In [43]:
df.drop("TS",axis=1, inplace=True)

In [44]:
df.head()

Unnamed: 0,CO,DAY,MONTH,DAY_OF_WEEK
0,4.379167,1,1,1
1,4.270833,2,1,2
2,4.179167,3,1,3
3,4.225,4,1,4
4,4.175,5,1,5


In [45]:
# Define feature columns
feature_cols = [
    'DAY', 'MONTH', 'DAY_OF_WEEK'
]

X = df[feature_cols]
y = df['CO']



In [50]:
import mlflow

mlflow.set_tracking_uri("https://legendary-dollop-7vr49x7r7v9pcxwrw-5000.app.github.dev/")


In [51]:
mlflow.set_experiment("CO_Predict")


2024/11/26 14:55:01 INFO mlflow.tracking.fluent: Experiment with name 'CO_Predict' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/artifacts/6', creation_time=1732632901301, experiment_id='6', last_update_time=1732632901301, lifecycle_stage='active', name='CO_Predict', tags={}>

In [52]:
tscv = TimeSeriesSplit(n_splits=3)

# Start MLflow Run
with mlflow.start_run(run_name="RandomForest_Regressor_Run") as run:
    # Define and Train the Model
    rf = RandomForestRegressor(n_estimators=200, max_depth=None, min_samples_split=2, random_state=42)
    rf.fit(X, y)

    # Predictions (Using the entire dataset for simplicity; consider using a separate test set)
    predictions = rf.predict(X)

    # Calculate Metrics
    mae = mean_absolute_error(y, predictions)
    rmse = np.sqrt(mean_squared_error(y, predictions))

    # Log Parameters
    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", 200)
    mlflow.log_param("max_depth", None)
    mlflow.log_param("min_samples_split", 2)
    mlflow.log_param("random_state", 42)

    # Log Metrics
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("RMSE", rmse)

    # Log the Model
    mlflow.sklearn.log_model(rf, "model")

    print(f"Run ID: {run.info.run_id}")
    print(f"Logged MAE: {mae:.2f}, RMSE: {rmse:.2f}")

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



Run ID: 3ff63f85ae5a447f95316d65e9328363
Logged MAE: 0.03, RMSE: 0.03
🏃 View run RandomForest_Regressor_Run at: https://legendary-dollop-7vr49x7r7v9pcxwrw-5000.app.github.dev/#/experiments/6/runs/3ff63f85ae5a447f95316d65e9328363
🧪 View experiment at: https://legendary-dollop-7vr49x7r7v9pcxwrw-5000.app.github.dev/#/experiments/6


In [53]:
# Get the Run ID from the previous step
run_id = run.info.run_id

# Register the model
model_name = "CO_Predict"
result = mlflow.register_model(
    f"runs:/{run_id}/model",
    model_name
)

print(f"Registered Model: {result.name}, Version: {result.version}")


Successfully registered model 'CO_Predict'.
2024/11/26 14:55:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CO_Predict, version 1


Registered Model: CO_Predict, Version: 1


Created version '1' of model 'CO_Predict'.


In [54]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

# Transition the model to 'Staging'
client.transition_model_version_stage(
    name=model_name,
    version=result.version,
    stage="Staging",
    archive_existing_versions=True
)

print(f"Model {model_name} version {result.version} transitioned to Staging.")


Model CO_Predict version 1 transitioned to Staging.


  client.transition_model_version_stage(
