In [1]:
import snowflake.snowpark.functions as F
from snowflake.snowpark.session import Session
from snowflake.snowpark.types import StructType, StructField, FloatType
from snowflake.snowpark import Session
import os
import json

In [2]:
# Snowflake connection settings. Account, user, password, role and warehouse
# are read from environment variables (an unset variable yields None);
# the database and schema are fixed for this notebook.
connection_parameters = dict(
    account=os.environ.get("SNOWFLAKE_ACCOUNT"),
    user=os.environ.get("SNOWFLAKE_USER"),
    password=os.environ.get("SNOWFLAKE_PASSWORD"),
    schema="SENSOR",
    database="THINGSBOARD",
    role=os.environ.get("SNOWFLAKE_ROLE"),
    warehouse=os.environ.get("SNOWFLAKE_WAREHOUSE"),
)

# Build the Snowpark session from the settings above.
session = Session.builder.configs(connection_parameters).create()

In [3]:
# Reference the CO_TABLE table through the Snowpark session.
environmental_df = session.table("CO_TABLE")

In [4]:
# Print five rows of the raw table as a quick sanity check.
environmental_df.show(5)

------------------------------
|"TS"                 |"CO"  |
------------------------------
|2024-01-02 12:30:00  |5.0   |
|2024-01-02 12:20:00  |2.7   |
|2024-01-02 12:10:00  |5.0   |
|2024-01-02 12:00:00  |2.8   |
|2024-01-02 11:50:00  |5.0   |
------------------------------



In [5]:
from snowflake.snowpark.functions import col, date_part, dayofweek, iff


# Derive calendar features from the TS timestamp column.
environmental_df = (
    environmental_df
    .withColumn('day', date_part('day', col('TS')))
    .withColumn('month', date_part('month', col('TS')))
    .withColumn('hour', date_part('hour', col('TS')))
    .withColumn('minute', date_part('minute', col('TS')))
    .withColumn('day_of_week', dayofweek(col('TS')))
    # DAYOFWEEK numbering depends on the WEEK_START parameter: with the
    # default (0) it runs 0 (Sunday) .. 6 (Saturday), so testing it against
    # [6, 7] would never flag Sunday. DAYOFWEEKISO is always 1 (Monday) ..
    # 7 (Sunday), which makes 6/7 mean Saturday/Sunday regardless of settings.
    .withColumn(
        'is_weekend',
        iff(date_part('dayofweekiso', col('TS')).isin([6, 7]), True, False),
    )
)

# Preview the table with the new feature columns.
environmental_df.show(5)

---------------------------------------------------------------------------------------------------
|"TS"                 |"CO"  |"DAY"  |"MONTH"  |"HOUR"  |"MINUTE"  |"DAY_OF_WEEK"  |"IS_WEEKEND"  |
---------------------------------------------------------------------------------------------------
|2024-01-02 12:30:00  |5.0   |2      |1        |12      |30        |2              |False         |
|2024-01-02 12:20:00  |2.7   |2      |1        |12      |20        |2              |False         |
|2024-01-02 12:10:00  |5.0   |2      |1        |12      |10        |2              |False         |
|2024-01-02 12:00:00  |2.8   |2      |1        |12      |0         |2              |False         |
|2024-01-02 11:50:00  |5.0   |2      |1        |11      |50        |2              |False         |
---------------------------------------------------------------------------------------------------



In [6]:
# Drop the raw timestamp column; only CO and the derived calendar features are kept.
environmental_df=environmental_df.drop("TS")

In [7]:
# Confirm that TS is gone and the feature columns remain.
environmental_df.show(5)

-----------------------------------------------------------------------------
|"CO"  |"DAY"  |"MONTH"  |"HOUR"  |"MINUTE"  |"DAY_OF_WEEK"  |"IS_WEEKEND"  |
-----------------------------------------------------------------------------
|5.0   |2      |1        |12      |30        |2              |False         |
|2.7   |2      |1        |12      |20        |2              |False         |
|5.0   |2      |1        |12      |10        |2              |False         |
|2.8   |2      |1        |12      |0         |2              |False         |
|5.0   |2      |1        |11      |50        |2              |False         |
-----------------------------------------------------------------------------



In [8]:
# Convert the Snowpark DataFrame to a pandas DataFrame for local modelling.
pandas_df =environmental_df.toPandas()

In [9]:
# Display the first rows of the pandas copy.
pandas_df.head()

Unnamed: 0,CO,DAY,MONTH,HOUR,MINUTE,DAY_OF_WEEK,IS_WEEKEND
0,5.0,2,1,12,30,2,False
1,2.7,2,1,12,20,2,False
2,5.0,2,1,12,10,2,False
3,2.8,2,1,12,0,2,False
4,5.0,2,1,11,50,2,False


In [10]:
# Pairwise correlation between all columns. A column that is constant in the
# data has no defined correlation and is reported as NaN.
pandas_df.corr()

Unnamed: 0,CO,DAY,MONTH,HOUR,MINUTE,DAY_OF_WEEK,IS_WEEKEND
CO,1.0,0.000135,,-0.057483,-0.09024,0.000135,
DAY,0.000135,1.0,,-0.409194,-0.014686,1.0,
MONTH,,,,,,,
HOUR,-0.057483,-0.409194,,1.0,-0.003983,-0.409194,
MINUTE,-0.09024,-0.014686,,-0.003983,1.0,-0.014686,
DAY_OF_WEEK,0.000135,1.0,,-0.409194,-0.014686,1.0,
IS_WEEKEND,,,,,,,


In [None]:
#####  Training Deep Learning

In [19]:
#!!pip install --upgrade pip
#!pip install tensorflow
#!pip install mlflow


In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import mlflow
import mlflow.sklearn
import mlflow.tensorflow

ModuleNotFoundError: No module named 'tensorflow'

In [11]:
# Send all runs to the remote MLflow tracking server.
mlflow.set_tracking_uri("https://musical-spork-xp9vpjwpjvp36xgx-5000.app.github.dev/")
# set_experiment activates the experiment and creates it first when it does not
# exist yet. A separate create_experiment call is unnecessary and raises on
# every re-run of this cell because the experiment name is already taken.
mlflow.set_experiment("CO_Experiment")

NameError: name 'mlflow' is not defined

In [79]:
def prepare_data(pandas_df, target='CO', test_size=0.2, random_state=42):
    """Split a table into standardized train/test features and targets.

    Parameters
    ----------
    pandas_df : pandas.DataFrame
        Table containing the target column; every other column is used as a
        feature.
    target : str, default 'CO'
        Name of the column to predict.
    test_size : float, default 0.2
        Fraction of rows held out for testing (passed to train_test_split).
    random_state : int, default 42
        Seed for the train/test shuffle so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The feature
        arrays are the outputs of StandardScaler; the targets are the
        corresponding slices of ``pandas_df[target]``.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``pandas_df``.
    """
    if target not in pandas_df.columns:
        raise KeyError(f"target column {target!r} not found in DataFrame")

    X = pandas_df.drop(columns=[target])
    y = pandas_df[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training rows only, then apply the same transform
    # to the test rows, so test-set statistics never influence the scaling.
    # NOTE: the fitted scaler is not returned, so it cannot be reused later.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test
X_train_scaled, X_test_scaled, y_train, y_test = prepare_data(pandas_df)


In [80]:
# Spot-check one standardized training row (index 10).
X_train_scaled[10]

array([ 0.88277033, -0.5860985 ,  0.        , -1.25865385, -0.75592895,
        0.        ,  0.55978854,  0.90656749, -0.75592895,  0.        ])

In [81]:
def train_dl(X_train, X_test, y_train, y_test, epochs=50, batch_size=32, random_state=42):
    """Train a small dense regression network and record the run in MLflow.

    The network is Input -> Dense(64, relu) -> Dense(32, relu) -> Dense(1),
    compiled with the Adam optimizer and mean-squared-error loss. 20% of the
    training data is held out by Keras for validation during fitting.

    Parameters
    ----------
    X_train, X_test : 2-D array-like
        Feature matrices; the number of columns defines the input width.
    y_train, y_test : 1-D array-like
        Regression targets aligned with the feature matrices.
    epochs : int, default 50
        Number of training epochs.
    batch_size : int, default 32
        Mini-batch size used while fitting.
    random_state : int or None, default 42
        Seed for reproducible training. When not None this reseeds the global
        Python, NumPy and TensorFlow random generators; pass None to leave
        them untouched.

    Returns
    -------
    tuple
        ``(model, mse, r2)``: the fitted Keras model and its test-set mean
        squared error and R^2 score.
    """
    if random_state is not None:
        # Makes weight initialisation and batch shuffling repeatable.
        tf.keras.utils.set_random_seed(random_state)

    with mlflow.start_run(run_name="deep_learning"):
        model = Sequential([
            # An explicit Input layer replaces `input_shape=` on the first
            # Dense layer, which current Keras warns against.
            tf.keras.Input(shape=(X_train.shape[1],)),
            Dense(64, activation='relu'),
            Dense(32, activation='relu'),
            Dense(1),
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            verbose=0,
        )

        # predict() returns shape (n, 1); flatten so it lines up with y_test.
        y_pred = model.predict(X_test).ravel()
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Log the values actually used so the record cannot drift from the code.
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("random_state", random_state)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("r2", r2)
        mlflow.tensorflow.log_model(model, "deep_learning_model")
        return model, mse, r2


In [82]:
# Train the neural network on the scaled split and report its test-set metrics.
dl_model, dl_mse, dl_r2 = train_dl(X_train_scaled, X_test_scaled, y_train, y_test)
print(f"Deep Learning - MSE: {dl_mse:.4f}, R2: {dl_r2:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




🏃 View run deep_learning at: https://musical-spork-xp9vpjwpjvp36xgx-5000.app.github.dev/#/experiments/6/runs/a0eb1704442a46f6b5aaaba32b925aec
🧪 View experiment at: https://musical-spork-xp9vpjwpjvp36xgx-5000.app.github.dev/#/experiments/6
Deep Learning - MSE: 0.0490, R2: 0.9435


In [None]:
###  Tree model Training

In [83]:
def train_rf(X_train, X_test, y_train, y_test, n_estimators=100, random_state=42):
    """Train a random-forest regressor and record the run in MLflow.

    Parameters
    ----------
    X_train, X_test : 2-D array-like
        Feature matrices for fitting and evaluation.
    y_train, y_test : 1-D array-like
        Regression targets aligned with the feature matrices.
    n_estimators : int, default 100
        Number of trees in the forest.
    random_state : int or None, default 42
        Seed passed to RandomForestRegressor for repeatable results.

    Returns
    -------
    tuple
        ``(rf, mse, r2)``: the fitted forest and its test-set mean squared
        error and R^2 score.
    """
    with mlflow.start_run(run_name="random_forest"):
        rf = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
        rf.fit(X_train, y_train)

        # Evaluate on the held-out test set.
        y_pred = rf.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Log the values actually used so the record cannot drift from the code.
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("random_state", random_state)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("r2", r2)

        # Store the fitted model as a run artifact.
        mlflow.sklearn.log_model(rf, "random_forest_model")

        return rf, mse, r2


In [84]:
# Train the random forest on the same scaled split and report its test-set metrics.
rf_model, rf_mse, rf_r2 = train_rf(X_train_scaled, X_test_scaled, y_train, y_test)
print(f"Random Forest - MSE: {rf_mse:.4f}, R2: {rf_r2:.4f}")



🏃 View run random_forest at: https://musical-spork-xp9vpjwpjvp36xgx-5000.app.github.dev/#/experiments/6/runs/fc28e9a64b4a418e9b50603cb6fc8a2f
🧪 View experiment at: https://musical-spork-xp9vpjwpjvp36xgx-5000.app.github.dev/#/experiments/6
Random Forest - MSE: 0.0027, R2: 0.9969
