In [18]:
###Imports

import pandas as pd
import uuid
import pickle
from sklearn.metrics import mean_squared_error,accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from datetime import datetime
import time
import mlflow
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType




# MLflow Experiment Setup

In [19]:
###Globals

MLFLOW_TRACKING_URI="sqlite:///mlflow.db"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("stress-level")
model_name="stress-level"
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# Reading Input data

In [20]:

def generate_uuids(n):
    ids=[]
    for i in range(n):
        ids.append(str(uuid.uuid4()))       
    return ids

def read_dataFrame(input_file):
    df=pd.read_csv(input_file)
    df.columns=['snoring_rate', 'respiration_rate', 'body_temperature', 'limb_movement', 'blood_oxygen', \
             'eye_movement', 'sleeping_hours', 'heart_rate', 'stress_level']
    
    df['id']=generate_uuids(len(df))
    X = df.drop(['snoring_rate','limb_movement','eye_movement','stress_level','id'],axis=1)
    y = df.stress_level
    X_train, X_test, y_train, y_test = train_test_split(X, y,train_size=0.7, 
                                                    random_state=0)
    return X_train,y_train,X_test,y_test



# Model Training and logging to MLflow

In [21]:

def training_logging(X_train,y_train,X_test,y_test):
    classifiers = [('Random Forest', RandomForestClassifier()), 
                   ('Decision Tree Classifier', DecisionTreeClassifier()), 
                   ('Gradient Boost Classifier',GradientBoostingClassifier(n_estimators=20, random_state = 0)),
                   ('Naive Bayes', GaussianNB())]


    for clf_name, clf in classifiers:
        mlflow.sklearn.autolog()                
        with mlflow.start_run():                
            mlflow.set_tag("model",f"{clf_name}")
            model=clf.fit(X_train, y_train)
            with open(f'models/{clf_name}.bin','wb') as f_out:
                pickle.dump((model),f_out)
            y_pred = clf.predict(X_test)
            rmse = mean_squared_error(y_test, y_pred,squared="False")  
            print("RMSE of ", clf_name, " is: %.4f" % rmse)
            mlflow.log_metric("rmse",rmse)
            mlflow.log_artifact(local_path=f"models/{clf_name}.bin",artifact_path="models_pickle")
            accuracy = accuracy_score(y_test, y_pred)
            print("Test Accuracy of ", clf_name, " is ", accuracy)

# MLflow Client to select the best model

In [22]:
def select_model():
    best_run = client.search_runs(
        experiment_ids='2',
        filter_string="metrics.rmse<0.01",
        run_view_type=ViewType.ACTIVE_ONLY,
        max_results=1,
        order_by=["metrics.rmse ASC"]
    )

    for run in best_run:
        best_run_id=run.info.run_id
        best_run_rmse=run.data.metrics['rmse']
        print(f"run id of best run: {best_run_id}, rmse: {best_run_rmse}")
        return best_run_id

# Registering the best model

In [23]:
def register_model(best_run_id):
    model_uri=f"runs:/{best_run_id}/model"
    mlflow.register_model(model_uri=model_uri,name=model_name)
    #print("Model regstered successfully")

# Promoting latest version of best model to Production

In [24]:
def promote_model():
    global best_model_latest_version
    latest_ver=client.get_latest_versions(name=model_name)
    new_stage="Production"
    date=datetime.today().date()
    for version in latest_ver:
        best_model_latest_version=version.version
        current_stage=version.current_stage
    
    client.transition_model_version_stage(
        name=model_name,
        version=best_model_latest_version,
        stage=new_stage,
        archive_existing_versions=False
    )
    client.update_model_version(
        name=model_name,
        version=best_model_latest_version,
        description=f"The model version {best_model_latest_version} was transitioned to {new_stage} on {date}"
    )
    print(f"The latest version of model is {best_model_latest_version}.It is promoted successfully to {new_stage}")

In [25]:
def train():
    input_file='./data/SaYoPillow.csv'
    X_train,y_train,X_test,y_test=read_dataFrame(input_file)
    training_logging(X_train,y_train,X_test,y_test)
    time.sleep(3)
    best_run_id=select_model()
    time.sleep(3)
    register_model(best_run_id)
    time.sleep(3)
    promote_model()
    print("Finished")
    

In [26]:
train()



RMSE of  Random Forest  is: 0.0053




Test Accuracy of  Random Forest  is  0.9947089947089947
RMSE of  Decision Tree Classifier  is: 0.0106




Test Accuracy of  Decision Tree Classifier  is  0.9894179894179894
RMSE of  Gradient Boost Classifier  is: 0.0106




Test Accuracy of  Gradient Boost Classifier  is  0.9894179894179894
RMSE of  Naive Bayes  is: 0.0000
Test Accuracy of  Naive Bayes  is  1.0
run id of best run: e0592c2ec53842a38139eb2cad63c24b, rmse: 0.0


Registered model 'stress-level-model' already exists. Creating a new version of this model...
2023/02/15 12:45:18 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: stress-level-model, version 1
Created version '1' of model 'stress-level-model'.


The latest version of model is 1.It is promoted successfully to Production
Finished


In [30]:
input_file='./data/SaYoPillow.csv'
X_train,y_train,X_test,y_test=read_dataFrame(input_file)

     respiration_rate  body_temperature  blood_oxygen  sleeping_hours  \
0              25.680            91.840        89.840           1.840   
1              25.104            91.552        89.552           1.552   
2              20.000            96.000        95.000           7.000   
3              23.536            90.768        88.768           0.768   
4              17.248            97.872        96.248           8.248   
..                ...               ...           ...             ...   
625            20.960            92.960        90.960           3.440   
626            17.376            98.064        96.376           8.376   
627            27.504            86.880        84.256           0.000   
628            19.728            95.728        94.592           6.728   
629            21.392            93.392        91.392           4.088   

     heart_rate  
0         74.20  
1         72.76  
2         60.00  
3         68.84  
4         53.12  
..          ...