In [1]:
import os
import pickle
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn import metrics

import sqlalchemy
import snowflake.connector
from sqlalchemy import create_engine
from snowflake.sqlalchemy import *

import xgboost
from datetime import datetime, timedelta
import time  
import pytz    
tz_NY = pytz.timezone('Asia/Kolkata')

import snowflake_creds
import LOS_Preprocessing
from LOS_Preprocessing import preprocess_data

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Snowflake connection settings. User name and password are read from the
# local snowflake_creds module; the remaining values are fixed in this notebook.
snowflake_url = URL(
    account="gqrxtcf-bbb55063",
    user=snowflake_creds.USER_NAME,
    password=snowflake_creds.PASSWORD,
    role="ACCOUNTADMIN",
    warehouse="COMPUTE_WH",
    database="HEALTHDB",
    schema="HEALTHSCHEMA",
)
engine = create_engine(snowflake_url)

# Smoke test: open (and immediately close) one connection, reporting the
# outcome without stopping the notebook when the connection fails.
try:
    with engine.connect() as conn:
        print("Connection successful!")
except Exception as e:
    print(f"Connection failed: {e}")

Connection successful!


In [3]:
# Scoring query used by the cells below.
#   BASE               - reads HEALTHDB.HEALTHSCHEMA.SIMULATION_DATA and replaces NULLs
#                        with defaults via COALESCE (0 for the numeric codes/counts,
#                        'None' for most text columns, 'Minor' for SEVERITY_OF_ILLNESS).
#   BASE_WITH_FEATURES - adds the admission month/day names, three concatenated
#                        interaction columns, and LOS as the day difference between
#                        ADMISSION_DATE and DISCHARGE_DATE.
# Only rows with ADMISSION_DATE = CURRENT_DATE-45 are returned.
query = """

WITH BASE AS (

    SELECT CASE_ID,
           COALESCE(HOSPITAL_CODE,0) AS HOSPITAL_CODE,
           COALESCE(HOSPITAL_TYPE_CODE,'None') AS HOSPITAL_TYPE_CODE,
           COALESCE(CITY_CODE_HOSPITAL,0) AS CITY_CODE_HOSPITAL,
           COALESCE(HOSPITAL_REGION_CODE,'None') AS HOSPITAL_REGION_CODE,
           COALESCE(AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,0) AS AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,
           COALESCE(DEPARTMENT,'None') AS DEPARTMENT,
           COALESCE(WARD_TYPE,'None') AS WARD_TYPE,
           COALESCE(WARD_FACILITY_CODE,'None') AS WARD_FACILITY_CODE,
           COALESCE(BED_GRADE,0) AS BED_GRADE,
           PATIENTID,
           COALESCE(CITY_CODE_PATIENT,0) AS CITY_CODE_PATIENT,
           COALESCE(TYPE_OF_ADMISSION,'None') AS TYPE_OF_ADMISSION,
           COALESCE(SEVERITY_OF_ILLNESS,'Minor') AS SEVERITY_OF_ILLNESS,
           COALESCE(VISITORS_WITH_PATIENT,0) AS VISITORS_WITH_PATIENT,
           COALESCE(AGE,'None') AS AGE,
           COALESCE(ADMISSION_DEPOSIT,0) AS ADMISSION_DEPOSIT,
           ADMISSION_DATE,
           DISCHARGE_DATE

    FROM HEALTHDB.HEALTHSCHEMA.SIMULATION_DATA

),
    
BASE_WITH_FEATURES AS (

    SELECT *,
            MONTHNAME(ADMISSION_DATE) AS ADMISSION_MONTH,
            DAYNAME(ADMISSION_DATE) AS ADMISSION_DAY,
            CONCAT(TYPE_OF_ADMISSION,'-',SEVERITY_OF_ILLNESS) AS ADMISSION_ILLNESS,
            CONCAT(SEVERITY_OF_ILLNESS,'-',BED_GRADE) AS ILLNESS_BEDGRADE,
            CONCAT(DEPARTMENT,'-',SEVERITY_OF_ILLNESS) AS DEPARTMENT_ILLNESS,
            DATEDIFF(day,ADMISSION_DATE,DISCHARGE_DATE) AS LOS
    FROM BASE 

)    

SELECT * FROM BASE_WITH_FEATURES WHERE ADMISSION_DATE = CURRENT_DATE-45
"""

In [4]:
def check_n_create_model_features(df, feat_list):
    """Return ``df`` restricted to the model's feature columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed scoring data.
    feat_list : iterable of str
        Column names the model expects, in the order they should appear.

    Returns
    -------
    pandas.DataFrame
        A new frame on the same row index as ``df`` with one column per
        entry of ``feat_list`` (in that order). Features that are absent
        from ``df`` are filled with 0.
    """
    # Drop repeated names while keeping order, so a feature listed twice still
    # yields a single column (as column-by-column assignment would).
    wanted = list(dict.fromkeys(feat_list))

    # reindex keeps df's row index from the start. Building the frame column
    # by column from an empty DataFrame instead turns a missing feature that
    # precedes the first present one into NaN, and returns zero rows when no
    # feature is present at all.
    return df.reindex(columns=wanted, fill_value=0)

In [5]:
def insert_predictions_to_snowflake_table(data):
    """Append ``data`` to the Snowflake prediction logging table.

    A dedicated engine is created for the write and disposed afterwards so
    repeated calls do not accumulate pooled connections.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to append to ``TEMP_LOS_PREDICTION_MODEL_LOGGING_TABLE_HARI``.

    Returns
    -------
    str
        ``'Success'`` once the rows are written. Errors raised while
        connecting or writing propagate to the caller.
    """
    from snowflake.connector.pandas_tools import pd_writer

    engine = create_engine(URL(
        account="gqrxtcf-bbb55063",
        user=snowflake_creds.USER_NAME,
        password=snowflake_creds.PASSWORD,
        role="ACCOUNTADMIN",
        warehouse="COMPUTE_WH",
        database="HEALTHDB",
        schema="HEALTHSCHEMA"
    ))

    # With if_exists='append', to_sql creates the table on first use and
    # appends to it afterwards.
    table = 'TEMP_LOS_PREDICTION_MODEL_LOGGING_TABLE_HARI'

    try:
        data.to_sql(table, engine, index=False, if_exists='append', method=pd_writer)
    finally:
        # Release the engine's pooled connections whether or not the write succeeded.
        engine.dispose()
    return 'Success'

In [6]:
# Loading the scoring data. The connection is only needed for the read, so it
# is released before preprocessing and scoring start.
with engine.connect() as conn:
    score_data = pd.DataFrame(pd.read_sql(query, conn))
score_data.columns = [col.upper() for col in score_data.columns.tolist()]

# Applying the preprocessing steps
score_data_processed = LOS_Preprocessing.preprocess_data(score_data)

# Retain CASE_ID in the processed data.
# NOTE(review): the displayed score_data_processed output shows an index named
# CASE_ID, while score_data keeps a default row-number index. Assigning
# score_data['CASE_ID'] directly would align row numbers against CASE_ID
# labels and produce wrong/NaN ids, so take the ids from the index when it
# carries them. Confirm against LOS_Preprocessing.preprocess_data.
if score_data_processed.index.name == 'CASE_ID':
    score_data_processed['CASE_ID'] = score_data_processed.index
else:
    score_data_processed['CASE_ID'] = score_data['CASE_ID']

# Applying feature selection.
# NOTE(review): read_pickle executes pickle data; only load a trusted MODEL_FEATS.pkl.
final_feats = pd.read_pickle('MODEL_FEATS.pkl')
score_data_final = check_n_create_model_features(score_data_processed, final_feats)

# Ensure CASE_ID is included in score_data_final
score_data_final['CASE_ID'] = score_data_processed['CASE_ID']

# Getting the predictions. LOS and CASE_ID are not model inputs; errors='ignore'
# keeps the cell working when the feature list does not contain LOS.
model = xgboost.XGBRegressor()
model.load_model('MODEL_XGB.model')
model_inputs = score_data_final.drop(columns=['LOS', 'CASE_ID'], errors='ignore')
score_data_final['PREDICTED_LOS'] = np.ceil(model.predict(model_inputs))

# Writing the dataframe to Snowflake as a table
score_data_final = score_data_final.reset_index(drop=True)
score_data_table = pd.merge(score_data, score_data_final, on='CASE_ID', how='left')
status = insert_predictions_to_snowflake_table(score_data_table)

In [7]:
# Diagnostic cell: list the columns of both frames, report a missing join key,
# then retry the CASE_ID merge.
print("Columns in score_data:", score_data.columns)
print("Columns in score_data_final:", score_data_final.columns)

# Report every frame that lacks the CASE_ID join key.
for frame, missing_msg in (
    (score_data, "CASE_ID column is missing in score_data"),
    (score_data_final, "CASE_ID column is missing in score_data_final"),
):
    if 'CASE_ID' not in frame.columns:
        print(missing_msg)

# Strip surrounding whitespace from the column labels of both frames so a
# padded 'CASE_ID' label still matches.
score_data.columns = score_data.columns.str.strip()
score_data_final.columns = score_data_final.columns.str.strip()

# Left-join the predictions onto the raw rows; a missing key is reported
# instead of raised.
try:
    score_data_table = score_data.merge(score_data_final, on='CASE_ID', how='left')
except KeyError as e:
    print(f"Merge failed: {e}")
else:
    print("Merge successful!")

Columns in score_data: Index(['CASE_ID', 'HOSPITAL_CODE', 'HOSPITAL_TYPE_CODE', 'CITY_CODE_HOSPITAL',
       'HOSPITAL_REGION_CODE', 'AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL',
       'DEPARTMENT', 'WARD_TYPE', 'WARD_FACILITY_CODE', 'BED_GRADE',
       'PATIENTID', 'CITY_CODE_PATIENT', 'TYPE_OF_ADMISSION',
       'SEVERITY_OF_ILLNESS', 'VISITORS_WITH_PATIENT', 'AGE',
       'ADMISSION_DEPOSIT', 'ADMISSION_DATE', 'DISCHARGE_DATE',
       'ADMISSION_MONTH', 'ADMISSION_DAY', 'ADMISSION_ILLNESS',
       'ILLNESS_BEDGRADE', 'DEPARTMENT_ILLNESS', 'LOS'],
      dtype='object')
Columns in score_data_final: Index(['AGE_41-50', 'WARD_TYPE_Q', 'ADMISSION_MONTH_Oct', 'ADMISSION_DAY_Sat',
       'ADMISSION_DAY_Mon', 'ADMISSION_DAY_Sun', 'ADMISSION_DAY_Thu',
       'BED_GRADE_2', 'AGE_31-40', 'ADMISSION_DAY_Fri', 'ADMISSION_DAY_Wed',
       'TYPE_OF_ADMISSION_Emergency', 'WARD_TYPE_S', 'CITY_CODE_HOSPITAL_7',
       'TYPE_OF_ADMISSION_Trauma', 'WARD_TYPE_P', 'ILLNESS_BEDGRADE_Extreme-1',
       'SEVERITY_O

In [8]:
# Shape and first rows of the raw scoring query result.
print(score_data.shape)
score_data.head()

(0, 25)


Unnamed: 0,CASE_ID,HOSPITAL_CODE,HOSPITAL_TYPE_CODE,CITY_CODE_HOSPITAL,HOSPITAL_REGION_CODE,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,DEPARTMENT,WARD_TYPE,WARD_FACILITY_CODE,BED_GRADE,PATIENTID,CITY_CODE_PATIENT,TYPE_OF_ADMISSION,SEVERITY_OF_ILLNESS,VISITORS_WITH_PATIENT,AGE,ADMISSION_DEPOSIT,ADMISSION_DATE,DISCHARGE_DATE,ADMISSION_MONTH,ADMISSION_DAY,ADMISSION_ILLNESS,ILLNESS_BEDGRADE,DEPARTMENT_ILLNESS,LOS


In [9]:
# Shape and first rows of the frame returned by LOS_Preprocessing.preprocess_data
# (with the CASE_ID column added afterwards).
print(score_data_processed.shape)
score_data_processed.head()

(0, 5)


Unnamed: 0_level_0,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,VISITORS_WITH_PATIENT,ADMISSION_DEPOSIT,LOS,CASE_ID
CASE_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [10]:
# Shape and first rows of the model-feature frame, including the CASE_ID and
# PREDICTED_LOS columns added during scoring.
print(score_data_final.shape)
score_data_final.head()

(0, 27)


Unnamed: 0,AGE_41-50,WARD_TYPE_Q,ADMISSION_MONTH_Oct,ADMISSION_DAY_Sat,ADMISSION_DAY_Mon,ADMISSION_DAY_Sun,ADMISSION_DAY_Thu,BED_GRADE_2,AGE_31-40,ADMISSION_DAY_Fri,ADMISSION_DAY_Wed,TYPE_OF_ADMISSION_Emergency,WARD_TYPE_S,CITY_CODE_HOSPITAL_7,TYPE_OF_ADMISSION_Trauma,WARD_TYPE_P,ILLNESS_BEDGRADE_Extreme-1,SEVERITY_OF_ILLNESS_Minor,VISITORS_WITH_PATIENT,AGE_51-60,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,ADMISSION_DAY_Tue,ADMISSION_DEPOSIT,ADMISSION_MONTH_Nov,LOS,CASE_ID,PREDICTED_LOS


In [11]:
# Shape and first rows of the left merge of score_data with score_data_final
# on CASE_ID; columns present in both frames carry _x / _y suffixes.
print(score_data_table.shape)
score_data_table.head()

(0, 51)


Unnamed: 0,CASE_ID,HOSPITAL_CODE,HOSPITAL_TYPE_CODE,CITY_CODE_HOSPITAL,HOSPITAL_REGION_CODE,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL_x,DEPARTMENT,WARD_TYPE,WARD_FACILITY_CODE,BED_GRADE,PATIENTID,CITY_CODE_PATIENT,TYPE_OF_ADMISSION,SEVERITY_OF_ILLNESS,VISITORS_WITH_PATIENT_x,AGE,ADMISSION_DEPOSIT_x,ADMISSION_DATE,DISCHARGE_DATE,ADMISSION_MONTH,ADMISSION_DAY,ADMISSION_ILLNESS,ILLNESS_BEDGRADE,DEPARTMENT_ILLNESS,LOS_x,AGE_41-50,WARD_TYPE_Q,ADMISSION_MONTH_Oct,ADMISSION_DAY_Sat,ADMISSION_DAY_Mon,ADMISSION_DAY_Sun,ADMISSION_DAY_Thu,BED_GRADE_2,AGE_31-40,ADMISSION_DAY_Fri,ADMISSION_DAY_Wed,TYPE_OF_ADMISSION_Emergency,WARD_TYPE_S,CITY_CODE_HOSPITAL_7,TYPE_OF_ADMISSION_Trauma,WARD_TYPE_P,ILLNESS_BEDGRADE_Extreme-1,SEVERITY_OF_ILLNESS_Minor,VISITORS_WITH_PATIENT_y,AGE_51-60,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL_y,ADMISSION_DAY_Tue,ADMISSION_DEPOSIT_y,ADMISSION_MONTH_Nov,LOS_y,PREDICTED_LOS


# Scoring Function for Deployment:

In [12]:
# Scoring query read by LOS_MODEL_DEPLOYMENT through the module-level name `query`.
#   BASE               - reads HEALTHDB.HEALTHSCHEMA.SIMULATION_DATA and replaces NULLs
#                        with defaults via COALESCE (0 for the numeric codes/counts,
#                        'None' for most text columns, 'Minor' for SEVERITY_OF_ILLNESS).
#   BASE_WITH_FEATURES - adds the admission month/day names, three concatenated
#                        interaction columns, and LOS as the day difference between
#                        ADMISSION_DATE and DISCHARGE_DATE.
# Only rows with ADMISSION_DATE = CURRENT_DATE-45 are returned.
query = """

WITH BASE AS (

    SELECT CASE_ID,
           COALESCE(HOSPITAL_CODE,0) AS HOSPITAL_CODE,
           COALESCE(HOSPITAL_TYPE_CODE,'None') AS HOSPITAL_TYPE_CODE,
           COALESCE(CITY_CODE_HOSPITAL,0) AS CITY_CODE_HOSPITAL,
           COALESCE(HOSPITAL_REGION_CODE,'None') AS HOSPITAL_REGION_CODE,
           COALESCE(AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,0) AS AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,
           COALESCE(DEPARTMENT,'None') AS DEPARTMENT,
           COALESCE(WARD_TYPE,'None') AS WARD_TYPE,
           COALESCE(WARD_FACILITY_CODE,'None') AS WARD_FACILITY_CODE,
           COALESCE(BED_GRADE,0) AS BED_GRADE,
           PATIENTID,
           COALESCE(CITY_CODE_PATIENT,0) AS CITY_CODE_PATIENT,
           COALESCE(TYPE_OF_ADMISSION,'None') AS TYPE_OF_ADMISSION,
           COALESCE(SEVERITY_OF_ILLNESS,'Minor') AS SEVERITY_OF_ILLNESS,
           COALESCE(VISITORS_WITH_PATIENT,0) AS VISITORS_WITH_PATIENT,
           COALESCE(AGE,'None') AS AGE,
           COALESCE(ADMISSION_DEPOSIT,0) AS ADMISSION_DEPOSIT,
           ADMISSION_DATE,
           DISCHARGE_DATE

    FROM HEALTHDB.HEALTHSCHEMA.SIMULATION_DATA

),
    
BASE_WITH_FEATURES AS (

    SELECT *,
            MONTHNAME(ADMISSION_DATE) AS ADMISSION_MONTH,
            DAYNAME(ADMISSION_DATE) AS ADMISSION_DAY,
            CONCAT(TYPE_OF_ADMISSION,'-',SEVERITY_OF_ILLNESS) AS ADMISSION_ILLNESS,
            CONCAT(SEVERITY_OF_ILLNESS,'-',BED_GRADE) AS ILLNESS_BEDGRADE,
            CONCAT(DEPARTMENT,'-',SEVERITY_OF_ILLNESS) AS DEPARTMENT_ILLNESS,
            DATEDIFF(day,ADMISSION_DATE,DISCHARGE_DATE) AS LOS
    FROM BASE 

)    

SELECT * FROM BASE_WITH_FEATURES WHERE ADMISSION_DATE = CURRENT_DATE-45
"""

In [13]:
def check_n_create_model_features(df, feat_list):
    """Return ``df`` restricted to the model's feature columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed scoring data.
    feat_list : iterable of str
        Column names the model expects, in the order they should appear.

    Returns
    -------
    pandas.DataFrame
        A new frame on the same row index as ``df`` with one column per
        entry of ``feat_list`` (in that order). Features that are absent
        from ``df`` are filled with 0.
    """
    # Drop repeated names while keeping order, so a feature listed twice still
    # yields a single column (as column-by-column assignment would).
    wanted = list(dict.fromkeys(feat_list))

    # reindex keeps df's row index from the start. Building the frame column
    # by column from an empty DataFrame instead turns a missing feature that
    # precedes the first present one into NaN, and returns zero rows when no
    # feature is present at all.
    return df.reindex(columns=wanted, fill_value=0)

In [14]:
def insert_predictions_to_snowflake_table(data):
    """Append ``data`` to the Snowflake prediction logging table.

    A dedicated engine is created for the write and disposed afterwards so
    repeated calls do not accumulate pooled connections.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to append to ``TEMP_LOS_PREDICTION_MODEL_LOGGING_TABLE_HARI``.

    Returns
    -------
    str
        ``'Success'`` once the rows are written. Errors raised while
        connecting or writing propagate to the caller.
    """
    from snowflake.connector.pandas_tools import pd_writer

    engine = create_engine(URL(
        account="gqrxtcf-bbb55063",
        user=snowflake_creds.USER_NAME,
        password=snowflake_creds.PASSWORD,
        role="ACCOUNTADMIN",
        warehouse="COMPUTE_WH",
        database="HEALTHDB",
        schema="HEALTHSCHEMA"
    ))

    # With if_exists='append', to_sql creates the table on first use and
    # appends to it afterwards.
    table = 'TEMP_LOS_PREDICTION_MODEL_LOGGING_TABLE_HARI'

    try:
        data.to_sql(table, engine, index=False, if_exists='append', method=pd_writer)
    finally:
        # Release the engine's pooled connections whether or not the write succeeded.
        engine.dispose()
    return 'Success'

In [15]:
def send_status_mail(mail_string):
    """Send a plain-text status e-mail for the LOS prediction job.

    The message is sent from and to ``mail_creds.MAIL_ID`` through
    ``smtp.gmail.com`` on port 587 with STARTTLS, logging in with
    ``mail_creds.MAIL_ID`` / ``mail_creds.APP_PASSWORD``.

    Parameters
    ----------
    mail_string : str
        Text used as the message body.

    Raises
    ------
    smtplib.SMTPException, OSError
        Connection, TLS, login and delivery errors propagate to the caller.
    """
    import mail_creds
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    from email.utils import formatdate

    subject = 'Patient LOS Prediction - STATUS MAIL'

    send_from = mail_creds.MAIL_ID
    send_to = mail_creds.MAIL_ID
    cc = ''

    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = send_to
    if cc:
        # Only emit a Cc header when there is somebody to copy.
        msg['Cc'] = cc
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject
    msg.attach(MIMEText(mail_string, 'plain'))

    # ''.split(',') yields [''], so blank entries are filtered out to keep an
    # empty address out of the SMTP envelope.
    recipients = [addr.strip() for addr in (send_to.split(',') + cc.split(',')) if addr.strip()]

    # The context manager closes the connection even when login or delivery fails.
    with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
        smtp.ehlo()
        smtp.starttls()
        smtp.login(mail_creds.MAIL_ID, mail_creds.APP_PASSWORD)
        smtp.sendmail(send_from, recipients, msg.as_string())

In [25]:
def LOS_MODEL_DEPLOYMENT():
    """Score the current batch of admissions and log the predictions to Snowflake.

    Steps:
      1. Run the module-level ``query`` against Snowflake.
      2. Apply ``LOS_Preprocessing.preprocess_data``.
      3. Select the features listed in ``MODEL_FEATS.pkl``.
      4. Predict with the XGBoost model in ``MODEL_XGB.model``; predictions
         are rounded up with ``np.ceil``.
      5. Left-merge the predictions onto the raw rows by ``CASE_ID`` and append
         them through ``insert_predictions_to_snowflake_table``.

    A status mail listing the completed steps is sent on success. When the
    query returns no rows, scoring is skipped and the mail says so. Any
    exception is reported by a printed ``'Failed: ...'`` line and a failure
    mail; it is not re-raised.

    Returns
    -------
    None
    """
    engine = None
    try:
        import pandas as pd
        import numpy as np
        from sqlalchemy import create_engine
        from snowflake.sqlalchemy import URL
        import xgboost as xgb

        import warnings
        warnings.filterwarnings('ignore')

        import snowflake_creds
        import LOS_Preprocessing

        engine = create_engine(URL(
                account="gqrxtcf-bbb55063",
                user=snowflake_creds.USER_NAME,
                password=snowflake_creds.PASSWORD,
                role="ACCOUNTADMIN",
                warehouse="COMPUTE_WH",
                database="HEALTHDB",
                schema="HEALTHSCHEMA"
            ))

        mail_list = []

        # Loading the scoring data; the connection is only needed for the read.
        with engine.connect() as conn:
            score_data = pd.DataFrame(pd.read_sql(query, conn))
        score_data.columns = [col.upper() for col in score_data.columns.tolist()]
        mail_list.append('STEP-1: Loading Data complete')

        if score_data.empty:
            # Nothing was admitted on the scoring date: report it and stop
            # instead of pushing a zero-row frame through the model and writer.
            mail_list.append('No rows returned for the scoring date - nothing to score')
            send_status_mail(",\n ".join(map(str, mail_list)))
            print('Success')
            return

        # Applying the preprocessing steps
        score_data_processed = LOS_Preprocessing.preprocess_data(score_data)
        mail_list.append('STEP-2: Applying the Preprocessing Steps complete')

        # Retain CASE_ID in the processed data.
        # NOTE(review): the notebook output shows the processed frame indexed
        # by CASE_ID while score_data has a default row-number index, so a
        # direct assignment would align on mismatched labels. Take the ids
        # from the index when it carries them; confirm against preprocess_data.
        if score_data_processed.index.name == 'CASE_ID':
            score_data_processed['CASE_ID'] = score_data_processed.index
        else:
            score_data_processed['CASE_ID'] = score_data['CASE_ID']

        # Applying feature selection.
        # NOTE(review): read_pickle executes pickle data; only load a trusted file.
        final_feats = pd.read_pickle('MODEL_FEATS.pkl')
        score_data_final = check_n_create_model_features(score_data_processed, final_feats)
        mail_list.append('STEP-3: Applying the Feature Selection Steps complete')

        # Ensure CASE_ID is included in score_data_final
        score_data_final['CASE_ID'] = score_data_processed['CASE_ID']

        # Getting the predictions. LOS and CASE_ID are not model inputs;
        # errors='ignore' tolerates a feature list that does not contain LOS.
        model = xgb.XGBRegressor()
        model.load_model('MODEL_XGB.model')
        model_inputs = score_data_final.drop(columns=['LOS', 'CASE_ID'], errors='ignore')
        score_data_final['PREDICTED_LOS'] = np.ceil(model.predict(model_inputs))
        mail_list.append('STEP-4: Getting the Predictions complete')

        # Writing the dataframe to Snowflake as a table
        score_data_final = score_data_final.reset_index(drop=True)
        score_data_table = pd.merge(score_data, score_data_final, on='CASE_ID', how='left')
        insert_predictions_to_snowflake_table(score_data_table)
        mail_list.append('STEP-5: Writing the data to Snowflake complete')

        # Creating the mail body
        mail_string = ",\n ".join(map(str, mail_list))
        send_status_mail(mail_string)
        print('Success')

    except Exception as e:
        # Print first so the failure is visible even if the mail cannot be sent.
        print(f'Failed: {e}')
        try:
            send_status_mail(f'Schedule failed: {e}')
        except Exception as mail_error:
            print(f'Could not send failure mail: {mail_error}')
    finally:
        if engine is not None:
            # Release the pooled connections of the engine created for this run.
            engine.dispose()

In [26]:
# Run the deployment scoring job once; it prints 'Success' or 'Failed: ...'.
LOS_MODEL_DEPLOYMENT()

Success


# Live Data Scoring Simulation:

In [28]:
# Creating the connection engine (way 1)
engine = create_engine(URL(
    account="gqrxtcf-bbb55063",
    user=snowflake_creds.USER_NAME,
    password=snowflake_creds.PASSWORD,
    role="ACCOUNTADMIN",
    warehouse="COMPUTE_WH",
    database="HEALTHDB",
    schema="HEALTHSCHEMA"
))

In [29]:
def scoring_query(a):
    """Build the scoring SQL for one simulated day.

    Parameters
    ----------
    a : int
        Day offset. The query keeps rows whose ``ADMISSION_DATE`` equals
        ``CURRENT_DATE+a-46``.

    Returns
    -------
    str
        The SQL text: NULL-defaulted columns from
        ``HEALTHDB.HEALTHSCHEMA.SIMULATION_DATA`` plus the derived calendar,
        interaction and ``LOS`` columns.

    Raises
    ------
    ValueError, TypeError
        If ``a`` cannot be converted to an integer.
    """
    # The offset is spliced into the SQL text, so force it to a plain integer
    # first; anything else (e.g. a string carrying SQL) is rejected here.
    day_offset = int(a)

    query_sim = f"""

    WITH BASE AS (

        SELECT CASE_ID,
               COALESCE(HOSPITAL_CODE,0) AS HOSPITAL_CODE,
               COALESCE(HOSPITAL_TYPE_CODE,'None') AS HOSPITAL_TYPE_CODE,
               COALESCE(CITY_CODE_HOSPITAL,0) AS CITY_CODE_HOSPITAL,
               COALESCE(HOSPITAL_REGION_CODE,'None') AS HOSPITAL_REGION_CODE,
               COALESCE(AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,0) AS AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,
               COALESCE(DEPARTMENT,'None') AS DEPARTMENT,
               COALESCE(WARD_TYPE,'None') AS WARD_TYPE,
               COALESCE(WARD_FACILITY_CODE,'None') AS WARD_FACILITY_CODE,
               COALESCE(BED_GRADE,0) AS BED_GRADE,
               PATIENTID,
               COALESCE(CITY_CODE_PATIENT,0) AS CITY_CODE_PATIENT,
               COALESCE(TYPE_OF_ADMISSION,'None') AS TYPE_OF_ADMISSION,
               COALESCE(SEVERITY_OF_ILLNESS,'Minor') AS SEVERITY_OF_ILLNESS,
               COALESCE(VISITORS_WITH_PATIENT,0) AS VISITORS_WITH_PATIENT,
               COALESCE(AGE,'None') AS AGE,
               COALESCE(ADMISSION_DEPOSIT,0) AS ADMISSION_DEPOSIT,
               ADMISSION_DATE,
               DISCHARGE_DATE

        FROM HEALTHDB.HEALTHSCHEMA.SIMULATION_DATA

    ),

    BASE_WITH_FEATURES AS (

        SELECT *,
                MONTHNAME(ADMISSION_DATE) AS ADMISSION_MONTH,
                DAYNAME(ADMISSION_DATE) AS ADMISSION_DAY,
                CONCAT(TYPE_OF_ADMISSION,'-',SEVERITY_OF_ILLNESS) AS ADMISSION_ILLNESS,
                CONCAT(SEVERITY_OF_ILLNESS,'-',BED_GRADE) AS ILLNESS_BEDGRADE,
                CONCAT(DEPARTMENT,'-',SEVERITY_OF_ILLNESS) AS DEPARTMENT_ILLNESS,
                DATEDIFF(day,ADMISSION_DATE,DISCHARGE_DATE) AS LOS
        FROM BASE

    )

    SELECT * FROM BASE_WITH_FEATURES WHERE ADMISSION_DATE = CURRENT_DATE+{day_offset}-46
    """
    return query_sim

In [30]:
def check_n_create_model_features(df, feat_list):
    """Return ``df`` restricted to the model's feature columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed scoring data.
    feat_list : iterable of str
        Column names the model expects, in the order they should appear.

    Returns
    -------
    pandas.DataFrame
        A new frame on the same row index as ``df`` with one column per
        entry of ``feat_list`` (in that order). Features that are absent
        from ``df`` are filled with 0.
    """
    # Drop repeated names while keeping order, so a feature listed twice still
    # yields a single column (as column-by-column assignment would).
    wanted = list(dict.fromkeys(feat_list))

    # reindex keeps df's row index from the start. Building the frame column
    # by column from an empty DataFrame instead turns a missing feature that
    # precedes the first present one into NaN, and returns zero rows when no
    # feature is present at all.
    return df.reindex(columns=wanted, fill_value=0)

In [31]:
def insert_predictions_to_snowflake_table(data):
    """Append ``data`` to the Snowflake prediction logging table.

    A dedicated engine is created for the write and disposed afterwards so
    repeated calls do not accumulate pooled connections.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to append to ``TEMP_LOS_PREDICTION_MODEL_LOGGING_TABLE_HARI``.

    Returns
    -------
    str
        ``'Success'`` once the rows are written. Errors raised while
        connecting or writing propagate to the caller.
    """
    from snowflake.connector.pandas_tools import pd_writer

    engine = create_engine(URL(
        account="gqrxtcf-bbb55063",
        user=snowflake_creds.USER_NAME,
        password=snowflake_creds.PASSWORD,
        role="ACCOUNTADMIN",
        warehouse="COMPUTE_WH",
        database="HEALTHDB",
        schema="HEALTHSCHEMA"
    ))

    # With if_exists='append', to_sql creates the table on first use and
    # appends to it afterwards.
    table = 'TEMP_LOS_PREDICTION_MODEL_LOGGING_TABLE_HARI'

    try:
        data.to_sql(table, engine, index=False, if_exists='append', method=pd_writer)
    finally:
        # Release the engine's pooled connections whether or not the write succeeded.
        engine.dispose()
    return 'Success'

In [32]:
def send_status_mail(mail_string):
    """Send a plain-text status e-mail for the LOS prediction job.

    The message is sent from and to ``mail_creds.MAIL_ID`` through
    ``smtp.gmail.com`` on port 587 with STARTTLS, logging in with
    ``mail_creds.MAIL_ID`` / ``mail_creds.APP_PASSWORD``.

    Parameters
    ----------
    mail_string : str
        Text used as the message body.

    Raises
    ------
    smtplib.SMTPException, OSError
        Connection, TLS, login and delivery errors propagate to the caller.
    """
    import mail_creds
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    from email.utils import formatdate

    subject = 'Patient LOS Prediction - STATUS MAIL'

    send_from = mail_creds.MAIL_ID
    send_to = mail_creds.MAIL_ID
    cc = ''

    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = send_to
    if cc:
        # Only emit a Cc header when there is somebody to copy.
        msg['Cc'] = cc
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject
    msg.attach(MIMEText(mail_string, 'plain'))

    # ''.split(',') yields [''], so blank entries are filtered out to keep an
    # empty address out of the SMTP envelope.
    recipients = [addr.strip() for addr in (send_to.split(',') + cc.split(',')) if addr.strip()]

    # The context manager closes the connection even when login or delivery fails.
    with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
        smtp.ehlo()
        smtp.starttls()
        smtp.login(mail_creds.MAIL_ID, mail_creds.APP_PASSWORD)
        smtp.sendmail(send_from, recipients, msg.as_string())

In [None]:
def LOS_MODEL_DEPLOYMENT_SIM(x):
    """Score one simulated day of admissions and log the predictions to Snowflake.

    Steps:
      1. Run ``scoring_query(x)`` against Snowflake and print the largest
         ``ADMISSION_DATE`` returned.
      2. Apply ``LOS_Preprocessing.preprocess_data``.
      3. Select the features listed in ``MODEL_FEATS.pkl``.
      4. Predict with the XGBoost model in ``MODEL_XGB.model``; predictions
         are rounded up with ``np.ceil``.
      5. Left-merge the predictions onto the raw rows by ``CASE_ID`` and append
         them through ``insert_predictions_to_snowflake_table``.

    A status mail listing the completed steps is sent on success. When the
    query returns no rows, scoring is skipped and the mail says so. Any
    exception is reported by a printed ``'Failed: ...'`` line and a failure
    mail; it is not re-raised.

    Parameters
    ----------
    x : int
        Day offset passed to ``scoring_query``.

    Returns
    -------
    None
    """
    engine = None
    try:
        import pandas as pd
        import numpy as np
        from sqlalchemy import create_engine
        from snowflake.sqlalchemy import URL
        import xgboost as xgb

        import warnings
        warnings.filterwarnings('ignore')

        import snowflake_creds
        import LOS_Preprocessing

        engine = create_engine(URL(
                account="gqrxtcf-bbb55063",
                user=snowflake_creds.USER_NAME,
                password=snowflake_creds.PASSWORD,
                role="ACCOUNTADMIN",
                warehouse="COMPUTE_WH",
                database="HEALTHDB",
                schema="HEALTHSCHEMA"
            ))

        mail_list = []

        # Loading the scoring data; the connection is only needed for the read.
        with engine.connect() as conn:
            score_data = pd.DataFrame(pd.read_sql(scoring_query(x), conn))
        score_data.columns = [col.upper() for col in score_data.columns.tolist()]
        print(score_data['ADMISSION_DATE'].max())
        mail_list.append('STEP-1: Loading Data complete')

        if score_data.empty:
            # No admissions for this simulated day: report it and stop instead
            # of pushing a zero-row frame through the model and writer.
            mail_list.append('No rows returned for the scoring date - nothing to score')
            send_status_mail(",\n ".join(map(str, mail_list)))
            print('Success')
            return

        # Applying the preprocessing steps
        score_data_processed = LOS_Preprocessing.preprocess_data(score_data)
        mail_list.append('STEP-2: Applying the Preprocessing Steps complete')

        # Retain CASE_ID in the processed data.
        # NOTE(review): the notebook output shows the processed frame indexed
        # by CASE_ID while score_data has a default row-number index, so a
        # direct assignment would align on mismatched labels. Take the ids
        # from the index when it carries them; confirm against preprocess_data.
        if score_data_processed.index.name == 'CASE_ID':
            score_data_processed['CASE_ID'] = score_data_processed.index
        else:
            score_data_processed['CASE_ID'] = score_data['CASE_ID']

        # Applying feature selection.
        # NOTE(review): read_pickle executes pickle data; only load a trusted file.
        final_feats = pd.read_pickle('MODEL_FEATS.pkl')
        score_data_final = check_n_create_model_features(score_data_processed, final_feats)
        mail_list.append('STEP-3: Applying the Feature Selection Steps complete')

        # Ensure CASE_ID is included in score_data_final
        score_data_final['CASE_ID'] = score_data_processed['CASE_ID']

        # Getting the predictions. LOS and CASE_ID are not model inputs;
        # errors='ignore' tolerates a feature list that lacks either column.
        model = xgb.XGBRegressor()
        model.load_model('MODEL_XGB.model')
        model_inputs = score_data_final.drop(columns=['LOS', 'CASE_ID'], errors='ignore')
        score_data_final['PREDICTED_LOS'] = np.ceil(model.predict(model_inputs))
        mail_list.append('STEP-4: Getting the Predictions complete')

        # Writing the dataframe to Snowflake as a table
        score_data_final = score_data_final.reset_index(drop=True)
        score_data_table = pd.merge(score_data, score_data_final, on='CASE_ID', how='left')
        insert_predictions_to_snowflake_table(score_data_table)
        mail_list.append('STEP-5: Writing the data to Snowflake complete')

        # Creating the mail body
        mail_string = ",\n ".join(map(str, mail_list))
        send_status_mail(mail_string)
        print('Success')

    except Exception as e:
        # Print first so the failure is visible even if the mail cannot be sent.
        print(f'Failed: {e}')
        try:
            send_status_mail(f'Schedule failed: {e}')
        except Exception as mail_error:
            print(f'Could not send failure mail: {mail_error}')
    finally:
        if engine is not None:
            # Release the pooled connections of the engine created for this run.
            engine.dispose()

In [42]:
# Run the simulated scoring job for offset 1, i.e. ADMISSION_DATE = CURRENT_DATE+1-46.
LOS_MODEL_DEPLOYMENT_SIM(1)

Failed: (snowflake.connector.errors.DatabaseError) 250001 (08001): Failed to connect to DB: tr09543.ap-south-1.snowflakecomputing.com:443. Incorrect username or password was specified.
(Background on this error at: https://sqlalche.me/e/20/4xp6)


In [None]:
# Live scoring simulation driver.
#
# Rather than waiting for wall-clock schedule times, this cell replays 30
# simulated days back to back by calling the scoring job with offsets 1..30.

import pytz
import time
from datetime import datetime

# Timezone and schedule slots (24-hour "HH:MM") belonging to the disabled
# clock-based scheduler sketched inside the loop; the active code does not read them.
tz_NY = pytz.timezone('Asia/Kolkata')
hours_list = ["10:10", "17:17"]

simulated_day_offsets = range(1, 31)
for i in simulated_day_offsets:
    # Disabled clock-based variant, kept for reference:
    #     now = datetime.now(tz_NY)                  # current datetime
    #     current_time = f"{now.hour}:{now.minute}"  # current hour and minute
    #     if current_time in hours_list:
    #         LOS_MODEL_DEPLOYMENT()
    #     time.sleep(60)                             # wait a minute before re-checking

    # Score the admissions selected by offset i (ADMISSION_DATE = CURRENT_DATE+i-46).
    LOS_MODEL_DEPLOYMENT_SIM(i)
