# Model Training

We will be training and building the model, based on the feature engineered data that is stored in the table. The entire training 
is done using Snowpark optimized warehouse and using stored procedure. Hence all the operations will be natively executed in Snowflake.

In [2]:
from IPython.display import display, HTML, Image , Markdown
from snowflake.snowpark.session import Session
import snowflake.snowpark.types as T
import snowflake.snowpark.functions as F
import os ,configparser ,json ,logging

# Import the commonly defined utility scripts using
# dynamic path include
import sys
sys.path.append('../python/lutils')
import sflk_base as L

display(Markdown("### Initialization"))
logging.basicConfig(stream=sys.stdout, level=logging.ERROR)

# Source various helper functions
%run ./scripts/notebook_helpers.py

# Define the project home directory, this is used for locating the config.ini file
PROJECT_HOME_DIR = '../../'
config = L.get_config(PROJECT_HOME_DIR)
sp_session = L.connect_to_snowflake(PROJECT_HOME_DIR)

if(sp_session == None):
    raise Exception(f'Unable to connect to snowflake. Validate connection information ')

sp_session.use_role(f'''{config['APP_DB']['role']}''')
sp_session.use_schema(f'''{config['APP_DB']['database']}.{config['APP_DB']['schema']}''')
sp_session.use_warehouse(f'''{config['APP_DB']['snow_opt_wh']}''')

df = sp_session.sql('select current_user() ,current_role() ,current_database() ,current_schema();').to_pandas()
display(df)

### Initialization

Unnamed: 0,CURRENT_USER(),CURRENT_ROLE(),CURRENT_DATABASE(),CURRENT_SCHEMA()
0,VSEKAR,PUBLIC,INDSOL_DICOM_DB,PUBLIC


In [5]:
df = sp_session.table('image_parsed_raw').sample(n=5).to_pandas()
display(df)

Unnamed: 0,SEQ_NO,IMAGE_FILEPATH,PARSING_STATUS,PARSING_EXCEPTION,CLASS_LABEL,CLASS_LABEL_NUM,IMAGE_ARRAY_SHAPE_0,IMAGE_ARRAY_SHAPE_1,IMAGE_ARRAY,NORMALIZED_IMAGE_ARRAY,RESIZED_FEATURE
0,4421,./data/val/NORMAL/IM-0326-0001.jpeg,True,,NORMAL,0,150,150,"[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n ...","[\n 0.000000000000000e+00,\n 0.0000000000000...","[\n 0.000000000000000e+00,\n 0.0000000000000..."
1,4050,./data/val/PNEUMONIA/person292_virus_600.jpeg,True,,PNEUMONIA,1,150,150,"[\n 153,\n 151,\n 148,\n 152,\n 149,\n 1...","[\n 6.000000000000000e-01,\n 5.9215686274509...","[\n 6.000000000000000e-01,\n 5.9215686274509..."
2,3319,./data/train/NORMAL/IM-0704-0001.jpeg,True,,NORMAL,0,150,150,"[\n 49,\n 48,\n 63,\n 67,\n 64,\n 71,\n ...","[\n 1.921568627450981e-01,\n 1.8823529411764...","[\n 1.921568627450981e-01,\n 1.8823529411764..."
3,4225,./data/val/NORMAL/IM-0308-0001.jpeg,True,,NORMAL,0,150,150,"[\n 121,\n 114,\n 112,\n 112,\n 120,\n 1...","[\n 4.745098039215686e-01,\n 4.4705882352941...","[\n 4.745098039215686e-01,\n 4.4705882352941..."
4,1752,./data/train/PNEUMONIA/person846_virus_1491.jpeg,True,,PNEUMONIA,1,150,150,"[\n 0,\n 8,\n 22,\n 62,\n 55,\n 58,\n 7...","[\n 0.000000000000000e+00,\n 3.1372549019607...","[\n 0.000000000000000e+00,\n 3.1372549019607..."


In [6]:
import time

image_count = 10*1000
epochs = 3
display(Markdown("Model training ..."))

t = time.process_time()
stmt = f''' call train_pneumonia_identification_model(
        {image_count} 
        ,'@model_stg' 
        ,'{config['APP_DB']['model_flname']}' 
        ,{epochs}); '''
print(stmt)
out_df = sp_session.sql(stmt).collect()
elapsed_time = (time.process_time() - t) #/60

print(f'Total execution time for training: {elapsed_time} minutes')
print(out_df)

Model training ...

 call train_pneumonia_identification_model(
        10000 
        ,'@model_stg' 
        ,'pneumonia_model.joblib' 
        ,3); 
Total execution time for training: 0.05588499999999996 minutes
[Row(TRAIN_PNEUMONIA_IDENTIFICATION_MODEL='{\n  "logs": [\n    "load data ...",\n    "train and split ...",\n    " LEN train test val : 675 / 3155 / 676",\n    "reshape data ...",\n    "reshape data 3 ...",\n    " Shapeof Train / Val : (675, 1) / (676, 1)",\n    "reshape data 2.1 ...",\n    " Shapeof Train / Val : (675, 150, 150, 1) / (675,)",\n    "reshape data 2.3 ...",\n    " Shapeof Train / Val : (676, 150, 150, 1) / (676,)",\n    "instantiate data generator ...",\n    "define model pipeline ...",\n    "model fit ...",\n    "save model ...",\n    "Finished"\n  ]\n}')]


In [26]:
import pandas as pd

rows = sp_session.sql(f''' list @model_stg; ''').collect()
data = []
for r in rows:
    data.append({
        'name': r['name']
        ,'size': r['size']
        ,'last_modified': r['last_modified']
    })

df = pd.json_normalize(data)
display(df)



Unnamed: 0,name,size,last_modified
0,model_stg/pneumonia_model.joblib,95047952,"Tue, 20 Dec 2022 10:09:37 GMT"


--- 
### Closeout

    With that we are finished this section of the demo setup

In [9]:
sp_session.close()
print('Finished!!!')

Finished!!!
