### Deng AI Azure Models and Training
#### Environment and Data Prep for this Model

In [351]:
#Load the workspace from the config file
from azureml.core import Workspace

# Connect to the workspace described by the local config file and report its name
ws = Workspace.from_config(path='.azureml/ws_config.json')
print('{} loaded'.format(ws.name))

Azure-ML-WS loaded


In [352]:
# Folder that receives the pipeline scripts written by the %%writefile cells below
inputdata_folder = "inputdata"

In [353]:
%%writefile $inputdata_folder/create_rfr_datasets.py

import pandas as pd
from sklearn.preprocessing import RobustScaler
from azureml.core import Workspace,Datastore,Dataset,Run
import argparse
import os

# Resolve the run this script executes under, then its workspace and default datastore
run = Run.get_context()
ws = run.experiment.workspace
default_ds = ws.get_default_datastore()

# The output location is passed on the command line as --folder
parser = argparse.ArgumentParser()
parser.add_argument('--folder', dest='folder', type=str)
args = parser.parse_args()
output_folder = args.folder

#Define the fields used for each city
sj_features=[
    'year',
    'yearcount',
    'weekofyear',
    'station_max_temp_c',
    'station_min_temp_c',
    'cum_rain_prior_24_wks',
    'avg_max_temp_prior_22_wks',
    'total_cases'
]

#Number of lagged copies to keep per SJ feature (0 = no lagged copies)
sj_lags={
    'year':0,
    'yearcount':0,
    'weekofyear':0,
    'station_max_temp_c':0,
    'station_min_temp_c':0,
    'cum_rain_prior_24_wks':46,
    'avg_max_temp_prior_22_wks':0,
    'total_cases':0
}

iq_features=[
    'year',
    'yearcount',
    'weekofyear',
    'reanalysis_min_air_temp_k',
    'station_max_temp_c',
    'cum_rain_prior_22_wks',
    'total_cases'
]

#Number of lagged copies to keep per IQ feature (0 = no lagged copies)
iq_lags={
    'year':0,
    'yearcount':0,
    'weekofyear':0,
    'reanalysis_min_air_temp_k':0,
    'station_max_temp_c':0,
    'cum_rain_prior_22_wks':43,
    'total_cases':0
}

#Define a function to retrieve the features to be used in the model for each specific city
def get_feature_list(city,lag_names=True):
    """Return the model feature names for one city.

    Args:
        city: 'sj' or 'iq'.
        lag_names: when true, return the city's base features followed by one
            '<feature>_shift_<i>' name for each i in range(lag count) taken from
            the city's lag table; when false, return only the keys of that table.

    Returns:
        A new list on every call; the module-level feature lists are not modified.

    Raises:
        ValueError: if city is not 'sj' or 'iq'.
    """
    city_config={'sj':(sj_features,sj_lags),'iq':(iq_features,iq_lags)}
    if city not in city_config:
        raise ValueError("city must be 'sj' or 'iq', got "+repr(city))
    base_features,lags=city_config[city]

    if not lag_names:
        return [str(key) for key in lags]

    #Work on a copy: appending to sj_features/iq_features directly made the
    #module-level list (and every later result) grow with each call
    feature_list=list(base_features)
    for key,value in lags.items():
        feature_list.extend(str(key)+'_shift_'+str(i) for i in range(value))

    return feature_list

#Define a function to create a set of time-lagged features based on the feature and the desired lag
def create_lag_features(df,lag,end_col=0):
    """Append shifted copies of the leading columns of df, then drop the first rows.

    For every step in range(lag) the first end_col columns are shifted down by
    `step` rows and joined on with the suffix '_shift_<step>' (step 0 is an
    unshifted copy). Afterwards the first `lag` rows are removed and the index
    is renumbered from 0. The frame passed in is not modified.
    """
    widened=df
    for step in range(lag):
        shifted=widened.iloc[:,:end_col].shift(periods=step)
        widened=widened.join(shifted,rsuffix='_shift_%d'%step)

    return widened.iloc[lag:,:].reset_index(drop=True)

#create sets for each city
def prep_for_model(city,lookback):
    """Build scaled, time-lagged training and holdout feature matrices for one city.

    Args:
        city: 'sj' or 'iq'; selects the registered datasets
            'dengue-train-all-<city>-ds' and 'dengue-holdout-all-<city>-ds'.
        lookback: lag count handed to create_lag_features; that many leading
            rows are dropped from the combined data.

    Returns:
        (np_df, np_df_h, y): the scaled training rows and scaled holdout rows as
        numpy arrays, and the training labels ('total_cases') as a Series.

    Raises:
        ValueError: if city is not 'sj' or 'iq'.
    """
    if city not in ('sj','iq'):
        raise ValueError("city must be 'sj' or 'iq', got "+repr(city))

    #get train and holdout for the requested city
    train_ds=ws.datasets.get('dengue-train-all-'+city+'-ds')
    holdout_ds=ws.datasets.get('dengue-holdout-all-'+city+'-ds')
    df=train_ds.to_pandas_dataframe()
    df_h=holdout_ds.to_pandas_dataframe()
    #placeholder label so the holdout frame has the same columns as the training frame
    df_h['total_cases']=0

    #create single dataset (DataFrame.append was removed in pandas 2.0; concat is the supported form)
    df_all=pd.concat([df,df_h],ignore_index=True)

    #Reduce the frame to the un-lagged feature columns for this city
    df_all_lag=df_all[get_feature_list(city,lag_names=False)].copy()

    #Create lagged data
    df_all_lag=create_lag_features(df_all_lag,lag=lookback,end_col=df_all_lag.shape[1])

    #Keep only the training features plus the lagged versions the model uses
    df_all_lag=df_all_lag[get_feature_list(city,lag_names=True)].copy()

    #Rows before this position come from the training data (its first `lookback` rows were dropped)
    n_train=df.shape[0]-lookback

    #Break out the label data so it does not get scaled; holdout labels are the placeholder zeros
    y=df_all_lag['total_cases'][:n_train]
    df_all_lag=df_all_lag.drop(columns=['total_cases'])

    #scale features; note the scaler is fit on training and holdout rows together
    scaler=RobustScaler()
    scaled=scaler.fit_transform(df_all_lag)

    #break out the holdout rows from the training rows
    np_df=scaled[:n_train,:]
    np_df_h=scaled[n_train:,:]

    return np_df, np_df_h, y

#Build the SJ inputs: scaled training features, scaled holdout features and labels
np_sj,np_sj_h,y_sj=prep_for_model(city='sj',lookback=50)
df_sj,df_sj_holdout,df_y_sj=pd.DataFrame(np_sj),pd.DataFrame(np_sj_h),pd.DataFrame(y_sj)

#Write the SJ files into the PipelineData location
os.makedirs(output_folder, exist_ok=True)
train_sj_output_path=os.path.join(output_folder,'train_sj_scaled.csv')
test_sj_output_path=os.path.join(output_folder,'holdout_sj_scaled.csv')
y_sj_output_path=os.path.join(output_folder,'y_sj.csv')
for frame,path in ((df_sj,train_sj_output_path),
                   (df_sj_holdout,test_sj_output_path),
                   (df_y_sj,y_sj_output_path)):
    frame.to_csv(path,index=False)

#Build the IQ inputs the same way
np_iq,np_iq_h,y_iq=prep_for_model(city='iq',lookback=50)
df_iq,df_iq_holdout,df_y_iq=pd.DataFrame(np_iq),pd.DataFrame(np_iq_h),pd.DataFrame(y_iq)

#Write the IQ files into the PipelineData location
train_iq_output_path=os.path.join(output_folder,'train_iq_scaled.csv')
test_iq_output_path=os.path.join(output_folder,'holdout_iq_scaled.csv')
y_iq_output_path=os.path.join(output_folder,'y_iq.csv')
for frame,path in ((df_iq,train_iq_output_path),
                   (df_iq_holdout,test_iq_output_path),
                   (df_y_iq,y_iq_output_path)):
    frame.to_csv(path,index=False)

### Create reusable datasets for the scaled holdout data. These will be needed to make predictions once the models are deployed
def _register_holdout_dataset(local_path,dataset_name,description):
    """Upload one scaled holdout CSV to the default datastore and register it as a tabular dataset.

    Args:
        local_path: path of the CSV written earlier in this script.
        dataset_name: name to register the dataset under in the workspace.
        description: description stored with the registered dataset.

    Returns:
        The registered dataset, or the unregistered tabular dataset if
        registration failed (the error is printed, as before).
    """
    target_path='dengueAI/inputdata'
    default_ds.upload_files(files=[local_path],
                        target_path=target_path,
                        overwrite=True,
                        show_progress=True)

    #Create a tabular dataset from the path on the datastore for the file
    tab_ds=Dataset.Tabular.from_delimited_files(path=(default_ds,target_path+'/'+os.path.basename(local_path)))

    # Register the tabular dataset
    try:
        tab_ds=tab_ds.register(workspace=ws,
                               name=dataset_name,
                               description=description,
                               tags={'format':'CSV'},
                               create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        print(ex)

    return tab_ds

tab_test_sj_rfr_ds=_register_holdout_dataset(test_sj_output_path,
                                             'test-sj-rfr-ds',
                                             'Holdout data scaled for SJ RFR model')

tab_test_iq_rfr_ds=_register_holdout_dataset(test_iq_output_path,
                                             'test-iq-rfr-ds',
                                             'Holdout data scaled for IQ RFR model')

#The original line was the bare attribute `run.complete`, which never called the method
run.complete()

Overwriting inputdata/create_rfr_datasets.py


#### Create and Register the Model
##### Model for SJ

In [334]:
%%writefile $inputdata_folder/rfr_train_sj.py
#Import libraries
from azureml.core import Run
import argparse
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib
import warnings
warnings.filterwarnings('ignore')

#Set run context
run=Run.get_context()

#Get PipelineData argument
parser = argparse.ArgumentParser()
parser.add_argument('--folder', type=str, dest='folder')
parser.add_argument('--model_folder',type=str,dest='model_folder')
args = parser.parse_args()
data_folder = args.folder
model_folder= args.model_folder

#Load the scaled training features and their labels (the holdout file is not needed for training)
df_sj=pd.read_csv(os.path.join(data_folder,'train_sj_scaled.csv'))
df_sj_y=pd.read_csv(os.path.join(data_folder,'y_sj.csv'))

#Use the single label column as a 1-D target
y_sj=df_sj_y.iloc[:,0]

#split the training set into train and test
x_train, x_test, y_train, y_test = train_test_split(df_sj, y_sj, test_size=0.30, random_state=0)

#create the model
rfr=RandomForestRegressor(n_estimators=300,max_depth=10)
rfr.fit(x_train,y_train)

#score the model on the held-back split
score=rfr.score(x_test,y_test)
print('SJ score: ',score)
#np.float was removed from NumPy; the builtin float is the same conversion
run.log('SJ score: ',float(score))

#calculate MAE (arguments in the documented y_true, y_pred order)
y_hat=rfr.predict(x_test)
mae=mean_absolute_error(y_test,y_hat)
print('SJ MAE: ',mae)
run.log('SJ MAE: ',float(mae))

# Save the trained model (os was used here without being imported)
os.makedirs(model_folder, exist_ok=True)
output_path = os.path.join(model_folder,"sj_rfr_model.pkl")
joblib.dump(value=rfr, filename=output_path)

run.complete()


Overwriting inputdata/rfr_train_sj.py


In [335]:
%%writefile $inputdata_folder/register_rfr_sj.py
# Import libraries
import argparse
import joblib
from azureml.core import Workspace, Model, Run

# Read the folder holding the trained model from the command line
parser = argparse.ArgumentParser()
parser.add_argument('--model_folder', dest='model_folder', type=str)
args = parser.parse_args()
model_folder = args.model_folder
print('Model folder', str(model_folder))

# The run context gives access to the workspace this step executes in
run = Run.get_context()

# Load the saved model file (the loaded object is not used further), then register that file
print('Loading model from ' + model_folder)
model_file = model_folder + '/sj_rfr_model.pkl'
model = joblib.load(model_file)

Model.register(
    workspace=run.experiment.workspace,
    model_path=model_file,
    model_name='sj_rfr_model',
    tags={'Training context': 'Pipeline'},
)

run.complete()

Overwriting inputdata/register_rfr_sj.py


##### Model for IQ

In [354]:
%%writefile $inputdata_folder/rfr_train_iq.py
#Import libraries
from azureml.core import Run
import argparse
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib
import warnings
warnings.filterwarnings('ignore')

#Set run context
run=Run.get_context()

# Get PipelineData argument
parser = argparse.ArgumentParser()
parser.add_argument('--folder', type=str, dest='folder')
parser.add_argument('--model_folder',type=str,dest='model_folder')
args = parser.parse_args()
data_folder = args.folder
model_folder= args.model_folder

#Load the scaled training features and their labels (the holdout file is not needed for training)
df_iq=pd.read_csv(os.path.join(data_folder,'train_iq_scaled.csv'))
df_iq_y=pd.read_csv(os.path.join(data_folder,'y_iq.csv'))

#Use the single label column as a 1-D target
y_iq=df_iq_y.iloc[:,0]

#split the training set into train and test
x_train, x_test, y_train, y_test = train_test_split(df_iq, y_iq, test_size=0.30, random_state=0)

#create the model
rfr=RandomForestRegressor(n_estimators=300,max_depth=10)
rfr.fit(x_train,y_train)

#score the model on the held-back split
score=rfr.score(x_test,y_test)
print('IQ score: ',score)
#np.float was removed from NumPy; the builtin float is the same conversion
run.log('IQ score: ',float(score))

#calculate MAE (arguments in the documented y_true, y_pred order)
y_hat=rfr.predict(x_test)
mae=mean_absolute_error(y_test,y_hat)
print('IQ MAE: ',mae)
run.log('IQ MAE: ',float(mae))

# Save the trained model (os was used here without being imported)
os.makedirs(model_folder, exist_ok=True)
output_path = os.path.join(model_folder,"iq_rfr_model.pkl")
joblib.dump(value=rfr, filename=output_path)

run.complete()


Overwriting inputdata/rfr_train_iq.py


In [355]:
%%writefile $inputdata_folder/register_rfr_iq.py
# Import libraries
import argparse
import joblib
from azureml.core import Workspace, Model, Run

# Read the folder holding the trained model from the command line
parser = argparse.ArgumentParser()
parser.add_argument('--model_folder', dest='model_folder', type=str)
args = parser.parse_args()
model_folder = args.model_folder
print('Model folder', str(model_folder))

# The run context gives access to the workspace this step executes in
run = Run.get_context()

# Load the saved model file (the loaded object is not used further), then register that file
print('Loading model from ' + model_folder)
model_file = model_folder + '/iq_rfr_model.pkl'
model = joblib.load(model_file)

Model.register(
    workspace=run.experiment.workspace,
    model_path=model_file,
    model_name='iq_rfr_model',
    tags={'Training context': 'Pipeline'},
)

run.complete()

Overwriting inputdata/register_rfr_iq.py


#### Create Compute Environment for Model Pipeline
##### Compute Cluster

In [356]:
#Create a compute cluster if it does not exist
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace

ws = Workspace.from_config(path='.azureml/ws_config.json')

cluster_name = "DS-Comp-Cluster"

try:
    #Check for existing compute target
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    #If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        #Later cells need pipeline_cluster: report the failure and stop here instead of
        #carrying on with an undefined (or stale) cluster variable
        print(ex)
        raise

Found existing cluster, use it.


##### Python Environment on the Cluster

In [357]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

#Define the Python environment the pipeline steps run in
dengue_env = Environment("dengue-pipeline-env")
dengue_env.python.user_managed_dependencies = False  # Azure ML manages the dependencies
dengue_env.docker.enabled = True  # run inside a docker container

#Packages installed into that environment
dengue_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas'],
    pip_packages=['azureml-defaults', 'azureml-dataprep[pandas]', 'keras', 'tensorflow'],
)
dengue_env.python.conda_dependencies = dengue_packages

#Register the environment, then read the registered copy back from the workspace
dengue_env.register(workspace=ws)
registered_env = Environment.get(ws, 'dengue-pipeline-env')

#Run configuration for the pipeline: the compute cluster plus the registered environment
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster
pipeline_run_config.environment = registered_env

print("Run configuration created.")

Run configuration created.


#### Create Model Pipelines
##### Pipeline for SJ

In [358]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep,EstimatorStep
from azureml.train.estimator import Estimator

#PipelineData objects that carry the prepared datasets and the trained model between steps
ws = Workspace.from_config(path='.azureml/ws_config.json')
data_store = ws.get_default_datastore()
dengueAI_datasets = PipelineData('deng_datasets', datastore=data_store)
model_folder = PipelineData('model_folder', datastore=data_store)

#Estimator that runs the SJ training script on the cluster
estimator = Estimator(
    source_directory=inputdata_folder,
    compute_target=pipeline_cluster,
    environment_definition=pipeline_run_config.environment,
    entry_script='rfr_train_sj.py',
)

#Step 1: run the data-prep script (time-lagged features, scaling); it writes into dengueAI_datasets
create_rfr_datasets = PythonScriptStep(
    name='Create SJ Datasets for RFR Model',
    source_directory=inputdata_folder,
    script_name='create_rfr_datasets.py',
    arguments=['--folder', dengueAI_datasets],
    inputs=[],
    outputs=[dengueAI_datasets],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)

#Step 2: train the SJ random forest regressor; reads dengueAI_datasets, writes model_folder
rfr_train_sj = EstimatorStep(
    name='Create sj random forest regressor model',
    estimator=estimator,
    estimator_entry_script_arguments=['--folder', dengueAI_datasets, '--model_folder', model_folder],
    inputs=[dengueAI_datasets],
    outputs=[model_folder],
    compute_target=pipeline_cluster,
    allow_reuse=True,
)

#Step 3: register the model found in model_folder
register_rfr_sj = PythonScriptStep(
    name='Register RFR Model for SJ',
    source_directory=inputdata_folder,
    script_name='register_rfr_sj.py',
    arguments=['--model_folder', model_folder],
    inputs=[model_folder],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)


print("Pipeline steps defined")

Pipeline steps defined


##### Run SJ Pipeline

In [344]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

#Assemble the three SJ steps into a pipeline
pipeline_steps = [create_rfr_datasets, rfr_train_sj, register_rfr_sj]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

#Submit the pipeline under its own experiment and block until the run ends
experiment = Experiment(workspace=ws, name='dengue-sj-randomforest-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
pipeline_run.wait_for_completion(show_output=False)

Pipeline is built.
Created step Create SJ Datasets for RFR Model [aff61d4f][2077972e-d39f-4187-aa9c-8171bef548f4], (This step will run and generate new outputs)
Created step Create sj random forest regressor model [08cdeb44][1dbc5798-6250-496f-96c4-2347ae8e5420], (This step will run and generate new outputs)Created step Register RFR Model for SJ [2aed1015][7be1dd08-bf58-4eac-a843-cf6e9fe264b9], (This step will run and generate new outputs)

Submitted PipelineRun eee3b7dd-fcdb-42eb-bbf4-3a1349075332
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/dengue-sj-randomforest-pipeline/runs/eee3b7dd-fcdb-42eb-bbf4-3a1349075332?wsid=/subscriptions/fd2d8de8-17e1-4976-9906-fdde487edd5f/resourcegroups/AzureML-Learning/workspaces/Azure-ML-WS
Pipeline submitted for execution.
PipelineRunId: eee3b7dd-fcdb-42eb-bbf4-3a1349075332
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/dengue-sj-randomforest-pipeline/runs/eee3b7dd-fcdb-42eb-bbf4-3a1349075332?wsid=/

'Finished'

##### Create Pipeline for IQ

In [359]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep,EstimatorStep
from azureml.train.estimator import Estimator

#PipelineData objects that carry the prepared datasets and the trained model between steps
ws = Workspace.from_config(path='.azureml/ws_config.json')
data_store = ws.get_default_datastore()
dengueAI_datasets = PipelineData('deng_datasets', datastore=data_store)
model_folder = PipelineData('model_folder', datastore=data_store)


#Estimator that runs the IQ training script on the cluster
estimator = Estimator(
    source_directory=inputdata_folder,
    compute_target=pipeline_cluster,
    environment_definition=pipeline_run_config.environment,
    entry_script='rfr_train_iq.py',
)

#Step 1: run the data-prep script (time-lagged features, scaling); it writes into dengueAI_datasets
create_rfr_datasets = PythonScriptStep(
    name='Create IQ Datasets for RFR Model',
    source_directory=inputdata_folder,
    script_name='create_rfr_datasets.py',
    arguments=['--folder', dengueAI_datasets],
    inputs=[],
    outputs=[dengueAI_datasets],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)

#Step 2: train the IQ random forest regressor; reads dengueAI_datasets, writes model_folder
rfr_train_iq = EstimatorStep(
    name='Create iq random forest regressor model',
    estimator=estimator,
    estimator_entry_script_arguments=['--folder', dengueAI_datasets, '--model_folder', model_folder],
    inputs=[dengueAI_datasets],
    outputs=[model_folder],
    compute_target=pipeline_cluster,
    allow_reuse=True,
)

#Step 3: register the model found in model_folder
register_rfr_iq = PythonScriptStep(
    name='Register RFR Model for IQ',
    source_directory=inputdata_folder,
    script_name='register_rfr_iq.py',
    arguments=['--model_folder', model_folder],
    inputs=[model_folder],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)

print("Pipeline steps defined")

Pipeline steps defined


##### Run IQ Pipeline

In [360]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

#Assemble the three IQ steps into a pipeline
pipeline_steps = [create_rfr_datasets, rfr_train_iq, register_rfr_iq]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

#Submit the pipeline under its own experiment and block until the run ends
experiment = Experiment(workspace=ws, name='dengue-iq-randomforest-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
pipeline_run.wait_for_completion(show_output=False)

Pipeline is built.
Created step Create IQ Datasets for RFR Model [946fac25][be1a735d-9313-47bc-9f45-329ef79c5af0], (This step will run and generate new outputs)Created step Create iq random forest regressor model [863a065f][cf8f34ab-b3cb-4830-a27d-e5e4ee151862], (This step will run and generate new outputs)
Created step Register RFR Model for IQ [35b14d51][452b039a-19d7-4339-9685-dae6bff9262f], (This step will run and generate new outputs)

Submitted PipelineRun fcb8a3c6-ba27-46d9-9cfe-d9e2f21d7ff8
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/dengue-iq-randomforest-pipeline/runs/fcb8a3c6-ba27-46d9-9cfe-d9e2f21d7ff8?wsid=/subscriptions/fd2d8de8-17e1-4976-9906-fdde487edd5f/resourcegroups/AzureML-Learning/workspaces/Azure-ML-WS
Pipeline submitted for execution.
PipelineRunId: fcb8a3c6-ba27-46d9-9cfe-d9e2f21d7ff8
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/dengue-iq-randomforest-pipeline/runs/fcb8a3c6-ba27-46d9-9cfe-d9e2f21d7ff8?wsid=/

'Finished'

#### Deploy Models as an ACI Service


In [278]:
#Check to see that the model is there in the workspace
ws=Workspace.from_config(path='.azureml/ws_config.json')
sj_model=ws.models['sj_rfr_model']
iq_model=ws.models['iq_rfr_model']
#Print each model's own version; `model` is not defined in this cell, so the
#original lines only ran when a leftover variable of that name existed in the kernel
print(sj_model.name, 'version', sj_model.version)
print(iq_model.name, 'version', iq_model.version)

sj_rfr_model version 6
iq_rfr_model version 6


In [291]:
#Create local folder to hold deployment scripts
import os

folder_name = 'dengue_service'

#Local folder that receives the web service files
service_folder = './{}'.format(folder_name)
os.makedirs(service_folder, exist_ok=True)

print('{} folder created.'.format(folder_name))

#Paths of the scoring scripts written by the %%writefile cells below
sj_script_file = os.path.join(service_folder, 'score_rfr_sj.py')
iq_script_file = os.path.join(service_folder, 'score_rfr_iq.py')

dengue_service folder created.


##### SJ Service

In [317]:
%%writefile $sj_script_file
import json
import joblib
import numpy as np
from azureml.core.model import Model

#Called when the service is loaded
def init():
    """Load the registered 'sj_rfr_model' file into the module-level `model` that run() uses."""
    global model
    model=joblib.load(Model.get_model_path('sj_rfr_model'))

#Called when a request is received
def run(raw_data):
    """Score one request with the model loaded by init().

    Args:
        raw_data: JSON text whose 'data' entry holds the rows to score.

    Returns:
        JSON text of the predictions returned by model.predict, as a list.
    """
    #Parse the payload once and pass the rows to the model as a numpy array
    #(the array used to be discarded by a second json.loads of the same payload)
    data = np.array(json.loads(raw_data)['data'])

    #Get the predictions from the model and make them JSON-serializable
    predictions = model.predict(data).tolist()

    #Return the predictions as JSON
    return json.dumps(predictions)


Overwriting ./dengue_service\score_rfr_sj.py


In [280]:
#Environment config for the compute hosting the ACI service
from azureml.core.conda_dependencies import CondaDependencies 
import os

#Add the dependencies for the model
myenv=CondaDependencies()
myenv.add_conda_package('scikit-learn')
myenv.add_conda_package('pandas')

#Save the environment config in the service folder; `experiment_folder` is not
#defined anywhere in this notebook, so the path is built from service_folder
env_file = os.path.join(service_folder,"dengue_env.yml")
with open(env_file,'w') as f:
    f.write(myenv.serialize_to_string())
print("Dependency info in", env_file)

#Print the env config file
with open(env_file,"r") as f:
    print(f.read())

Dependency info in ./dengue_service\dengue_env.yml
# Conda environment specification. The dependencies defined in this file will

# be automatically provisioned for runs with userManagedDependencies=False.


# Details about the Conda environment file format:

# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually


name: project_environment
dependencies:
  # The python interpreter version.

  # Currently Azure ML only supports 3.5.2 and later.

- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.

  - azureml-defaults

- scikit-learn
- pandas
channels:
- anaconda
- conda-forge



In [309]:
#script to actually deploy the service
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

#Scoring environment: the SJ entry script plus the conda file written above
inference_config = InferenceConfig(
    runtime='python',
    entry_script=sj_script_file,
    conda_file=env_file,
)

#Size of the container instance that hosts the service
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service_name = 'dengue-sj-rfr-service'

#Deploy the registered SJ model and wait until the deployment finishes
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[sj_model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)

print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running......................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [303]:
#Remove the service
#NOTE(review): this deletes whatever deployment `service` currently refers to. When the cells run
#top to bottom that is the SJ service deployed just above, which the SJ prediction cell further
#down still calls - confirm this cell is only meant to be run by hand for clean-up.
service.delete()

##### IQ Service

In [316]:
%%writefile $iq_script_file
import json
import joblib
import numpy as np
from azureml.core.model import Model

#Called when the service is loaded
def init():
    """Load the registered 'iq_rfr_model' file into the module-level `model` that run() uses."""
    global model
    model=joblib.load(Model.get_model_path('iq_rfr_model'))

#Called when a request is received
def run(raw_data):
    """Score one request with the model loaded by init().

    Args:
        raw_data: JSON text whose 'data' entry holds the rows to score.

    Returns:
        JSON text of the predictions returned by model.predict, as a list.
    """
    #Parse the payload once and pass the rows to the model as a numpy array
    #(the array used to be discarded by a second json.loads of the same payload)
    data = np.array(json.loads(raw_data)['data'])

    #Get the predictions from the model and make them JSON-serializable
    predictions = model.predict(data).tolist()

    #Return the predictions as JSON
    return json.dumps(predictions)

Overwriting ./dengue_service\score_rfr_iq.py


In [207]:
#script to actually deploy the service
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

#Scoring environment: the IQ entry script plus the conda file written above
inference_config = InferenceConfig(
    runtime='python',
    entry_script=iq_script_file,
    conda_file=env_file,
)

#Size of the container instance that hosts the service
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service_name = 'dengue-iq-rfr-service'

#Deploy the registered IQ model and wait until the deployment finishes
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[iq_model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)

print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running....................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


#### Consume Services and Make Predictions
##### Make Predictions with SJ Service

In [347]:
import json
import os
import requests
import azureml.core
from azureml.core import Workspace, Dataset, Experiment, Run
import pandas as pd
import numpy as np


from azureml.core import Webservice
#get the current workspace
ws=Workspace.from_config(path='.azureml/ws_config.json')
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))
service=Webservice(ws,'dengue-sj-rfr-service')

#Ask the deployed service for its scoring URI instead of pasting a URL into the notebook
endpoint=service.scoring_uri
#Never store the service key in the notebook: when key auth is enabled on the service,
#provide it through the DENGUE_SJ_SERVICE_KEY environment variable
key=os.environ.get('DENGUE_SJ_SERVICE_KEY')

#get the datastore
default_ds = ws.get_default_datastore()

#from the registered datasets, pull in the scaled SJ holdout dataset
ds_h=ws.datasets.get('test-sj-rfr-ds')

#create a dataframe from the test dataset
df_h=ds_h.to_pandas_dataframe()

#create array to serialize to json
js_h=df_h.values.tolist()

#Convert the array to JSON format
input_json=json.dumps({"data": js_h})

#For detailed error messages while debugging, call the service directly instead of via REST:
#    predictions = json.loads(service.run(input_data = input_json))

#Call the webservice via REST
#Set the content type, and the authentication header only when a key was provided
request_headers = {"Content-Type":"application/json"}
if key:
    request_headers["Authorization"] = "Bearer " + key

#Send the request
response=requests.post(endpoint, input_json, headers=request_headers)

#If we got a valid response, save the predictions
if response.status_code == 200:
    #The scoring script returns json.dumps(...), so the response body is a JSON string holding JSON
    y = json.loads(response.json())
    y_pred=pd.DataFrame([int(y[i]) for i in range(len(js_h))])
    #Make sure the output folder exists before writing into it
    os.makedirs('outputdata', exist_ok=True)
    y_pred.to_csv('outputdata/sj_y_pred.csv',index=False)
    print(len(y_pred),' predictions saved to file.')
else:
    #Show the status and body so a failed call can be diagnosed
    print(response.status_code, response.text)

    


Ready to use Azure ML 1.18.0 to work with Azure-ML-WS
260  predictions saved to file.


##### Make Predictions with IQ Service

In [361]:
import json
import os
import requests
import azureml.core
from azureml.core import Workspace, Dataset, Experiment, Run
import pandas as pd
import numpy as np


from azureml.core import Webservice
#get the current workspace
ws=Workspace.from_config(path='.azureml/ws_config.json')
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))
service=Webservice(ws,'dengue-iq-rfr-service')

#Ask the deployed service for its scoring URI instead of pasting a URL into the notebook
endpoint=service.scoring_uri
#Never store the service key in the notebook: when key auth is enabled on the service,
#provide it through the DENGUE_IQ_SERVICE_KEY environment variable
key=os.environ.get('DENGUE_IQ_SERVICE_KEY')

#get the datastore
default_ds = ws.get_default_datastore()

#from the registered datasets, pull in the scaled IQ holdout dataset
ds_h=ws.datasets.get('test-iq-rfr-ds')

#create a dataframe from the test dataset
df_h=ds_h.to_pandas_dataframe()

#create array to serialize to json
js_h=df_h.values.tolist()

#Convert the array to JSON format
input_json=json.dumps({"data": js_h})

#For detailed error messages while debugging, call the service directly instead of via REST:
#    predictions = json.loads(service.run(input_data = input_json))

#Call the webservice via REST
#Set the content type, and the authentication header only when a key was provided
request_headers = {"Content-Type":"application/json"}
if key:
    request_headers["Authorization"] = "Bearer " + key

#Send the request
response=requests.post(endpoint, input_json, headers=request_headers)

#If we got a valid response, save the predictions
if response.status_code == 200:
    #The scoring script returns json.dumps(...), so the response body is a JSON string holding JSON
    y = json.loads(response.json())
    y_pred=pd.DataFrame([int(y[i]) for i in range(len(js_h))])
    #Make sure the output folder exists before writing into it
    os.makedirs('outputdata', exist_ok=True)
    y_pred.to_csv('outputdata/iq_y_pred.csv',index=False)
    print(len(y_pred),' predictions saved to file.')
else:
    #Show the status and body so a failed call can be diagnosed
    print(response.status_code, response.text)

    

Ready to use Azure ML 1.18.0 to work with Azure-ML-WS
156  predictions saved to file.


#### Combine Files for Submission

In [369]:
from datetime import date

#Tags used in the submission file name
#NOTE: these reuse the names sj_model/iq_model that the deployment cells bind to the registered
#models, so re-run the model check cell before deploying again
sj_model='sj-rfr'
iq_model='iq-rfr'
stamp=date.today().isoformat()

#Open the downloaded prediction files for each city
sj_pred=pd.read_csv('outputdata/sj_y_pred.csv')
iq_pred=pd.read_csv('outputdata/iq_y_pred.csv')
df_submit=pd.read_csv('outputdata/submit_file.csv')

#create a single set of predictions, SJ rows first and IQ rows after them
#(DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement)
all_pred=pd.concat([sj_pred,iq_pred],ignore_index=True)
#the prediction files hold one column; use it as a 1-D array
y_hat=all_pred.iloc[:,0].to_numpy()

#add preds to the submit file
df_submit['total_cases']=y_hat

#save the submission file
df_submit.to_csv('outputdata/submit_file_'+sj_model+'_'+iq_model+'_'+stamp+'.csv',index=False)
