In [23]:
import pandas as pd
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from time import time

In [2]:
#### Load Data
df = pd.read_csv("SIT788_4_1_Data/data1.csv")

In [3]:
print(df.isnull().sum())
display(df)

f1       0
f2       0
f3       0
f4       0
f5       0
f6       0
f7       0
f8       0
f9       0
f10      0
f11      0
f12      0
f13      0
f14      0
class    0
dtype: int64


Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,class
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,1
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,1
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,1
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,1
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,1
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,0
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,1
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,1


#### Handle categorical variables

In [4]:
labelEncoder = preprocessing.LabelEncoder()
df = df.apply(labelEncoder.fit_transform)
display(df)

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,class
0,22,7,2671,9,12,4,1,1,4,1,25,0,39,39,1
1,33,6,2926,9,12,2,4,0,4,1,0,0,12,39,1
2,21,4,14086,11,8,0,6,1,4,1,0,0,39,39,1
3,36,4,15336,1,6,2,6,0,2,1,0,0,39,39,1
4,11,4,19355,9,12,2,10,5,2,0,0,0,39,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,10,4,16528,7,11,2,13,5,4,0,0,0,37,39,1
32557,23,4,8080,11,8,2,7,0,4,1,0,0,39,39,0
32558,41,4,7883,11,8,6,1,4,4,0,0,0,39,39,1
32559,5,4,12881,11,8,4,1,3,4,1,0,0,19,39,1


#### Split into train and test

In [5]:
X, Y = df.loc[:, (df.columns != 'class')], df[['class']]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

#### Train best model (decision tree)

In [6]:
clf_dt = DecisionTreeClassifier()

millis_a = int(time() * 1000)
clf_dt.fit(x_train, y_train)
millis_b = int(time() * 1000)
dif = millis_b - millis_a
print ("Decsion Tree train time: ", dif)

millis_a = int(time() * 1000)
y_pred_dt = clf_dt.predict(x_test)
millis_b = int(time() * 1000)
dif = millis_b - millis_a
avg_t = dif / len(x_test)
print ("Decsion Tree time per prediction: ", avg_t)

Decsion Tree train time:  178
Decsion Tree time per prediction:  0.0009212344541685859


In [7]:
print(accuracy_score(y_test, y_pred_dt))
print(confusion_matrix(y_test, y_pred_dt))

0.812375249500998
[[ 992  579]
 [ 643 4299]]


#### Deploy to Azure Workspace

#### Install deps

In [8]:
#!pip install azureml-core
#!pip install --upgrade azureml-core

#### Import Azure ML python SDK

In [14]:
import azureml.core
print(azureml.core.VERSION)
from azureml.core import Workspace
from azureml.core.model import Model


1.40.0


#### Connect to created Workspace

In [10]:
ws = Workspace.get(name="singhprate-workspace", subscription_id='d5d0e7cd-5900-4c80-820c-b24c2a8416d0', 
                   resource_group='PRATEEK-SINGH')

#### Dump trained model

In [12]:
joblib.dump(clf_dt, "SIT788_4_1_Data/decision_tree_v1.pkl")

['SIT788_4_1_Data/decision_tree_v1.pkl']

#### Register model on Workspace

In [24]:
model = Model.register(model_path="SIT788_4_1_Data/decision_tree_v1.pkl",
                       model_name="decision_tree_classification_model",
                       model_framework=Model.Framework.SCIKITLEARN,
                       model_framework_version=sklearn.__version__,
                       tags={'type': "classification"},
                       description="Decsion Tree binary classification model",
                       workspace=ws)

Registering model decision_tree_classification_model


#### Deploy model to workspace

In [25]:
service_name = 'decision-tree-service'
service = Model.deploy(ws, service_name, [model], overwrite=True)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-04-17 15:18:32+05:30 Creating Container Registry if not exists..
2022-04-17 15:28:32+05:30 Registering the environment.
2022-04-17 15:28:33+05:30 Uploading autogenerated assets for no-code-deployment.
2022-04-17 15:28:34+05:30 Building image..
2022-04-17 15:32:30+05:30 Generating deployment configuration.
2022-04-17 15:32:34+05:30 Submitting deployment to compute..
2022-04-17 15:32:49+05:30 Checking the status of deployment decision-tree-service..
2022-04-17 15:33:42+05:30 Checking the status of deployment decision-tree-service..
2022-04-17 15:34:42+05:30 Checking the status of inference endpoint decision-tree-service.
2022-04-17 15:34:44+05:30 Checking the status of inference endpoint decision-tree-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


#### Test on dummy data

In [31]:
import json

input_payload = json.dumps({
    'data': x_test[0:2].values.tolist(),
    'method': 'predict'  # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.
})

output = service.run(input_payload)

print(output)



{'predict': [1, 1]}


#### Delete service after use

In [32]:
service.delete()