## Step 1: Load workspace config.

In [2]:
from azureml.core import Workspace, Dataset, Experiment

# Coordinates identifying the Azure ML workspace this notebook talks to.
subscription_id = "ef5d99e9-a1b7-46a0-a883-bc5f0eb4941a"
resource_group = "mlops-aug-batch"
workspace_name = "intellipaat-mlops"

# Workspace handle shared by the experiment, dataset and model-registration cells.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)



## Step 2: Create the experiment and load the Iris dataset

In [3]:
# Experiment handle; the training cell starts its logging run from it.
experiment = Experiment(
    workspace=workspace,
    name="iris",
)

# Look up the dataset registered as "Iris" and convert it to a pandas DataFrame.
dataset = Dataset.get_by_name(
    workspace=workspace,
    name="Iris",
)
df = dataset.to_pandas_dataframe()

# Bare trailing expression so the notebook renders the frame.
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
# Preview the first four rows of the loaded frame.
df.head(4)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa


In [4]:
# The four measurement columns are the model inputs; "Species" is the label.
feature_columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
X = df[feature_columns]
Y = df["Species"]

In [8]:
# Display the feature frame selected above.
X

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [9]:
# Display the label series selected above.
Y

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Species, Length: 150, dtype: object

## Step 3: Train a Decision Tree (DT) classifier and log the run

In [5]:
import os

import joblib
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


In [6]:
# Train a decision tree on the Iris features, log parameters and accuracy to
# an Azure ML run, and upload the pickled model as a run artifact.

# Tunable settings for this run.
test_size = 0.40
random_state = 42  # fixed seed so the split and the fitted tree are reproducible

# Where the serialized model is written locally before upload.
model_name = "iris_decision_tree.pkl"
filepath = "outputs/" + model_name

run = experiment.start_logging()
try:
    run.log("train_test_split", test_size)
    run.log("random_state", random_state)
    run.log("Type of Algorithm", "Decision Tree")

    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, shuffle=True, random_state=random_state
    )

    clf = DecisionTreeClassifier(
        criterion="gini", max_depth=4, random_state=random_state
    )
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Logged as a percentage rather than a 0-1 fraction.
    run.log("accuracy", accuracy * 100)

    # joblib.dump does not create missing directories, so make sure the
    # target folder exists before writing.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    joblib.dump(value=clf, filename=filepath)
    run.upload_file(name=model_name, path_or_stream=filepath)
except Exception as exc:
    # Close the run as failed instead of leaving it open, then surface the error.
    run.fail(error_details=exc)
    raise
else:
    run.complete()

## Step 4: Register the Model

In [9]:
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
import sklearn

In [8]:
# Resource settings to attach to the registered model.
model_resources = ResourceConfiguration(cpu=1, memory_in_gb=0.5)

# Register the pickled classifier stored at `filepath` under the name "iris",
# recording the scikit-learn version currently imported in this kernel.
model = Model.register(
    workspace=workspace,
    model_name="iris",
    model_path=filepath,
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version=sklearn.__version__,
    description="This is a sample DT classifier",
    resource_configuration=model_resources,
)

# Show the name and version reported by the registered model object.
print("Name: ", model.name)
print("version: ", model.version)

Registering model iris
Name:  iris
version:  3
