In [8]:
from azureml.core import Workspace, Dataset, Experiment

# --- Azure ML session setup --------------------------------------------
# Resolve the workspace, the registered dataset and the experiment, then
# open an interactive run. `ws`, `az_dataset`, `experiment` and `new_run`
# are consumed by later cells, so their names must stay stable.

print("Accessing the workspace...")
ws = Workspace.from_config(path="./config")

print("Accessing the dataset...")
az_dataset = Dataset.get_by_name(workspace=ws, name="adultincome")

print("Accessing/Creating the experiment...")
experiment = Experiment(ws, "Webservice-exp001")

# NOTE(review): every execution of this cell opens a *new* run; the run is
# only closed by the training cell, so re-running this cell alone leaves
# an extra run behind.
print("Stant Experiment using Start Logging method...")
new_run = experiment.start_logging()

Accessing the workspace...
Accessing the dataset...
Accessing/Creating the experiment...
Stant Experiment using Start Logging method...


In [9]:
# Train a random-forest classifier on the registered dataset, log its test
# accuracy to the active Azure ML run, persist the model, and close the run.
#
# Requires `az_dataset` and `new_run` from the setup cell.
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

try:
    print("Loading the dataset to pandas dataframe...")
    df = az_dataset.to_pandas_dataframe()

    # Features are every column but the last; the last column is the target.
    X = df.iloc[:, :-1]
    Y = df.iloc[:, -1:]

    # One-hot encode the features and remember the resulting column layout,
    # which is saved alongside the model below.
    X = pd.get_dummies(X)
    train_enc_cols = X.columns

    # One-hot encode the target and keep only the last dummy column as a
    # binary label.
    # NOTE(review): presumably this is the positive/high-income class - confirm.
    Y = pd.get_dummies(Y)
    Y = Y.iloc[:, -1]

    # 70/30 split, stratified on the label, with a fixed seed.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=1234, stratify=Y
    )

    rfc = RandomForestClassifier(random_state=1234)
    trained_model = rfc.fit(X_train, Y_train)

    # Predictions / class-1 probabilities / confusion matrix on the test
    # split. They are not logged in this cell; kept for interactive use.
    Y_predict = rfc.predict(X_test)
    Y_prob = rfc.predict_proba(X_test)[:, 1]
    cm = confusion_matrix(Y_test, Y_predict)

    score = rfc.score(X_test, Y_test)
    new_run.log("accuracy", score)

    # Persist [encoded column index, fitted model] as a single pickle.
    # The target directory is created first so a fresh checkout does not
    # fail with FileNotFoundError.
    model_file = "./outputs/models.pkl"
    os.makedirs(os.path.dirname(model_file), exist_ok=True)
    joblib.dump(value=[train_enc_cols, trained_model], filename=model_file)
except Exception as exc:
    # Close the run as failed so it is not left in the "Running" state,
    # then surface the original error to the notebook.
    new_run.fail(error_details=str(exc))
    raise
else:
    new_run.complete()

Loading the dataset to pandas dataframe...
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


In [12]:
# Materialise the experiment's runs so the notebook displays all of them.
[run for run in experiment.get_runs()]

[Run(Experiment: Webservice-exp001,
 Id: f6a36767-5303-4f3c-b7d8-a71c93f0e2fe,
 Type: None,
 Status: Completed),
 Run(Experiment: Webservice-exp001,
 Id: 0a962bf6-c2f8-4e35-899d-aef9f74e8917,
 Type: None,
 Status: Completed),
 Run(Experiment: Webservice-exp001,
 Id: b4050751-a15f-4c8b-a066-2fc810319c8e,
 Type: None,
 Status: Running)]

In [7]:
# Display the metrics recorded on `new_run` (the training cell logs "accuracy").
# NOTE(review): this cell's execution count is lower than the training cell's,
# so the saved output may belong to an earlier run - re-run top to bottom to confirm.
new_run.get_metrics()

{'accuracy': 0.8560753403623708}