Copyright (c) Microsoft Corporation. All rights reserved.

# Tutorial: Train your first model

## Connect to Workspace

In [1]:
# Placeholder cell: insert the workspace-connection snippet here.
# The commented lines below show the intended code; the experiment snippet in
# the next section passes the resulting `workspace` variable to Experiment(...).
# from azureml.core import Workspace
# workspace = Workspace.from_config()
## Insert Snippet

## Create an experiment

In [2]:
# Placeholder cell: insert the experiment-creation snippet here.
# The training cell further down calls `experiment.start_logging()`, so
# `experiment` must be defined before that cell is run.
# from azureml.core import Experiment
# experiment = Experiment(workspace, "diabetes-expr")
## Insert Snippet

## Load data and prepare for training

In [3]:
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Materialise the Diabetes open dataset as a pandas frame and discard rows
# that contain missing values.
x_df = (
    Diabetes.get_tabular_dataset()
    .to_pandas_dataframe()
    .dropna()
)
# Detach the target column "Y"; pop() removes it from x_df, which then holds
# only the feature columns.
y_df = x_df.pop("Y")

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)

## Explore Data

In [4]:
# Preview the first rows of x_df; the "Y" column was already popped into y_df
# by the data-preparation cell, so only the remaining columns are shown.
x_df.head()

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6
0,59,2,32.1,101.0,157,93.2,38.0,4.0,4.8598,87
1,48,1,21.6,87.0,183,103.2,70.0,3.0,3.8918,69
2,72,2,30.5,93.0,156,93.6,41.0,4.0,4.6728,85
3,24,1,25.3,84.0,198,131.4,40.0,5.0,4.8903,89
4,50,1,23.0,101.0,192,125.4,52.0,4.0,4.2905,80


In [5]:
# Print each column's dtype as plain text, then leave describe() as the last
# expression so its summary table is rendered as the cell's output.
print(x_df.dtypes)
x_df.describe()

AGE      int64
SEX      int64
BMI    float64
BP     float64
S1       int64
S2     float64
S3     float64
S4     float64
S5     float64
S6       int64
dtype: object


Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,48.5181,1.468326,26.375792,94.647014,189.140271,115.43914,49.788462,4.070249,4.641411,91.260181
std,13.109028,0.499561,4.418122,13.831283,34.608052,30.413081,12.934202,1.29045,0.522391,11.496335
min,19.0,1.0,18.0,62.0,97.0,41.6,22.0,2.0,3.2581,58.0
25%,38.25,1.0,23.2,84.0,164.25,96.05,40.25,3.0,4.2767,83.25
50%,50.0,1.0,25.7,93.0,186.0,113.0,48.0,4.0,4.62005,91.0
75%,59.0,2.0,29.275,105.0,209.75,134.5,57.75,5.0,4.9972,98.0
max,79.0,2.0,42.2,133.0,301.0,242.4,99.0,9.09,6.107,124.0


## Train a model on Notebooks

In [6]:
import math
import os

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone package and fall back only on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Folder the pickled models are written to before being uploaded to the run.
OUTPUT_DIR = "outputs"
# joblib.dump does not create missing folders, so make sure it exists up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Regularisation strengths to sweep; one experiment run is recorded per value.
alphas = [0.1, 2, 0.3, 4, 0.5, 6, 0.7, 8, 0.9, 1.0]

for alpha in alphas:
    # Start exactly ONE run per alpha. Starting a second run inside the same
    # iteration would leave the first one (holding alpha_value) never completed
    # and would attach the model to a run with no alpha_value logged.
    run = experiment.start_logging()
    try:
        run.log("alpha_value", alpha)
        print("alpha_value", alpha)

        # Fit a ridge regressor on the training split and score it on the
        # held-out split.
        model = Ridge(alpha=alpha)
        model.fit(X=X_train, y=y_train)
        y_pred = model.predict(X=X_test)

        # Root-mean-squared error; logged to the run as well as printed so the
        # metric is recorded next to alpha_value.
        rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
        run.log("rmse", rmse)
        print("rmse", rmse)

        # Persist the fitted model locally, then attach the file to this run.
        model_name = "model_alpha_" + str(alpha) + ".pkl"
        filename = OUTPUT_DIR + "/" + model_name
        joblib.dump(value=model, filename=filename)

        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as error:
        # Mark the run as failed instead of leaving it unfinished, then
        # re-raise so the notebook still surfaces the original error.
        run.fail(error_details=str(error))
        raise
    else:
        run.complete()




alpha_value 0.1
rmse 56.60520331339142
alpha_value 2
rmse 56.73537363452705
alpha_value 0.3
rmse 56.61624324548363
alpha_value 4
rmse 56.89443476255977
alpha_value 0.5
rmse 56.628177342751364
alpha_value 6
rmse 57.0481879046065
alpha_value 0.7
rmse 56.64087521475942
alpha_value 8
rmse 57.18930922902951
alpha_value 0.9
rmse 56.654222996253125
alpha_value 1.0
rmse 56.66110898499054


## Go to the Experiments tab in the Azure ML workspace menu
