Skip to content

Commit 319cae6

Browse files
authored
Making changes to experiment notebook based on changes to tutorial document (#249)
* making changes to notebook based on changes to tutorial document
* test update to assert_almost_equal
* reference repo details in byoc guide
1 parent 27afdbb commit 319cae6

File tree

4 files changed

+97
-79
lines changed

4 files changed

+97
-79
lines changed

bootstrap/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ To use this existing project structure and scripts for your new ML project, you
44

55
Bootstrapping will prepare a directory structure for your project which includes:
66

7-
* renaming files and folders from the base project name `diabetes` to your project name
7+
* renaming files and folders from the base project name `diabetes_regression` to your project name
88
* fixing imports and absolute paths based on your project name
99
* deleting and cleaning up some directories
1010

diabetes_regression/training/test_train.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_train_model():
1010
reg_model = train_model(data, {"alpha": 1.2})
1111

1212
preds = reg_model.predict([[1], [2]])
13-
np.testing.assert_equal(preds, [9.93939393939394, 9.03030303030303])
13+
np.testing.assert_almost_equal(preds, [9.93939393939394, 9.03030303030303])
1414

1515

1616
def test_get_model_metrics():

docs/custom_model.md

+6-4
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
This document provides steps to follow when using this repository as a template to train models and deploy the models with real-time inference in Azure ML with your own scripts and data.
44

5-
1. Follow the MLOpsPython [Getting Started](https://github.com/microsoft/MLOpsPython/blob/master/docs/getting_started.md) guide
6-
1. Follow the MLOpsPython [bootstrap instructions](https://github.com/microsoft/MLOpsPython/blob/master/bootstrap/README.md) to create your project starting point
5+
1. Follow the MLOpsPython [Getting Started](getting_started.md) guide
6+
1. Follow the MLOpsPython [bootstrap instructions](../bootstrap/README.md) to create your project starting point
77
1. Configure training data
88
1. [If necessary] Convert your ML experimental code into production-ready code
99
1. Replace the training code
@@ -13,11 +13,13 @@ This document provides steps to follow when using this repository as a template
1313

1414
## Follow the Getting Started guide
1515

16-
Follow the [Getting Started](https://github.com/microsoft/MLOpsPython/blob/master/docs/getting_started.md) guide to set up the infrastructure and pipelines to execute MLOpsPython.
16+
Follow the [Getting Started](getting_started.md) guide to set up the infrastructure and pipelines to execute MLOpsPython.
17+
18+
Take a look at the [Repo Details](code_description.md) document for a description of the structure of this repository.
1719

1820
## Follow the Bootstrap instructions
1921

20-
The [Bootstrap from MLOpsPython repository](https://github.com/microsoft/MLOpsPython/blob/master/bootstrap/README.md) guide will help you to quickly prepare the repository for your project.
22+
The [Bootstrap from MLOpsPython repository](../bootstrap/README.md) guide will help you to quickly prepare the repository for your project.
2123

2224
**Note:** Since the bootstrap script will rename the `diabetes_regression` folder to the project name of your choice, we'll refer to your project as `[project name]` when paths are involved.
2325

experimentation/Diabetes Ridge Regression Training.ipynb

+89-73
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": 2,
19+
"execution_count": 1,
2020
"metadata": {},
2121
"outputs": [],
2222
"source": [
2323
"from sklearn.datasets import load_diabetes\n",
2424
"from sklearn.linear_model import Ridge\n",
2525
"from sklearn.metrics import mean_squared_error\n",
2626
"from sklearn.model_selection import train_test_split\n",
27-
"import joblib"
27+
"import joblib\n",
28+
"import pandas as pd"
2829
]
2930
},
3031
{
@@ -36,16 +37,21 @@
3637
},
3738
{
3839
"cell_type": "code",
39-
"execution_count": 3,
40+
"execution_count": 6,
4041
"metadata": {},
4142
"outputs": [],
4243
"source": [
43-
"X, y = load_diabetes(return_X_y=True)"
44+
"sample_data = load_diabetes()\n",
45+
"\n",
46+
"df = pd.DataFrame(\n",
47+
" data=sample_data.data,\n",
48+
" columns=sample_data.feature_names)\n",
49+
"df['Y'] = sample_data.target"
4450
]
4551
},
4652
{
4753
"cell_type": "code",
48-
"execution_count": 4,
54+
"execution_count": 7,
4955
"metadata": {},
5056
"outputs": [
5157
{
@@ -57,29 +63,12 @@
5763
}
5864
],
5965
"source": [
60-
"print(X.shape)"
61-
]
62-
},
63-
{
64-
"cell_type": "code",
65-
"execution_count": 5,
66-
"metadata": {},
67-
"outputs": [
68-
{
69-
"name": "stdout",
70-
"output_type": "stream",
71-
"text": [
72-
"(442,)\n"
73-
]
74-
}
75-
],
76-
"source": [
77-
"print(y.shape)"
66+
"print(df.shape)"
7867
]
7968
},
8069
{
8170
"cell_type": "code",
82-
"execution_count": 8,
71+
"execution_count": 11,
8372
"metadata": {},
8473
"outputs": [
8574
{
@@ -103,16 +92,17 @@
10392
" <thead>\n",
10493
" <tr style=\"text-align: right;\">\n",
10594
" <th></th>\n",
106-
" <th>0</th>\n",
107-
" <th>1</th>\n",
108-
" <th>2</th>\n",
109-
" <th>3</th>\n",
110-
" <th>4</th>\n",
111-
" <th>5</th>\n",
112-
" <th>6</th>\n",
113-
" <th>7</th>\n",
114-
" <th>8</th>\n",
115-
" <th>9</th>\n",
95+
" <th>age</th>\n",
96+
" <th>sex</th>\n",
97+
" <th>bmi</th>\n",
98+
" <th>bp</th>\n",
99+
" <th>s1</th>\n",
100+
" <th>s2</th>\n",
101+
" <th>s3</th>\n",
102+
" <th>s4</th>\n",
103+
" <th>s5</th>\n",
104+
" <th>s6</th>\n",
105+
" <th>Y</th>\n",
116106
" </tr>\n",
117107
" </thead>\n",
118108
" <tbody>\n",
@@ -128,19 +118,21 @@
128118
" <td>4.420000e+02</td>\n",
129119
" <td>4.420000e+02</td>\n",
130120
" <td>4.420000e+02</td>\n",
121+
" <td>442.000000</td>\n",
131122
" </tr>\n",
132123
" <tr>\n",
133124
" <td>mean</td>\n",
134-
" <td>-3.639623e-16</td>\n",
135-
" <td>1.309912e-16</td>\n",
136-
" <td>-8.013951e-16</td>\n",
137-
" <td>1.289818e-16</td>\n",
138-
" <td>-9.042540e-17</td>\n",
139-
" <td>1.301121e-16</td>\n",
140-
" <td>-4.563971e-16</td>\n",
141-
" <td>3.863174e-16</td>\n",
142-
" <td>-3.848103e-16</td>\n",
143-
" <td>-3.398488e-16</td>\n",
125+
" <td>-3.634285e-16</td>\n",
126+
" <td>1.308343e-16</td>\n",
127+
" <td>-8.045349e-16</td>\n",
128+
" <td>1.281655e-16</td>\n",
129+
" <td>-8.835316e-17</td>\n",
130+
" <td>1.327024e-16</td>\n",
131+
" <td>-4.574646e-16</td>\n",
132+
" <td>3.777301e-16</td>\n",
133+
" <td>-3.830854e-16</td>\n",
134+
" <td>-3.412882e-16</td>\n",
135+
" <td>152.133484</td>\n",
144136
" </tr>\n",
145137
" <tr>\n",
146138
" <td>std</td>\n",
@@ -154,6 +146,7 @@
154146
" <td>4.761905e-02</td>\n",
155147
" <td>4.761905e-02</td>\n",
156148
" <td>4.761905e-02</td>\n",
149+
" <td>77.093005</td>\n",
157150
" </tr>\n",
158151
" <tr>\n",
159152
" <td>min</td>\n",
@@ -167,6 +160,7 @@
167160
" <td>-7.639450e-02</td>\n",
168161
" <td>-1.260974e-01</td>\n",
169162
" <td>-1.377672e-01</td>\n",
163+
" <td>25.000000</td>\n",
170164
" </tr>\n",
171165
" <tr>\n",
172166
" <td>25%</td>\n",
@@ -180,6 +174,7 @@
180174
" <td>-3.949338e-02</td>\n",
181175
" <td>-3.324879e-02</td>\n",
182176
" <td>-3.317903e-02</td>\n",
177+
" <td>87.000000</td>\n",
183178
" </tr>\n",
184179
" <tr>\n",
185180
" <td>50%</td>\n",
@@ -193,6 +188,7 @@
193188
" <td>-2.592262e-03</td>\n",
194189
" <td>-1.947634e-03</td>\n",
195190
" <td>-1.077698e-03</td>\n",
191+
" <td>140.500000</td>\n",
196192
" </tr>\n",
197193
" <tr>\n",
198194
" <td>75%</td>\n",
@@ -206,6 +202,7 @@
206202
" <td>3.430886e-02</td>\n",
207203
" <td>3.243323e-02</td>\n",
208204
" <td>2.791705e-02</td>\n",
205+
" <td>211.500000</td>\n",
209206
" </tr>\n",
210207
" <tr>\n",
211208
" <td>max</td>\n",
@@ -219,42 +216,52 @@
219216
" <td>1.852344e-01</td>\n",
220217
" <td>1.335990e-01</td>\n",
221218
" <td>1.356118e-01</td>\n",
219+
" <td>346.000000</td>\n",
222220
" </tr>\n",
223221
" </tbody>\n",
224222
"</table>\n",
225223
"</div>"
226224
],
227225
"text/plain": [
228-
" 0 1 2 3 4 \\\n",
226+
" age sex bmi bp s1 \\\n",
229227
"count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n",
230-
"mean -3.639623e-16 1.309912e-16 -8.013951e-16 1.289818e-16 -9.042540e-17 \n",
228+
"mean -3.634285e-16 1.308343e-16 -8.045349e-16 1.281655e-16 -8.835316e-17 \n",
231229
"std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n",
232230
"min -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01 \n",
233231
"25% -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02 \n",
234232
"50% 5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03 \n",
235233
"75% 3.807591e-02 5.068012e-02 3.124802e-02 3.564384e-02 2.835801e-02 \n",
236234
"max 1.107267e-01 5.068012e-02 1.705552e-01 1.320442e-01 1.539137e-01 \n",
237235
"\n",
238-
" 5 6 7 8 9 \n",
239-
"count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n",
240-
"mean 1.301121e-16 -4.563971e-16 3.863174e-16 -3.848103e-16 -3.398488e-16 \n",
241-
"std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n",
242-
"min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n",
243-
"25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n",
244-
"50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n",
245-
"75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n",
246-
"max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 "
236+
" s2 s3 s4 s5 s6 \\\n",
237+
"count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n",
238+
"mean 1.327024e-16 -4.574646e-16 3.777301e-16 -3.830854e-16 -3.412882e-16 \n",
239+
"std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n",
240+
"min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n",
241+
"25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n",
242+
"50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n",
243+
"75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n",
244+
"max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 \n",
245+
"\n",
246+
" Y \n",
247+
"count 442.000000 \n",
248+
"mean 152.133484 \n",
249+
"std 77.093005 \n",
250+
"min 25.000000 \n",
251+
"25% 87.000000 \n",
252+
"50% 140.500000 \n",
253+
"75% 211.500000 \n",
254+
"max 346.000000 "
247255
]
248256
},
249-
"execution_count": 8,
257+
"execution_count": 11,
250258
"metadata": {},
251259
"output_type": "execute_result"
252260
}
253261
],
254262
"source": [
255-
"import pandas as pd\n",
256-
"features = pd.DataFrame(X)\n",
257-
"features.describe()"
263+
"# All data in a single dataframe\n",
264+
"df.describe()"
258265
]
259266
},
260267
{
@@ -266,11 +273,15 @@
266273
},
267274
{
268275
"cell_type": "code",
269-
"execution_count": 3,
276+
"execution_count": 12,
270277
"metadata": {},
271278
"outputs": [],
272279
"source": [
273-
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
280+
"X = df.drop('Y', axis=1).values\n",
281+
"y = df['Y'].values\n",
282+
"\n",
283+
"X_train, X_test, y_train, y_test = train_test_split(\n",
284+
" X, y, test_size=0.2, random_state=0)\n",
274285
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
275286
" \"test\": {\"X\": X_test, \"y\": y_test}}"
276287
]
@@ -284,7 +295,7 @@
284295
},
285296
{
286297
"cell_type": "code",
287-
"execution_count": 4,
298+
"execution_count": 16,
288299
"metadata": {},
289300
"outputs": [
290301
{
@@ -294,16 +305,19 @@
294305
" normalize=False, random_state=None, solver='auto', tol=0.001)"
295306
]
296307
},
297-
"execution_count": 4,
308+
"execution_count": 16,
298309
"metadata": {},
299310
"output_type": "execute_result"
300311
}
301312
],
302313
"source": [
303-
"alpha = 0.5\n",
314+
"# experiment parameters\n",
315+
"args = {\n",
316+
" \"alpha\": 0.5\n",
317+
"}\n",
304318
"\n",
305-
"reg = Ridge(alpha=alpha)\n",
306-
"reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])"
319+
"reg_model = Ridge(**args)\n",
320+
"reg_model.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])"
307321
]
308322
},
309323
{
@@ -315,20 +329,22 @@
315329
},
316330
{
317331
"cell_type": "code",
318-
"execution_count": 6,
332+
"execution_count": 18,
319333
"metadata": {},
320334
"outputs": [
321335
{
322336
"name": "stdout",
323337
"output_type": "stream",
324338
"text": [
325-
"mse: 3298.9096058070622\n"
339+
"{'mse': 3298.9096058070622}\n"
326340
]
327341
}
328342
],
329343
"source": [
330-
"preds = reg.predict(data[\"test\"][\"X\"])\n",
331-
"print(\"mse: \", mean_squared_error(preds, y_test))"
344+
"preds = reg_model.predict(data[\"test\"][\"X\"])\n",
345+
"mse = mean_squared_error(preds, y_test)\n",
346+
"metrics = {\"mse\": mse}\n",
347+
"print(metrics)"
332348
]
333349
},
334350
{
@@ -363,9 +379,9 @@
363379
],
364380
"metadata": {
365381
"kernelspec": {
366-
"display_name": "Python (storedna)",
382+
"display_name": "Python 3",
367383
"language": "python",
368-
"name": "storedna"
384+
"name": "python3"
369385
},
370386
"language_info": {
371387
"codemirror_mode": {
@@ -377,7 +393,7 @@
377393
"name": "python",
378394
"nbconvert_exporter": "python",
379395
"pygments_lexer": "ipython3",
380-
"version": "3.6.9"
396+
"version": "3.7.4"
381397
}
382398
},
383399
"nbformat": 4,

0 commit comments

Comments
 (0)