In [3]:
# Requires that Python, AzureMLSDK, Pandas, and NumPy are already installed
import azureml.core
import pandas as pd
import numpy as np
import logging

# Report the installed SDK version so the run can be matched to a known release.
sdk_version = azureml.core.VERSION
print("AzureML SDK Version:", sdk_version)

AzureML SDK Version: 1.13.0


In [4]:
from azureml.core import Experiment, Workspace

# Build the workspace handle from the config.json file downloaded from
# Azure Machine Learning into this folder. Running this cell may prompt for
# interactive Azure authentication.
ws = Workspace.from_config()

# The experiment name is arbitrary.
experiment_name = "automl_bikeshare_forecast"

experiment = Experiment(workspace=ws, name=experiment_name)

Performing interactive authentication. Please follow the instructions on the terminal.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [6]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = "cpu-cluster"

# Cluster shape used only when the cluster has to be created: D2 v2 VMs,
# scaling up to at most two nodes.
provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", max_nodes=2)

try:
    # Reuse the cluster when it already exists in the workspace so that
    # re-running this cell (Restart & Run All) does not issue another create
    # request for the same name.
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # No compute target with this name yet: provision it.
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

# Block (for up to 10 minutes) until provisioning has finished, echoing progress.
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10)

Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [10]:
# Use the workspace's default datastore as the upload destination.
datastore = ws.get_default_datastore()

# bike-no.csv must be present in this folder; it is uploaded under 'dataset/'
# with overwrite enabled.
datastore.upload_files(
    files=['bike-no.csv'],
    target_path='dataset/',
    overwrite=True,
    show_progress=True,
)

Uploading an estimated of 1 files
Uploading bike-no.csv
Uploaded bike-no.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_6721a90895964b1598f76949bc12d83e

In [16]:
from azureml.core import Dataset
from datetime import datetime

time_column_name = 'date'
target_column_name = 'cnt'

# Read the uploaded CSV as a tabular dataset and register the 'date' column
# as its timestamp column.
# NOTE(review): newer SDK releases may name this argument `timestamp` --
# confirm against the installed version before changing it.
dataset = (
    Dataset.Tabular
    .from_delimited_files(path=[(datastore, 'dataset/bike-no.csv')])
    .with_timestamp_columns(fine_grain_timestamp=time_column_name)
)

# Preview the first five rows.
dataset.take(5).to_pandas_dataframe().reset_index(drop=True)

Unnamed: 0,instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,6,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,2,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,3,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


In [17]:
# Training split: every row dated on or before 2012-08-31 (boundary included).
# The later rows are held out for testing, not training.
train = dataset.time_before(datetime(2012, 8, 31), include_boundary=True)

# Show the last five training rows to confirm where the split ends.
train.to_pandas_dataframe().tail(5).reset_index(drop=True)

Unnamed: 0,instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,605,2012-08-27,3,1,8,1,1,0.703333,0.654688,0.730417,0.128733,989,5928,6917
1,606,2012-08-28,3,1,8,2,1,0.728333,0.66605,0.62,0.190925,935,6105,7040
2,607,2012-08-29,3,1,8,3,1,0.685,0.635733,0.552083,0.112562,1177,6520,7697
3,608,2012-08-30,3,1,8,4,1,0.706667,0.652779,0.590417,0.077117,1172,6541,7713
4,609,2012-08-31,3,1,8,5,1,0.764167,0.6894,0.5875,0.168533,1433,5917,7350


In [18]:
# Test split: every row dated on or after 2012-09-01 (boundary included).
# This is the held-out data used for testing, not training.
test = dataset.time_after(datetime(2012, 9, 1), include_boundary=True)

# Show the first five test rows to confirm where the split begins.
test.to_pandas_dataframe().head(5).reset_index(drop=True)

Unnamed: 0,instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,610,2012-09-01,3,1,9,6,2,0.753333,0.702654,0.638333,0.113187,2352,3788,6140
1,611,2012-09-02,3,1,9,0,2,0.696667,0.649,0.815,0.064071,2613,3197,5810
2,612,2012-09-03,3,1,9,1,1,0.7075,0.661629,0.790833,0.151121,1965,4069,6034
3,613,2012-09-04,3,1,9,2,1,0.725833,0.686888,0.755,0.236321,867,5997,6864
4,614,2012-09-05,3,1,9,3,1,0.736667,0.708983,0.74125,0.187808,832,6280,7112
