In [1]:
## Import relevant modules
from json import dumps, loads
from urllib2 import Request, urlopen

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

from features import feature_extraction

In [2]:
## Grab sample data from ParkWhiz API
## One quote request is issued per time window below; each JSON response is
## turned into a DataFrame by feature_extraction and the results are stacked.
## NOTE(review): the API key is committed in plain text -- consider loading it
## from the environment and rotating the key.
api_url = 'http://api.parkwhiz.com/v4/quotes?q=coordinates:41.8857256,-87.6369590'
api_key = '&api_key=62d882d8cfe5680004fa849286b6ce20'
times = [
    '&start_time=2017-04-14T15:00&end_time=2017-04-14T18:00',
    '&start_time=2017-05-14T15:00&end_time=2017-05-14T18:00',
    '&start_time=2017-06-14T15:00&end_time=2017-06-14T18:00',
    '&start_time=2017-07-14T15:00&end_time=2017-07-14T18:00',
    '&start_time=2017-08-14T15:00&end_time=2017-08-14T18:00',
    '&start_time=2017-09-14T15:00&end_time=2017-09-14T18:00',
    '&start_time=2017-12-23T15:00&end_time=2017-12-23T18:00',
]

frames = []

# `time_window` rather than `time`, so the stdlib module name is not shadowed.
for time_window in times:
    page = urlopen(Request(api_url + api_key + time_window))
    try:
        json_data = loads(page.read())
    finally:
        # Always release the HTTP connection, even if reading/parsing fails.
        page.close()
    frames.append(feature_extraction(json_data))

# The same lot can appear in several time windows; keep one row per lot id.
frame = pd.concat(frames).drop_duplicates('id')
frame.head()

Unnamed: 0,id,name,lat,lon,coordinates,address,city,seller_id,price,accessible,valet,security,restrooms,printed_pass,reentry_allowed
0,8343,150 N. Wacker Dr. - Valet,41.8848,-87.6372,"[41.8848377337, -87.6372286677]",150 N. Upper Wacker Dr.,Chicago,624,12,0,1,0,0,0,0
1,6460,Franklin & Lake Self Park,41.8852,-87.6356,"[41.885230118, -87.6355737448]",180 N. Franklin St.,Chicago,652,15,1,0,1,0,0,0
2,3871,225 W. Wacker Dr. Building,41.8859,-87.6353,"[41.8859381122, -87.6352616959]",281 N. Franklin St.,Chicago,602,10,1,0,0,0,0,0
3,9529,319 W. Randolph - Lot,41.8843,-87.636,"[41.8842716176, -87.6360226795]",319 W. Randolph St.,Chicago,652,30,1,0,0,0,0,0
4,3186,165 N. Canal St. - Valet Kiosk,41.8851,-87.6397,"[41.885097127, -87.6396544576]",165 N. Canal St.,Chicago,211,10,1,1,0,0,0,0


In [3]:
## Keep only the model inputs: spot location plus the amenity flags
df_train = frame[[
    'lat', 'lon',                                     # location
    'accessible', 'valet', 'security', 'restrooms',   # amenity flags
]]

In [4]:
## Preview the first five rows of the feature frame
df_train.head(n=5)

Unnamed: 0,lat,lon,accessible,valet,security,restrooms
0,41.8848,-87.6372,0,1,0,0
1,41.8852,-87.6356,1,0,1,0
2,41.8859,-87.6353,1,0,0,0
3,41.8843,-87.636,1,0,0,0
4,41.8851,-87.6397,1,1,0,0


In [5]:
## Ensemble of two KNN regressors: one on location (haversine distance) and
## one on the categorical amenity flags (jaccard metric).
## X_d_* / y_d_* = split for the geographic distance knnR
## X_c_* / y_c_* = split for the categorical knnR
## Both splits use the same target, test_size and random_state.
price = frame['price']
split_kwargs = dict(test_size=0.2, random_state=574)

X_d_train, X_d_test, y_d_train, y_d_test = train_test_split(
    df_train[['lat', 'lon']], price, **split_kwargs)
X_c_train, X_c_test, y_c_train, y_c_test = train_test_split(
    df_train[['accessible', 'valet', 'security', 'restrooms']], price, **split_kwargs)

In [6]:
## Init. models using 3 nearest neighbors
## scikit-learn's haversine metric expects [lat, lon] in radians, but the
## frame stores coordinates in degrees. The conversion lives inside a Pipeline
## so fit/predict (and any published copy of the model) keep accepting plain
## degree coordinates.
knnRDistance = Pipeline([
    ('to_radians', FunctionTransformer(np.radians)),
    ('knn', KNeighborsRegressor(n_neighbors=3, metric='haversine')),
])
knnRCategorical = KNeighborsRegressor(n_neighbors=3, metric='jaccard')

In [7]:
## Fit models using training data
## Each regressor is trained on its own feature split against the price target.
## The second fit is the cell's last expression, so the notebook displays the
## fitted categorical estimator's repr as the cell output.
knnRDistance.fit(X_d_train,y_d_train)
knnRCategorical.fit(X_c_train,y_c_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='jaccard',
          metric_params=None, n_jobs=1, n_neighbors=3, p=2,
          weights='uniform')

In [8]:
## Examine performance of each metric (mean squared error on the held-out split)
y_d_pred = knnRDistance.predict(X_d_test)
y_c_pred = knnRCategorical.predict(X_c_test)

# mean_squared_error is documented as (y_true, y_pred); pass them in that order.
# print(...) with a single argument matches the print() call used later in
# this notebook and prints the same text as the statement form.
print(mean_squared_error(y_d_test, y_d_pred))
print(mean_squared_error(y_c_test, y_c_pred))

30.2243589744
49.6752136752


In [None]:
## Import Watson ML modules
## From here we run in DSx; modules do not exist locally
from repository.mlrepositoryclient import MLRepositoryClient
from repository.mlrepositoryartifact import MLRepositoryArtifact
from repository.mlrepository import MetaProps, MetaNames

In [None]:
## Set credentials
## Placeholder values only -- fill these in at run time and do not commit
## real credentials with the notebook.
wml_credentials = {
  "url": "xxx",
  "access_key": "xxx",
  "username": "xxx",
  "password": "xxx",
  "instance_id": "xxx"
}

In [None]:
## Create the repository client for the service URL, then authenticate it
ml_repository_client = MLRepositoryClient(wml_credentials['url'])
ml_repository_client.authorize(
    wml_credentials['username'],
    wml_credentials['password'],
)

In [None]:
## Create model artifact
## `knnR` was never defined in this notebook. The scoring cells further down
## send [lat, lon] pairs, so the location model is the one to publish.
props = MetaProps({
    MetaNames.AUTHOR_NAME: "Aleksandar Velkoski",
    MetaNames.AUTHOR_EMAIL: "avelkoski@ethventures.io",
})
model_artifact = MLRepositoryArtifact(
    knnRDistance,
    name="SpotExchange KNeighborsRegressor",
    meta_props=props,
)

In [None]:
## Save model artifact
## The returned object is kept because a later cell matches published
## resources against saved_model.uid.
saved_model = ml_repository_client.models.save(model_artifact)

In [None]:
## Request a new ML token from the identity endpoint using basic auth
import urllib3, requests, json

basic_auth = '{username}:{password}'.format(**wml_credentials)
headers = urllib3.util.make_headers(basic_auth=basic_auth)
url = '{}/v3/identity/token'.format(wml_credentials['url'])

response = requests.get(url, headers=headers)
token_payload = json.loads(response.text)
mltoken = token_payload.get('token')

In [None]:
## Fetch the WML instance record that hosts the published model
endpoint_instance = "".join([
    wml_credentials['url'],
    "/v3/wml_instances/",
    wml_credentials['instance_id'],
])
header = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + mltoken,
}

response_get_instance = requests.get(endpoint_instance, headers=header)

In [None]:
## Pull the published-models URL out of the instance response
endpoint_published_models = (
    json.loads(response_get_instance.text)
    .get('entity')
    .get('published_models')
    .get('url')
)

In [None]:
## Print published model endpoint
## print(...) with one argument matches the print() call used for scoring_url
## and prints the same text as the statement form.
print(endpoint_published_models)

In [None]:
## List the published models, sending the bearer-token headers
header = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + mltoken,
}
response_get = requests.get(endpoint_published_models, headers=header)

In [None]:
## Print ML token
## NOTE(review): this writes the bearer token into the notebook output; clear
## the output before sharing or committing the notebook.
print(mltoken)

In [None]:
## Grab deployments
## Pick the deployments URL of the published resource whose guid matches the
## saved model. The single-element unpacking fails unless exactly one matches.
published_resources = json.loads(response_get.text).get('resources')
[endpoint_deployments] = [
    resource.get('entity').get('deployments').get('url')
    for resource in published_resources
    if resource.get('metadata').get('guid') == saved_model.uid
]

In [None]:
## Create an online deployment by POSTing its description to the deployments endpoint
payload_online = {
    "name": "knnR-SpotExchange",
    "description": "KNearestNeighbor Regressor for SpotExchange Price Predictions",
    "type": "online",
}
response_online = requests.post(
    endpoint_deployments,
    json=payload_online,
    headers=header,
)

In [None]:
## Read the scoring URL from the deployment response and show it
deployment_entity = json.loads(response_online.text).get('entity')
scoring_url = deployment_entity.get('scoring_url')

print(scoring_url)

In [None]:
## Sample [lat, lon] pairs (in degrees) used to test the deployed model
values = [
    [41.88523011803571, -87.63557374477386],
    [41.88593811224804, -87.6352616958502],
    [41.88427161764708, -87.63602267950773],
]

In [None]:
## Wrap the sample rows under the "values" key for the scoring request
payload_scoring = dict(values=values)

In [None]:
## POST the scoring payload to the deployment's scoring URL
response_scoring = requests.post(
    scoring_url,
    json=payload_scoring,
    headers=header,
)

In [None]:
## Print the raw scoring response body
## print(...) with one argument matches the print() call used for scoring_url
## and prints the same text as the statement form.
print(response_scoring.text)