So now we have a table of data. Let's slice it another way: plot one variable against another.

First let's do hostIDs by potential nightly income, i.e. the sum of the nightly rates for all properties hosted by that person. On this dataset we have hostID but no name.

In [None]:
import pandas

# Base WFS GetFeature request for the InsideAirbnb layer, asking for CSV output.
wfs_query = 'https://citydata.be.unsw.edu.au/geoserver/wfs?srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'

# Extra query-string fragments; each one already carries its leading '&'.
maxFeatures_param = '&maxFeatures=10'
PropertyName_param = '&PropertyName=City,ScrapeDate,PropertyID,HostID,Latitude,Longitude,Price'
cql_filter_param = "&cql_filter=City='Sydney'+AND+ScrapeDate='2017-04-03T00:00:00'"

# Glue the fragments onto the base request in a fixed order.
url = ''.join([wfs_query, maxFeatures_param, PropertyName_param, cql_filter_param])

# pandas fetches the URL and parses the CSV response into a DataFrame.
listings = pandas.read_csv(url)

# A bare name as the last line makes the notebook display the DataFrame.
listings

# import plotly.plotly as py
# import plotly.graph_objs as go
# import plotly.offline as po

# trace1 = go.Scatter(x=[1,2,3], y=[4,5,6], marker={'color': 'red', 'symbol': 104, 'size': "10"}, 
#                     mode="markers+lines",  text=["one","two","three"], name='1st Trace')
                                               
# data=go.Data([trace1])
# layout=go.Layout(title="First Plot", xaxis={'title':'x1'}, yaxis={'title':'x2'})
# figure=go.Figure(data=data,layout=layout)
# po.iplot(figure, filename='pyguide_1')

In [None]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Show which plotly is installed; this notebook requires version >= 1.9.0.
print(__version__)

In [None]:
import plotly as py
import plotly.offline as po
from plotly.graph_objs import Scatter, Layout

# Build the two pieces of the figure separately: one scatter trace and a
# layout that only sets the title.
hello_trace = Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1])
hello_layout = Layout(title="hello world")

# Pass the figure to plotly's offline plot() as a plain dict. Kept as the
# cell's final expression so the notebook still shows whatever plot() returns.
po.plot(dict(data=[hello_trace], layout=hello_layout))

In [None]:
# Print the built-in documentation for plotly.offline.plot (`po` is the
# plotly.offline module imported in an earlier cell).
help(po.plot)

In [None]:
import plotly
import plotly.offline as po
from plotly.graph_objs import Scatter, Layout

# Initialise notebook mode before drawing inline (connected=True, as before).
po.init_notebook_mode(connected=True)

# One scatter trace and a titled layout, assembled into a figure dict.
inline_trace = Scatter(x=[1, 2, 3, 4], y=[1, 2, 3, 1])
inline_layout = Layout(title="A")

po.iplot(dict(data=[inline_trace], layout=inline_layout))

In [None]:
# Print the built-in documentation for plotly.offline.iplot (`po` is the
# plotly.offline module imported in an earlier cell).
help(po.iplot)

In [None]:
    # Smallest inline plot: a single trace given as a plain dict of x/y lists.
    from plotly.offline import init_notebook_mode, iplot
    init_notebook_mode()
    iplot([dict(x=[1, 2, 3], y=[5, 2, 7])])

Now let's plot our own data! :) 

In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
import plotly.figure_factory as ff

# Render the `listings` DataFrame (loaded by an earlier cell of this
# notebook) as a plotly table figure.
#
# This cell used to download a second, unfiltered CSV into a variable named
# `data` that was never read afterwards. That extra request is removed: the
# table is built from `listings`, and the URL request is the slow part.
fig = ff.create_table(listings)

# Enable inline rendering, then draw the table in the notebook.
init_notebook_mode()
iplot(fig)

So `ff.create_table` turns the DataFrame into a table figure, and `iplot` renders it inline. Fair enough. Let's make our first bar chart: HostID by potential nightly income.

Try a map now.

In [None]:
# import pandas
# The two live imports below are required by this cell: no earlier cell
# imports `plotly.graph_objs as go`, so with every import commented out a
# top-to-bottom run of the notebook failed with NameError on `go`.
from plotly.offline import init_notebook_mode, iplot
# import plotly.figure_factory as ff
import plotly.graph_objs as go
# wfs_server = 'https://citydata.be.unsw.edu.au/geoserver/wfs'
# wfs_parms = 'srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature&'

# city = 'Sydney'
# cql_filter.......
# max_features = 1000
# max_features_parm = 'maxFeatures=' + str(max_features)
# url = wfs_server + '?' + wfs_parms + "&" + max_features_parm
# url
# data = pandas.read_csv(url)

# Plot longitude (x) against latitude (y) so the markers form a rough map.
# `listings` is the DataFrame loaded by an earlier cell.
trace = go.Scatter(
                    x=listings['Longitude'], y=listings['Latitude'], # Data
                    mode='markers', name='map' # Additional options
                   )
layout = go.Layout(title='Simple Map of Sydney Airbnb Listings 2017',
                   plot_bgcolor='rgb(230, 230,230)')

fig = go.Figure(data=[trace], layout=layout)

# Plot data in the notebook


# fig = ff.create_table(data.head ())

init_notebook_mode()
iplot(fig)


In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
import plotly.figure_factory as ff
import plotly.graph_objs as go

# Base WFS GetFeature request plus optional fragments (each starts with '&').
wfs_query = 'https://citydata.be.unsw.edu.au/geoserver/wfs?srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'
maxFeatures_param = '&maxFeatures=100'
PropertyName_param = '&PropertyName=PropertyID,HostID,Latitude,Longitude,Price'
cql_filter_param = "&cql_filter=City='Sydney'+AND+ScrapeDate='2017-04-03T00:00:00'"

# maxFeatures_param is defined but not appended, so the request carries no
# maxFeatures parameter.
url = ''.join([wfs_query, PropertyName_param, cql_filter_param])
listings = pandas.read_csv(url)

# Price on the y axis against longitude on the x axis, one marker per row.
trace = go.Scatter(
    x=listings['Longitude'],
    y=listings['Price'],
    mode='markers',
    name='Price/Longitude',
)

layout = go.Layout(
    title='Sydney Airbnb Price from West to East, 2017',
    plot_bgcolor='rgb(230, 230,230)',
)

fig = go.Figure(data=[trace], layout=layout)

# Enable inline rendering, then plot the figure in the notebook.
init_notebook_mode()
iplot(fig)

Now we want to query the whole dataset, not just a `maxFeatures` sample. But we don't actually need the whole dataset: we can ask for just the properties (columns) we want. It's important to be efficient because the URL request is the bottleneck for our program; it's the part that slows the whole thing down. Let's have a look at the architecture to be sure.

Insert Architecture (2) slides

First we need to learn about [Filtering and slicing a WFS query](Filtering%20and%20slicing%20a%20WFS%20query.ipynb).

Continue below after you have completed that notebook.

<a id="filtered-whole-dataset"></a>
## Reading a slice of the whole dataset
If you haven't done [Filtering and slicing a WFS query](Filtering%20and%20slicing%20a%20WFS%20query.ipynb), please do that now.

Now we'll add cql_filter and propertyname parameters to our URL. First just print the URL so we can click it and test it.

In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
import plotly.figure_factory as ff
import plotly.graph_objs as go
# Endpoint and the fixed part of the WFS GetFeature query string.
wfs_server = 'https://citydata.be.unsw.edu.au/geoserver/wfs'
wfs_parms = 'srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'

# Row filter: the city name is wrapped in single quotes inside the filter.
city = 'Sydney'
cql_filter_parm = "cql_filter=City='{}'".format(city)

# Column selection.
properties = 'City,PropertyID,HostID,Latitude,Longitude,Price'
propname_parm = 'PropertyName={}'.format(properties)

# Row limit.
max_features = 1000
max_features_parm = 'maxFeatures={}'.format(max_features)

# endpoint + '?' + the four parameter groups joined by '&'.
url = wfs_server + '?' + '&'.join(
    [wfs_parms, cql_filter_parm, propname_parm, max_features_parm]
)

# Print the URL so it can be clicked and tested by hand.
print(url)

# data = pandas.read_csv(url)

# trace1 = go.Scatter(
#                     x=data['Longitude'], y=data['Latitude'], # Data
#                     mode='markers', name='logx' # Additional options
#                    )
# layout = go.Layout(title='Simple map from csv data',
#                    plot_bgcolor='rgb(230, 230,230)')

# fig = go.Figure(data=[trace1], layout=layout)

# # Plot data in the notebook


# # fig = ff.create_table(data.head ())

# init_notebook_mode()
# iplot(fig, filename='simple-plot-from-csv')


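Click the link above. It should download a CSV file. Open the file. Because the URL includes `maxFeatures=1000`, it should have at most 1,000 rows, with only the selected set of columns; without that limit the Sydney slice is about 57,000 rows.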

In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
import plotly.figure_factory as ff
import plotly.graph_objs as go
import logging

# Root logger at DEBUG so the debug messages in this cell are emitted
# (use logging.INFO instead to quieten them).
logging.getLogger().setLevel(logging.DEBUG)

# Endpoint and the fixed part of the WFS GetFeature query string.
wfs_server = 'https://citydata.be.unsw.edu.au/geoserver/wfs'
wfs_parms = 'srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'

city = 'Sydney'
cql_filter_parm = "cql_filter=City='%s'" % city

properties = 'City,PropertyID,HostID,Latitude,Longitude,Price'
propname_parm = 'PropertyName=%s' % properties

max_features = 1000

# Collect the query-string groups; the row limit is added only when positive,
# so max_features = 0 means "no maxFeatures parameter at all".
query_parts = [wfs_parms, cql_filter_parm, propname_parm]
if max_features > 0:
    max_features_parm = 'maxFeatures=%s' % max_features
    query_parts.append(max_features_parm)

url = wfs_server + '?' + '&'.join(query_parts)

logging.debug('url: %s', url)

# Download the CSV described by `url` into a DataFrame.
df = pandas.read_csv(url)

logging.debug('df.shape:%s', df.shape)
logging.info('list df:%s', list(df))

# Aggregate transform: group points by HostID and sum the y values (Price).
# NOTE(review): the notebook text after this cell reports that this
# graph_objs-based attempt did not work; it is kept as the first attempt.
host_price_sum = {
    'type': 'aggregate',
    'groups': df['HostID'],
    'aggregations': [
        {'target': 'y', 'func': 'sum', 'enabled': True},
    ],
}

trace1 = go.Scatter(
    x=df['HostID'],
    y=df['Price'],
    mode='markers',
    transforms=[host_price_sum],
)

layout = go.Layout(
    title='Simple map from csv data',
    plot_bgcolor='rgb(230, 230,230)',
)

fig = go.Figure(data=[trace1], layout=layout)

# Plot data in the notebook
init_notebook_mode()
iplot(fig, filename='simple-plot-from-csv')


That didn't work. So I'll try it without graphic objects or fig, just as the [Plotly aggregations tute](https://plot.ly/python/aggregations/) does it.

In [None]:
import plotly.offline as off

off.init_notebook_mode(connected=False)

# Aggregate transform: group points by HostID and sum the y values (Price).
# `df` is the DataFrame loaded by the previous cell.
sum_by_host = {
    'type': 'aggregate',
    'groups': df['HostID'],
    'aggregations': [
        {'target': 'y', 'func': 'sum', 'enabled': True},
    ],
}

# The trace is a plain dict rather than a graph_objs object.
data = [{
    'type': 'scatter',
    'x': df['HostID'],
    'y': df['Price'],
    'mode': 'markers',
    'transforms': [sum_by_host],
}]

# validate=False is passed through to iplot exactly as before.
off.iplot(dict(data=data), validate=False)

Now combine the code.

In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
# import plotly.figure_factory as ff
# import plotly.graph_objs as go
import logging

# Root logger at DEBUG so the debug messages in this cell are emitted
# (use logging.INFO instead to quieten them).
logging.getLogger().setLevel(logging.DEBUG)

# Endpoint and the fixed part of the WFS GetFeature query string.
wfs_server = 'https://citydata.be.unsw.edu.au/geoserver/wfs'
wfs_parms = 'srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'

city = 'Sydney'
cql_filter_parm = "cql_filter=City='" + city + "'"

properties = 'City,PropertyID,HostID,Latitude,Longitude,Price'
propname_parm = 'PropertyName=' + properties

max_features = 1000000

# endpoint + '?' + the always-present parameter groups joined by '&'.
url = '{}?{}'.format(
    wfs_server,
    '&'.join((wfs_parms, cql_filter_parm, propname_parm)),
)

# The row limit is appended only when it is positive.
if max_features > 0:
    max_features_parm = 'maxFeatures={}'.format(max_features)
    url = '&'.join((url, max_features_parm))

logging.debug('url: %s', url)

# Download the CSV described by `url` into a DataFrame.
df = pandas.read_csv(url)

logging.debug('df.shape:%s', df.shape)
logging.info('list df:%s', list(df))

# Plot data in the notebook
init_notebook_mode(connected=False)

# Aggregate transform: group points by HostID and sum the y values (Price).
price_sum_per_host = {
    'type': 'aggregate',
    'groups': df['HostID'],
    'aggregations': [
        {'target': 'y', 'func': 'sum', 'enabled': True},
    ],
}

data = [{
    'type': 'scatter',
    'x': df['HostID'],
    'y': df['Price'],
    'mode': 'markers',
    'transforms': [price_sum_per_host],
}]

iplot(dict(data=data), validate=False)


Wow there are a handful of hosts with huge portfolios!

But remember there may be more than one scrape for Sydney, so we may be double or triple counting properties.

Let's find the last scrape date and use that.

In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
# import plotly.figure_factory as ff
# import plotly.graph_objs as go
import logging

# Root logger at INFO: the logging.debug calls in this cell are suppressed
# and only info-level (or higher) messages are shown.
logging.getLogger().setLevel(logging.INFO)

# Endpoint and the fixed part of the WFS GetFeature query string.
wfs_server = 'https://citydata.be.unsw.edu.au/geoserver/wfs'
wfs_parms = 'srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'

city = 'Sydney'
cql_filter_parm = "cql_filter=City='{}'".format(city)

# ScrapeDate is requested as well so the latest scrape can be found.
properties = 'City,PropertyID,HostID,Latitude,Longitude,Price,ScrapeDate'
propname_parm = 'PropertyName={}'.format(properties)

max_features = 10000000

# Collect the query-string groups; the row limit is added only when positive.
query_parts = [wfs_parms, cql_filter_parm, propname_parm]
if max_features > 0:
    max_features_parm = 'maxFeatures={}'.format(max_features)
    query_parts.append(max_features_parm)

url = wfs_server + '?' + '&'.join(query_parts)

logging.debug('url: %s', url)

# Download the CSV described by `url` into a DataFrame.
df = pandas.read_csv(url)

logging.debug('df.shape:%s', df.shape)
logging.debug('list df:%s', list(df))

# Log the largest ScrapeDate value found in the downloaded rows.
latest_scrape = df['ScrapeDate'].max()
logging.info(latest_scrape)

# Plot data in the notebook

# init_notebook_mode(connected=False)

# data = [dict(
#   type = 'scatter',
#   x = df['HostID'],
#   y = df['Price'],
#   mode = 'markers',
#   transforms = [dict(
#     type = 'aggregate',
#     groups = df['HostID'],
#     aggregations = [dict(
#         target = 'y', func = 'sum', enabled = True),
#     ]
#   )]
# )]

# iplot({'data': data}, validate=False)


So the last scrape of Sydney was 3rd April 2017 (`2017-04-03T00:00:00`). Let's add that to the cql_filter so we only get the records from the last scrape of Sydney.

In [None]:
import pandas
from plotly.offline import init_notebook_mode, iplot
# import plotly.figure_factory as ff
# import plotly.graph_objs as go
import logging

# Root logger at DEBUG so the debug messages in this cell are emitted
# (use logging.INFO instead to quieten them).
logging.getLogger().setLevel(logging.DEBUG)

# Endpoint and the fixed part of the WFS GetFeature query string.
wfs_server = 'https://citydata.be.unsw.edu.au/geoserver/wfs'
wfs_parms = 'srsName=EPSG%3A4326&typename=geonode%3AInsideAirbnb_44_2015_17&outputFormat=csv&version=1.0.0&service=WFS&request=GetFeature'

city = 'Sydney'
scrapedate = '2017-04-03T00:00:00'

# Two filter clauses joined by a URL-encoded " AND ".
# NOTE(review): the date value is sent unquoted here, whereas the earlier
# cells in this notebook quote it (ScrapeDate='...') - confirm both forms
# are accepted by the server.
city_clause = "City='" + city + "'"
date_clause = 'ScrapeDate=' + scrapedate
cql_filter_parm = 'cql_filter=' + '%20AND%20'.join([city_clause, date_clause])

properties = 'City,PropertyID,HostID,Latitude,Longitude,Price,ScrapeDate'
propname_parm = 'PropertyName=' + properties

# 0 means "no row limit": the maxFeatures parameter is omitted entirely.
max_features = 0

url = wfs_server + '?' + '&'.join([wfs_parms, cql_filter_parm, propname_parm])

if max_features > 0:
    max_features_parm = 'maxFeatures=' + str(max_features)
    url = url + '&' + max_features_parm

logging.debug('url: %s', url)

# Download the CSV described by `url` into a DataFrame.
df = pandas.read_csv(url)

logging.debug('df.shape:%s', df.shape)
logging.debug('list df:%s', list(df))

# Log the largest ScrapeDate value among the rows that came back.
newest_scrape = df['ScrapeDate'].max()
logging.info(newest_scrape)

# Plot data in the notebook

# init_notebook_mode(connected=False)

# data = [dict(
#   type = 'scatter',
#   x = df['HostID'],
#   y = df['Price'],
#   mode = 'markers',
#   transforms = [dict(
#     type = 'aggregate',
#     groups = df['HostID'],
#     aggregations = [dict(
#         target = 'y', func = 'sum', enabled = True),
#     ]
#   )]
# )]

# iplot({'data': data}, validate=False)


We have 24038 rows now instead of 57,000 or so, so we have filtered just to the last scrape in April 2017.

Now let's plot it again.

In [None]:

# Plot data in the notebook, using the `df` loaded by the previous cell.
init_notebook_mode(connected=False)

# Aggregate transform: group points by HostID and sum the y values (Price).
nightly_income_by_host = {
    'type': 'aggregate',
    'groups': df['HostID'],
    'aggregations': [
        {'target': 'y', 'func': 'sum', 'enabled': True},
    ],
}

data = [{
    'type': 'scatter',
    'x': df['HostID'],
    'y': df['Price'],
    'mode': 'markers',
    'transforms': [nightly_income_by_host],
}]

iplot(dict(data=data), validate=False)
