# Оформление проекта

Определитесь, в каком виде вы хотите представить ваш проект. Мы предлагаем вам сделать интерактивное демо, содержащее:

* карты с визуализацией реального и прогнозируемого спроса на такси в выбираемый пользователем момент времени;
* временной ряд фактического и прогнозируемого спроса на такси в выбираемой области.

Немного материалов о том, как в ноутбуках можно делать интерактивные графики:

* https://blog.dominodatalab.com/interactive-dashboards-in-jupyter/
* http://nbviewer.jupyter.org/github/quantopian/ipython/blob/master/examples/Interactive%20Widgets/Index.ipynb
* https://github.com/ioam/holoviews

Чтобы сдать задание, создайте демо и дайте ссылку на него.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import gc
from datetime import timedelta
import datetime
import holoviews as hv
import param
import paramnb
from holoviews.operation.timeseries import rolling, rolling_outlier_std
from holoviews.streams import Stream

# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')

# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline

In [2]:
%%time
# Load the per-region trip counts, indexed by the parsed pickup timestamp.
data = pd.read_csv(
    '../result_data_new.csv',
    usecols=['region_id', 'count', 'pickup_datetime'],
    parse_dates=['pickup_datetime'],
    index_col='pickup_datetime',
)
# Keep rows from 2016-06-01 onwards and turn the timestamp back into a column
# so the region ids can be read from a plain column.
data = data["2016-06-01":].reset_index()
# Sorted array of the distinct region ids present in the data.
regions = np.unique(data['region_id'])
# Final layout: one row per (region_id, pickup_datetime) pair.
data = data.set_index(["region_id", "pickup_datetime"])

Wall time: 2.84 s


In [3]:
# Sanity check of the loaded table: first and last rows.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(data.head())
print(data.tail())

                           count
region_id pickup_datetime       
1075      2016-06-01          26
1076      2016-06-01          30
1077      2016-06-01          19
1125      2016-06-01          39
1126      2016-06-01          71
                               count
region_id pickup_datetime           
2068      2016-06-30 23:00:00    147
2069      2016-06-30 23:00:00     38
2118      2016-06-30 23:00:00    173
2119      2016-06-30 23:00:00    119
2168      2016-06-30 23:00:00      0


In [4]:
# Select one (region, timestamp) observation by masking both index levels.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(data[(data.index.get_level_values("region_id") == 1075)
           & (data.index.get_level_values("pickup_datetime") == "2016-06-01 00:00:00")])

                           count
region_id pickup_datetime       
1075      2016-06-01          26


In [5]:
# Load the model forecasts, indexed by (region_id, pickup_datetime).
# No parse_dates is requested here, so the pickup_datetime level is kept as
# read from the CSV; the lookups below pass dates as strings.
tmp2 = pd.read_csv("model_results.csv", index_col=['region_id', 'pickup_datetime'])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tmp2.head())

                                   y_t+1      y_t+2      y_t+3      y_t+4  \
region_id pickup_datetime                                                   
1075      2016-05-31 23:00:00  24.333333  16.944444  10.500000   7.277778   
          2016-06-01 00:00:00  26.333333  16.444444   5.500000   4.611111   
          2016-06-01 01:00:00   8.000000   3.888889   5.777778   8.388889   
          2016-06-01 02:00:00   5.333333   4.166667  10.833333  19.500000   
          2016-06-01 03:00:00   5.777778   9.222222  19.500000  42.444444   

                                   y_t+5      y_t+6  
region_id pickup_datetime                            
1075      2016-05-31 23:00:00   6.000000   9.111111  
          2016-06-01 00:00:00   9.444444  20.111111  
          2016-06-01 01:00:00  21.666667  48.888889  
          2016-06-01 02:00:00  47.111111  88.722222  
          2016-06-01 03:00:00  70.888889  66.000000  


In [6]:
def get_predicted_data(df, region, date):
    """Return the six values stored for one region and timestamp.

    Parameters
    ----------
    df : DataFrame whose index has ``region_id`` and ``pickup_datetime`` levels.
    region : value compared against the ``region_id`` level.
    date : value compared against the ``pickup_datetime`` level.

    Returns
    -------
    numpy.ndarray of shape ``(6,)`` holding the values of the matching rows.

    Raises
    ------
    ValueError
        If the matching rows do not contain exactly six values in total
        (for example when nothing matches).
    """
    index = df.index
    region_matches = index.get_level_values("region_id") == region
    date_matches = index.get_level_values("pickup_datetime") == date
    selected = df[region_matches & date_matches]
    return selected.values.reshape((6,))

def get_true_data(df, region, date):
    """Return the observed values for the six hours following ``date``.

    Parameters
    ----------
    df : DataFrame whose index has ``region_id`` and ``pickup_datetime`` levels.
    region : value compared against the ``region_id`` level.
    date : anything ``pd.to_datetime`` accepts; the hour the window is
        anchored to. The window itself covers date+1h .. date+6h.

    Returns
    -------
    numpy.ndarray with six entries: for each hour of the window, the first
    column of the first matching row.

    Raises
    ------
    IndexError
        If any hour of the window has no row for ``region``.
    """
    first_hour = pd.to_datetime(date) + timedelta(hours=1)
    # Lowercase 'h' matches the alias used elsewhere in this notebook; the
    # uppercase spelling is deprecated in newer pandas releases.
    hours = pd.date_range(first_hour, first_hour + timedelta(hours=5), freq='1h')
    # The level arrays and the region mask do not depend on the hour, so they
    # are computed once instead of on every iteration.
    region_mask = df.index.get_level_values("region_id") == region
    timestamps = df.index.get_level_values("pickup_datetime")
    return np.array([df[region_mask & (timestamps == hour)].values[0][0]
                     for hour in hours])

In [10]:
def draw_plot(data, key_name, value_name, label):
    """Build a labelled HoloViews curve from two columns of ``data``.

    ``key_name`` is used as the single key dimension and ``value_name`` as
    the single value dimension; ``label`` is passed through as the curve label.
    """
    curve_options = dict(kdims=[key_name], vdims=[value_name], label=label)
    return hv.Curve(data, **curve_options)


def draw_series(date, region):
    """Overlay forecast and observed demand for the six hours after ``date``.

    Parameters
    ----------
    date : timestamp (string accepted by ``pd.to_datetime``) the forecast
        row is looked up by.
    region : region id to plot.

    Returns
    -------
    The overlay (``*``) of two curves built by ``draw_plot``: one labelled
    'predicted' and one labelled 'true', sharing the 'date' axis.
    """
    true_values = get_true_data(data, region, date)
    predicted_values = get_predicted_data(tmp2, region, date)
    # get_true_data reads hours date+1h .. date+6h and the forecast columns
    # are y_t+1 .. y_t+6, so the x axis has to start one hour after `date`;
    # starting at `date` would draw every point one hour too early.
    first_hour = pd.to_datetime(date) + timedelta(hours=1)
    # Lowercase 'h' matches the alias used elsewhere in this notebook; the
    # uppercase spelling is deprecated in newer pandas releases.
    dates = pd.date_range(first_hour, first_hour + timedelta(hours=5), freq='1h')
    tt = pd.DataFrame(data={'y': predicted_values, 'date': dates, 'real_y': true_values})
    return draw_plot(tt, 'date', 'y', 'predicted') * draw_plot(tt, 'date', 'real_y', 'true')

In [12]:
# HoloViews display options, set through IPython line magics, followed by a
# single static plot for timestamp "2016-05-31 23:00:00" and region 1075.
%output size=400
%opts Curve  [height=100 width=200 xaxis='bottom' show_grid=True tools=['hover']]
draw_series("2016-05-31 23:00:00", 1075)

In [43]:
class TaxiPredictionExplorer(hv.streams.Stream):
    """Stream exposing the ``date`` and ``region`` selectors for the forecast plot."""

    # Selectable timestamps: every hour from 2016-05-31 23:00 to
    # 2016-06-30 17:00, offered as strings.
    date = param.ObjectSelector(default="2016-05-31 23:00:00",
                                objects=pd.date_range("2016-05-31 23:00:00", "2016-06-30 17:00:00", freq='1h').astype(str))
    # Selectable regions: the ids collected from the demand data.
    region = param.ObjectSelector(default=regions[0], objects=regions)

    def view(self):
        """Return a DynamicMap that renders ``draw_series`` with this object as its stream."""
        # The map has no key dimensions (kdims=[]), so there is nothing to
        # redim; `redim` returns a new object rather than modifying in place,
        # and a freshly created map has nothing to reset.
        return hv.DynamicMap(draw_series, kdims=[], streams=[self])

In [44]:
# Curve size for this cell (IPython line magic).
%opts Curve [width=200 height=100]
explorer = TaxiPredictionExplorer()
# Build the parameter widgets for the explorer; explorer.event is handed over
# as the widget callback.
paramnb.Widgets(explorer, continuous_update=True, callback=explorer.event, on_init=True)
# Last expression of the cell: the DynamicMap returned by view() is displayed.
explorer.view()

<IPython.core.display.Javascript object>