In [1]:
from ipywidgets import widgets
import numpy as np
import pandas as pd
from datetime import timedelta, datetime
from branca.colormap import linear, LinearColormap
from ipyleaflet import Map, Polygon, Marker, GeoJSON
from ipywidgets import HTML, interactive, HBox, VBox, interact, widgets, Box
import json
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go

  # NOTE(review): `yaml` is not imported and `f` is not defined anywhere in the
  # visible notebook, and these lines are indented without an enclosing block —
  # confirm whether this fragment belongs in this cell at all.
  # NOTE(review): yaml.load is called without a Loader argument; on untrusted
  # input an unsafe loader can construct arbitrary Python objects — consider
  # yaml.safe_load.
  data = yaml.load(f.read()) or {}
  # NOTE(review): if `f` is the same handle already consumed by f.read() above,
  # this second load would parse an exhausted stream — verify.
  defaults = yaml.load(f)


# Загрузка предсказанных и истинных данных

In [2]:
# Observed trip counts: the first CSV column holds the timestamps and becomes
# the (datetime-parsed) index; the remaining columns are region ids.
real_aggregated_data = pd.read_csv(
    'aggregated_2014-06_to_2016-06.csv',
    index_col=0,
    parse_dates=[0],
)
real_aggregated_data.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499
2014-06-01,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Restrict to May 2016 and keep only the regions whose mean trip count over
# that month exceeds 5.
data_may_2016 = real_aggregated_data.loc['2016-05-01 00:00:00':'2016-05-31 23:00:00']
region_ids = data_may_2016.columns[(data_may_2016.mean() > 5).to_numpy()]

In [4]:
# Model predictions: the first CSV column (the composite prediction id) is the index.
predicted_data = pd.read_csv(
    'kaggle_week6.csv',
    index_col=0,
)
predicted_data.head(1)

Unnamed: 0_level_0,y
id,Unnamed: 1_level_1
1075_2016-05-31_23_1,19.422735


In [5]:
# Observed series for the selected regions only, from 2016-05-31 23:00 onwards.
filtered_data = real_aggregated_data[region_ids].loc['2016-05-31 23:00:00':]
filtered_data.tail(1)

Unnamed: 0,1075,1076,1077,1125,1126,1127,1128,1129,1130,1131,...,1630,1684,1733,1734,1783,2068,2069,2118,2119,2168
2016-06-30 23:00:00,85,130,86,113,256,428,483,531,631,210,...,9,0,7,323,110,147,38,173,119,0


### Преобразование предсказанных данных в удобный для работы вид

In [6]:
# Each prediction id has the form "<region>_<YYYY-MM-DD>_<HH>_<shift>": the
# region, the end of the history window, and the forecast shift in hours.
parsed_ids = []
for raw_id in predicted_data.index:
    region, day_part, hour_part, horizon = raw_id.split('_')
    yyyy, mm, dd = day_part.split('-')
    history_end = datetime(int(yyyy), int(mm), int(dd), int(hour_part))
    target_time = history_end + timedelta(hours=int(horizon))
    parsed_ids.append((region, history_end, horizon, target_time))

# Region ids and shifts are kept as strings: the lookups further down compare
# these columns against str(...) values.
new_columns = ('region_id', 'end_story_time', 'shift', 'predicted_time')
for position, column in enumerate(new_columns):
    predicted_data[column] = [fields[position] for fields in parsed_ids]

In [7]:
predicted_data.head(1)

Unnamed: 0_level_0,y,region_id,end_story_time,shift,predicted_time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1075_2016-05-31_23_1,19.422735,1075,2016-05-31 23:00:00,1,2016-06-01


### Напишем вспомогательные функции для отрисовки карт

In [8]:
# Bounding box of the gridded area, in degrees.
longitude_low_bound = -74.25559
longitude_high_bound = -73.70001
latitude_low_bound = 40.49612
latitude_high_bound = 40.91553
# Number of grid cells stacked along the latitude axis before the region
# numbering moves on to the next longitude column.
step_count = 50
# Size of one grid cell, in degrees.
lon_step = 0.0111116
lat_step = 0.0083882
empire_state_lon = -73.985756
empire_state_lat = 40.748306
empire_coords = (empire_state_lat, empire_state_lon)
# Centre of the bounding box.
lat_center = (latitude_low_bound + latitude_high_bound)/2
lon_center = (longitude_low_bound + longitude_high_bound)/2

## Compute the corner points of the polygon for a given region
def get_poligon_bound_points(region_id):
    """Return the four (lat, lon) corners of the grid cell for ``region_id``.

    Region ids are 1-based and advance along latitude first: every
    ``step_count`` consecutive ids form one longitude column.

    Parameters
    ----------
    region_id : int or str
        1-based region identifier; anything accepted by ``int()``.

    Returns
    -------
    list of tuple
        Corners in the order: (low lat, low lon), (high lat, low lon),
        (high lat, high lon), (low lat, high lon).
    """
    cell_index = int(region_id) - 1
    # Use the shared grid constant instead of repeating the literal 50.
    lon_steps_count, lat_steps_count = divmod(cell_index, step_count)
    low_lat_bound = latitude_low_bound+lat_steps_count*lat_step
    low_lon_bound = longitude_low_bound+lon_steps_count*lon_step
    return [(low_lat_bound, low_lon_bound), (low_lat_bound+lat_step, low_lon_bound),
            (low_lat_bound+lat_step, low_lon_bound+lon_step), (low_lat_bound, low_lon_bound+lon_step)]

## Build the region -> polygon mapping coloured by the observed data
def get_polygons_dict(current_date):
    """Build one map polygon per selected region, coloured by observed trips.

    Parameters
    ----------
    current_date : index label of ``real_aggregated_data``
        The row (timestamp) whose trip counts are drawn.

    Returns
    -------
    dict
        Maps each id in ``region_ids`` to a ``Polygon`` whose colour encodes the
        trip count and whose popup shows the region id and the count.
    """
    # Fetch the row once; it does not change while iterating over the regions.
    trips_at_date = real_aggregated_data.loc[current_date]
    # The colour scale tops out at the busiest region in this row.
    linear_colormap = LinearColormap(linear.YlOrRd_04.colors, vmax = trips_at_date.max())
    linear_colormap.caption = 'Число поездок такси'

    polygons = {}
    for region_id in region_ids:
        value = trips_at_date[str(region_id)]
        cell_color = linear_colormap.rgb_hex_str(value)
        polygon = Polygon(
            locations=get_poligon_bound_points(region_id),
            color=cell_color,
            fill_color=cell_color,
            fill_opacity = 0.5,
            stroke = False
        )
        popup_content = HTML()
        popup_content.value = 'region_id: {}'.format(region_id) + '<br> Trips: {}'.format(value)
        polygon.popup = popup_content
        polygons[region_id] = polygon
    return polygons

## Build the region -> polygon mapping coloured by the predicted data
def get_polygons_predicted_dict(current_date, shift):
    """Build one map polygon per selected region, coloured by predicted trips.

    Parameters
    ----------
    current_date : datetime-like
        End of the history window (matched against ``end_story_time``).
    shift : int or str
        Forecast shift in hours; compared as a string against the ``shift`` column.

    Returns
    -------
    dict
        Maps region id to a ``Polygon`` whose colour encodes the predicted value
        and whose popup shows the region id and the value. Regions without a
        prediction for this date/shift are left out of the mapping.
    """
    # Filter by date and shift once instead of re-scanning the full frame for
    # every region.
    selection = predicted_data[(predicted_data['end_story_time'] == current_date)
                               & (predicted_data['shift'] == str(shift))]
    linear_colormap = LinearColormap(linear.YlOrRd_04.colors, vmax = selection.y.max())
    linear_colormap.caption = 'Число поездок такси'

    # First prediction per region, looked up by label. The previous `.y[0]`
    # depended on positional fallback of Series[int] on a string index, which
    # pandas has deprecated.
    y_by_region = selection.drop_duplicates('region_id').set_index('region_id')['y']

    polygons = {}
    for region_id in region_ids:
        region_key = str(region_id)
        if region_key not in y_by_region.index:
            # No prediction for this region at the requested date/shift:
            # skip it rather than abort drawing the whole map.
            continue
        value = y_by_region.loc[region_key]
        cell_color = linear_colormap.rgb_hex_str(value)
        polygon = Polygon(
            locations=get_poligon_bound_points(region_id),
            color=cell_color,
            fill_color=cell_color,
            fill_opacity = 0.5,
            stroke = False
        )
        popup_content = HTML()
        popup_content.value = 'region_id: {}'.format(region_id) + '<br> Trips: {}'.format(value)
        polygon.popup = popup_content
        polygons[region_id] = polygon
    return polygons

In [9]:
def _make_base_map():
    """Create the map centred on the grid; only dragging is left enabled."""
    return Map(
        center=(lat_center, lon_center),
        zoom=11,
        dragging = True,
        touch_zoom = False,
        double_click_zoom = False,
        box_zoom = False,
        tap = False,
        keyboard = False,
        zoom_control = False
    )


def draw_map_with_real_data(current_date):
    """Display a map with one polygon per region coloured by observed trips.

    Parameters
    ----------
    current_date : index label of ``real_aggregated_data``
        Timestamp whose observed counts are drawn.
    """
    m = _make_base_map()

    for poly in get_polygons_dict(current_date).values():
        m.add_layer(poly)

    display(m)


def draw_map_with_predicted_data(current_date, shift):
    """Display a map with one polygon per region coloured by predicted trips.

    Parameters
    ----------
    current_date : datetime-like
        End of the history window the prediction was made from.
    shift : int or str
        Forecast shift in hours.
    """
    m = _make_base_map()

    for poly in get_polygons_predicted_dict(current_date, shift).values():
        m.add_layer(poly)

    display(m)

In [10]:
# Let slider descriptions take their natural width instead of being truncated.
style = dict(description_width='initial')

# Slider over the observed timestamps -> map of real trip counts.
real_slider = interactive(
    draw_map_with_real_data,
    current_date=widgets.SelectionSlider(
        options=filtered_data.index,
        description='Текущая дата ',
        style=style,
    ),
)

# Sliders over the history-end timestamps and the forecast shift -> map of predictions.
predicted_slider = interactive(
    draw_map_with_predicted_data,
    current_date=widgets.SelectionSlider(
        options=predicted_data['end_story_time'].unique(),
        description='Базовая дата ',
        style=style,
    ),
    shift=widgets.SelectionSlider(
        options=[str(x) for x in range(1, 7)],
        description='Сдвиг ',
        style=style,
    ),
)

In [11]:
# Lay the real-data map and the prediction map out side by side.
HBox(children=[real_slider, predicted_slider])

HBox(children=(interactive(children=(SelectionSlider(description='Текущая дата ', options=(Timestamp('2016-05-…

Не стал доводить до "идеального" поведения, т.к. здесь много безумной борьбы с JS и стилями.

### Временные линии

In [12]:
# Fetch the predicted time series for a region and a forecast shift
def get_predicted_time_series(region_id, shift):
    """Return the predicted series for one region and one forecast shift.

    Both arguments are compared as strings against the ``region_id`` and
    ``shift`` columns of ``predicted_data``.

    Returns
    -------
    list
        ``[predicted_time Series, array of predicted y values]``.
    """
    wanted = (
        (predicted_data['region_id'] == str(region_id))
        & (predicted_data['shift'] == str(shift))
    )
    rows = predicted_data.loc[wanted]
    return [rows['predicted_time'], rows['y'].values]

# Refresh the data shown on the time-series figure
def update_choice(region_id, shift):
    """Point both traces of ``timeseries_f`` at the chosen region and shift.

    Parameters
    ----------
    region_id : column label of ``filtered_data``
        Region whose observed and predicted series are shown.
    shift : int or str
        Forecast shift in hours for the predicted trace.
    """
    # Resolve the new data first so a failed lookup leaves the figure untouched.
    real_y = filtered_data[region_id]
    predicted_x, predicted_y = get_predicted_time_series(region_id, shift)

    # Send all trace changes to the front end as one update, so the figure is
    # never rendered with a mix of old and new data.
    with timeseries_f.batch_update():
        timeseries_f.data[0].y = real_y
        timeseries_f.data[1].x = predicted_x
        timeseries_f.data[1].y = predicted_y

In [13]:
# Initial traces: region '1075', forecast shift of 1 hour.
trace_real = go.Scatter(
    x=filtered_data.index,
    y=filtered_data['1075'],
    name='Real data',
)

predicted_initial = get_predicted_time_series(region_id='1075', shift=1)
trace_predicted = go.Scatter(
    x=predicted_initial[0],
    y=predicted_initial[1],
    name='Predicted data',
)

data = [trace_real, trace_predicted]

# Date x-axis with a range slider underneath the plot.
layout = {
    'title': 'Time Series',
    'xaxis': {
        'rangeslider': {'visible': True},
        'type': 'date',
    },
    'height': 600,
}
timeseries_f = go.FigureWidget(data=data, layout=layout)

In [14]:
# Sliders that drive update_choice: one over the selected regions, one over shifts 1..6.
choice_slider = interactive(
    update_choice,
    region_id=widgets.SelectionSlider(
        options=filtered_data.columns,
        description='region_id',
    ),
    shift=widgets.SelectionSlider(
        options=range(1, 7),
        description='Shift',
    ),
)

In [16]:
# Stack the figure above its region/shift sliders.
VBox(children=[timeseries_f, choice_slider])

VBox(children=(FigureWidget({
    'data': [{'name': 'Real data',
              'type': 'scatter',
            …