# Import Libs and Data

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import geopandas as gpd
from shapely.geometry import Polygon, box

import matplotlib.pyplot as plt
%matplotlib inline

# 1. Загрузка данных

In [4]:
df = pd.read_csv('final_data.csv', index_col=0)
df.head()

  mask |= (ar1 == a)


Unnamed: 0,region,tpep_pickup_dth,trips
0,1,2016-05-01 00:00:00,0.0
1,1,2016-05-04 22:00:00,0.0
2,1,2016-05-04 23:00:00,0.0
3,1,2016-05-05 00:00:00,0.0
4,1,2016-05-05 01:00:00,0.0


In [5]:
df_r1 = df.groupby('region', as_index=False).sum()
df_r1.head()

Unnamed: 0,region,trips
0,1,0.0
1,2,0.0
2,3,0.0
3,4,0.0
4,5,0.0


In [12]:
print('%d регионов, откуда не было поездок' %df_r1[df_r1['trips'] == 0]['trips'].count())

1283 регионов, откуда не было поездок


# 2. Карта Нью-Йорка

**2 и 3 задание сделано с использованием библиотеки folium, т.к. не удалось установить basemap для статической карты**

New York:  
Latitude 40.730610  
Longitude -73.935242

ESB:  
Latitude 40.748323  
Longitude -73.985709

In [13]:
m = folium.Map(location=[40.730610, -73.935242], zoom_start=11)
folium.Marker([40.748323, -73.985709], popup='<b>Empire State Building</b>', tooltip='ESB').add_to(m)
m

# 3. Choropleth

In [16]:
df_reg = pd.read_csv('regions.csv', sep=';')
df_reg.head()

Unnamed: 0,region,west,east,south,north
0,1,-74.25559,-74.244478,40.49612,40.504508
1,2,-74.25559,-74.244478,40.504508,40.512896
2,3,-74.25559,-74.244478,40.512896,40.521285
3,4,-74.25559,-74.244478,40.521285,40.529673
4,5,-74.25559,-74.244478,40.529673,40.538061


In [17]:
# преобразование координат региона в гео-формат
def make_boxes(row):
    poly = box(row['west'], row['south'], row['east'], row['north'])
    return poly

In [18]:
df_reg['geometry'] = df_reg.apply(make_boxes, axis=1)

In [24]:
# переведем df_reg в формат GeoDataFrame
gpd_reg = gpd.GeoDataFrame(df_reg[['region', 'geometry']], geometry=df_reg['geometry'])
#gpd_reg.crs = {'init' :'epsg:4326'}  
#gpd_reg.to_crs({'init': 'epsg:3395'})

In [25]:
gpd_reg.head()

Unnamed: 0,region,geometry
0,1,"POLYGON ((-74.24448 40.49612, -74.24448 40.504..."
1,2,"POLYGON ((-74.24448 40.50451, -74.24448 40.512..."
2,3,"POLYGON ((-74.24448 40.51290, -74.24448 40.521..."
3,4,"POLYGON ((-74.24448 40.52128, -74.24448 40.529..."
4,5,"POLYGON ((-74.24448 40.52967, -74.24448 40.538..."


In [26]:
folium.Choropleth(geo_data=gpd_reg.to_json(),
                  data=df_r1[['region', 'trips']],
                  columns=['region', 'trips'],
                  key_on='feature.properties.region',                  
                  fill_color='YlGnBu',
                  line_weight=1,
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Trips count',
                  highlight=True                  
                 ).add_to(m)                
m

# 4. Интерактивная карта с отметкой Статуи Свободы

Statue of Liberty coordinate: 40.689247, -74.044502.

In [28]:
m = folium.Map(location=[40.730610, -73.935242], zoom_start=11)
folium.Marker([40.689247, -74.044502], popup='<b>Statue of Liberty</b>', tooltip='Statue of Liberty').add_to(m)
m

# 5. Среднее за месяц количество поездок такси в час

In [33]:
df.head()

Unnamed: 0,region,tpep_pickup_dth,trips,dt_pickup
0,1,2016-05-01 00:00:00,0.0,2016-05-01 00:00:00
1,1,2016-05-04 22:00:00,0.0,2016-05-04 22:00:00
2,1,2016-05-04 23:00:00,0.0,2016-05-04 23:00:00
3,1,2016-05-05 00:00:00,0.0,2016-05-05 00:00:00
4,1,2016-05-05 01:00:00,0.0,2016-05-05 01:00:00


In [47]:
#make datetime format
df['dt_pickup'] = pd.to_datetime(df['tpep_pickup_dth'], format = '%Y-%m-%d %H:%M:%S')

# всего часов 
hrs = df['dt_pickup'].max() - df['dt_pickup'].min()
hrs =hrs / pd.Timedelta('1 hour')


df_r1['trips_per_hour'] = df_r1['trips']/hrs

In [87]:
m = folium.Map(location=[40.730610, -73.935242], zoom_start=11)

folium.Choropleth(geo_data=gpd_reg.to_json(),
                  data=df_r1[['region', 'trips_per_hour']],
                  columns=['region', 'trips_per_hour'],
                  key_on='feature.properties.region',                  
                  fill_color='YlGnBu',
                  line_weight=1,
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Trips per hour',
                  highlight=True                  
                 ).add_to(m)                
m

# 6. Регионы с кол-вом поездок в час больше 5

In [58]:
df_r2 = df_r1[df_r1['trips_per_hour'] >= 5]

print('%d регионов с кол-вом поездок в час больше 5' % df_r2['region'].count())

102 регионов с кол-вом поездок в час больше 5


In [75]:
gpd_reg102 = gpd_reg[gpd_reg['region'].isin(df_r2['region'].values.tolist())]

In [76]:
m = folium.Map(location=[40.730610, -73.935242], zoom_start=11)

folium.Choropleth(geo_data=gpd_reg102.to_json(),
                  data=df_r2[['region', 'trips_per_hour']],
                  columns=['region', 'trips_per_hour'],
                  key_on='feature.properties.region',                  
                  fill_color='YlGnBu',
                  line_weight=1,
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Trips per hour',
                  highlight=True                  
                 ).add_to(m)                
m

In [92]:
df_r2.to_csv('w2_tph_filter.csv')

In [91]:
df_r2.sort_values(by='trips_per_hour')

Unnamed: 0,region,trips,trips_per_hour
1219,1220,3752.0,5.049798
1439,1440,3781.0,5.088829
1628,1629,3839.0,5.166891
1223,1224,3991.0,5.371467
1731,1732,4033.0,5.427995
1578,1579,4063.0,5.468371
1424,1425,4091.0,5.506057
1170,1171,4914.0,6.613728
1376,1377,5244.0,7.057873
1440,1441,5252.0,7.068641
