In [4]:
# Imports
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap, HeatMapWithTime
import numpy as np
import datetime as dt

In [5]:
# read csv file
df = pd.read_csv('tripdata_01.csv')

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,pickup_time,dropoff_time,passenger_count,trip_distance,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,tip_amount,total_amount,trip_duration,same_day,pickup_hour,dropoff_hour,day_of_week,weekend,speed,time_of_day
0,0,2009-01-04 02:52:00,2009-01-04 03:02:00,1,2.63,-73.991957,40.721567,-73.993803,40.695922,0.0,9.4,10.0,1.0,2,3,6,1.0,0.26,2.0
1,1,2009-01-04 03:31:00,2009-01-04 03:38:00,3,4.55,-73.982102,40.73629,-73.95585,40.76803,2.0,14.6,7.0,1.0,3,3,6,1.0,0.65,2.0
2,2,2009-01-03 15:43:00,2009-01-03 15:57:00,5,10.35,-74.002587,40.739748,-73.869983,40.770225,4.74,28.44,14.0,1.0,15,15,5,1.0,0.74,0.0
3,3,2009-01-01 20:52:58,2009-01-01 21:14:00,1,5.0,-73.974267,40.790955,-73.996558,40.731849,3.05,18.45,21.033333,1.0,20,21,3,0.0,0.24,0.0
4,4,2009-01-24 16:18:23,2009-01-24 16:24:56,1,0.4,-74.00158,40.719382,-74.008378,40.72035,0.0,3.7,6.55,1.0,16,16,5,1.0,0.06,0.0


In [7]:
df.drop(['Unnamed: 0'],axis=1,inplace=True)

# Most Frequent Locations

In [8]:
# find the most common latitude and longitude

# for pickup
lon = df['pickup_longitude'].mode()[0]
#grouped_lon = df.groupby('pickup_longitude').agg(count=('pickup_longitude', 'count')) #['pickup_longitude'].count()
#sorted_lon =grouped_lon.sort_values('pickup_longitude', ascending=False)
#print(sorted_lon.head())
lat = df['pickup_latitude'].mode()[0]

# for dropoff
lon2 = df['dropoff_longitude'].mode()[0]
lat2 = df['dropoff_latitude'].mode()[0]

print("Most common longitude and latitude for pickup are: ", lon, " and ", lat)
print("Most common longitude and latitude for dropoff are: ", lon2, " and ", lat2)

Most common longitude and latitude for pickup are:  -73.98224999999998  and  40.733697
Most common longitude and latitude for dropoff are:  -73.951818  and  40.733697


In [9]:
# find the most common pair of coordinates
# for pickup
pickup = df.groupby(['pickup_longitude','pickup_latitude']).size().sort_values(ascending=False)
# for dropoff
dropoff = df.groupby(['dropoff_longitude','dropoff_latitude']).size().sort_values(ascending=False)

most_frequent_pickup = pickup.head(1)
pair_lon1 = most_frequent_pickup.index.to_list()[0][0]
pair_lat1 = most_frequent_pickup.index.to_list()[0][1]

most_frequent_dropoff = dropoff.head(1)
pair_lon2 = most_frequent_dropoff.index.to_list()[0][0]
pair_lat2 = most_frequent_dropoff.index.to_list()[0][1]

In [10]:
print(pair_lon1, pair_lat1)
print(pair_lon2, pair_lat2)

-73.951818 40.733697
-73.951818 40.733697


RESULT: most common pickup and dropoff pair is at the same place!

In [11]:
# make a map

# center of the map
center = [40.730610,-73.935242]

# create map
m = folium.Map(location=center,zoom_start=11)

# display the map
m

In [12]:
# add points to map
folium.Marker([lat,lon], popup = f"Most Frequent Pickup (Seperate)").add_to(m)
folium.Marker([lat2,lon2], popup = f"Most Frequent Dropoff (Seperate)").add_to(m)

folium.Marker([pair_lat1,pair_lon1], popup = f"Most Frequent Pickup (Pair)").add_to(m)
folium.Marker([pair_lat2,pair_lon2], popup = f"Most Frequent Dropoff (Pair)").add_to(m)

<folium.map.Marker at 0x1c483496a30>

In [13]:
# add Breitengrade and Längengrade according to most common coordinates
limits_longitude = np.array([-74.266649,-73.689083]) # the limits of New York City (longitude)
limits_latitude = np.array([40.493974,40.925023]) # the limits of New York City (latitude)
# for pickup
points1 = [(lat,limits_longitude[0]),(lat,limits_longitude[1])]
folium.PolyLine(points1, color="red", weight=2.5, opacity=1).add_to(m)
points2 = [(limits_latitude[0],lon),(limits_latitude[1],lon)]
folium.PolyLine(points2, color="red", weight=2.5, opacity=1).add_to(m)
# for dropoff
points3 = [(lat2,limits_longitude[0]),(lat2,limits_longitude[1])]
points4 = [(limits_latitude[0],lon2),(limits_latitude[1],lon2)]
folium.PolyLine(points3, color="green", weight=2.5, opacity=1).add_to(m)
folium.PolyLine(points4, color="green", weight=2.5, opacity=1).add_to(m)
m

# Heatmap of Locations

In [14]:
# Heatmap of the first 100000 pickup locations - See generateHeatmap.py for more details
m = folium.Map(location=center, zoom_start=11)
m.add_child(folium.plugins.HeatMap(df[['pickup_latitude', 'pickup_longitude']].head(100000).values.tolist(), radius=15))
m

### Heatmap with Time

In [18]:
m = folium.Map(location=center, zoom_start=11)
df['pickup_time'] = pd.to_datetime(df.pickup_time)
df['dropoff_time'] = pd.to_datetime(df.dropoff_time)
df_copy = df.head(10000)
df_copy['day_of_year'] = df_copy['pickup_time'].dt.day_of_year
#df_copy['day_of_year'].unique()
#print(type(df_copy['pickup_time'][0]))
#= df_copy['pickup_time'].dt.day_of_year
#heat_data = df_copy[['pickup_latitude','pickup_longitude']]

time_index = list(df_copy['day_of_year'].sort_values().astype('str').unique())
print(time_index)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_copy['day_of_year'] = df_copy['pickup_time'].dt.day_of_year


In [20]:
df_copy['day_of_year'] = df_copy['day_of_year'].sort_values(ascending=True)

data = []
for _, d in df_copy.groupby('day_of_year'):
    data.append([[row['pickup_latitude'],row['pickup_longitude']] for _, row in d.iterrows()])

hmt = folium.Map(location= center,zoom_start=11,control_scale=True)
HeatMapWithTime(data,index=time_index,auto_play=True,use_local_extrema=True).add_to(hmt)

hmt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_copy['day_of_year'] = df_copy['day_of_year'].sort_values(ascending=True)
