# Before Getting Started

In [1]:
# Dependencies
import pandas as pd
import folium

In [2]:
# To test that everything is working, 
# let’s pull up a map of New York City and add a circle marker. 
# In a Jupyter notebook, run:
folium_map = folium.Map(location=[40.738, -73.98],
                        zoom_start=13,
                        tiles="CartoDB dark_matter")

marker = folium.CircleMarker(location=[40.738, -73.98])
marker.add_to(folium_map)

<folium.features.CircleMarker at 0xc784ca7080>

In [3]:
# To display this map in a Jupyter notebook, 
# just type the name of your map in a separate cell and run the cell. 
folium_map

In [4]:
# You can also save to a stand-alone html file:
folium_map.save("my_map.html")

# Showing Some Real Data, NYC Bike Trips

In [5]:
# Load  data that the NYC bike share program makes its data public:
# https://www.citibikenyc.com/system-data
# Just one month of data will do for this example:
# e.g. 201610-citibike-tripdata.csv
# Use pandas to load the data into python, 
# and convert time strings into DateTime objects. 
bike_data = pd.read_csv("201610-citibike-tripdata.csv")

# Display the DataFrame
bike_data.head()

Unnamed: 0,Trip Duration,Start Time,Stop Time,Start Station ID,Start Station Name,Start Station Latitude,Start Station Longitude,End Station ID,End Station Name,End Station Latitude,End Station Longitude,Bike ID,User Type,Birth Year,Gender
0,328,2016-10-01 00:00:07,2016-10-01 00:05:35,471,Grand St & Havemeyer St,40.712868,-73.956981,3077,Stagg St & Union Ave,40.708771,-73.950953,25254,Subscriber,1992.0,1
1,398,2016-10-01 00:00:11,2016-10-01 00:06:49,3147,E 85 St & 3 Ave,40.778012,-73.954071,3140,1 Ave & E 78 St,40.771404,-73.953517,17810,Subscriber,1988.0,2
2,430,2016-10-01 00:00:14,2016-10-01 00:07:25,345,W 13 St & 6 Ave,40.736494,-73.997044,470,W 20 St & 8 Ave,40.743453,-74.00004,20940,Subscriber,1965.0,1
3,351,2016-10-01 00:00:21,2016-10-01 00:06:12,3307,West End Ave & W 94 St,40.794165,-73.974124,3357,W 106 St & Amsterdam Ave,40.800836,-73.966449,19086,Subscriber,1993.0,1
4,2693,2016-10-01 00:00:21,2016-10-01 00:45:15,3428,8 Ave & W 16 St,40.740983,-74.001702,3323,W 106 St & Central Park West,40.798186,-73.960591,26502,Subscriber,1991.0,1


In [6]:
# Adds a column to the table indicating the hour of the day. 
bike_data["Start Time"] = pd.to_datetime(bike_data["Start Time"])
bike_data["Stop Time"] = pd.to_datetime(bike_data["Stop Time"])
bike_data["hour"] = bike_data["Start Time"].map(lambda x: x.hour)

# Display the DataFrame
bike_data.head()

Unnamed: 0,Trip Duration,Start Time,Stop Time,Start Station ID,Start Station Name,Start Station Latitude,Start Station Longitude,End Station ID,End Station Name,End Station Latitude,End Station Longitude,Bike ID,User Type,Birth Year,Gender,hour
0,328,2016-10-01 00:00:07,2016-10-01 00:05:35,471,Grand St & Havemeyer St,40.712868,-73.956981,3077,Stagg St & Union Ave,40.708771,-73.950953,25254,Subscriber,1992.0,1,0
1,398,2016-10-01 00:00:11,2016-10-01 00:06:49,3147,E 85 St & 3 Ave,40.778012,-73.954071,3140,1 Ave & E 78 St,40.771404,-73.953517,17810,Subscriber,1988.0,2,0
2,430,2016-10-01 00:00:14,2016-10-01 00:07:25,345,W 13 St & 6 Ave,40.736494,-73.997044,470,W 20 St & 8 Ave,40.743453,-74.00004,20940,Subscriber,1965.0,1,0
3,351,2016-10-01 00:00:21,2016-10-01 00:06:12,3307,West End Ave & W 94 St,40.794165,-73.974124,3357,W 106 St & Amsterdam Ave,40.800836,-73.966449,19086,Subscriber,1993.0,1,0
4,2693,2016-10-01 00:00:21,2016-10-01 00:45:15,3428,8 Ave & W 16 St,40.740983,-74.001702,3323,W 106 St & Central Park West,40.798186,-73.960591,26502,Subscriber,1991.0,1,0


# Net Arrivals/Departures

In [7]:
# Explore if there is net migration of bikes from one bike station to another,
# and if this migration depends on the time of day. 
# To get started, create a DataFrame containing the locations of each station.

## Pre-Processing Data

In [8]:
# Write a function that does the following:
# - generate a DataFrame containing locations of stations
# - generates a DataFrame containing the number of trips originating at each station.
# - generates a DataFrame containing the number of trips arriving at each station.
# - join the three dataframes into one.

def get_trip_counts_by_hour(selected_hour):
    
    # Select the first occurrence of each station id.
    locations = bike_data.groupby("Start Station ID").first()
    
    # Select Start Station Name, Latitude, and Longitude as columns in DataFrame
    locations = locations.loc[:, ["Start Station Latitude",
                                  "Start Station Longitude",
                                  "Start Station Name"]]
    
    # Select one hour of the day.
    subset = bike_data[bike_data["hour"]==selected_hour]

    # Count trips with the same departure point.
    departure_counts =  subset.groupby("Start Station ID").count().iloc[:,[0]]
    departure_counts.columns= ["Departure Count"]
    
    # Count trips with the same arrival point.
    arrival_counts = subset.groupby("End Station ID").count().iloc[:,[0]]
    arrival_counts.columns= ["Arrival Count"]
    
    # Next we join the arrival counts, departure counts and locations into one table.
    trip_counts = departure_counts.join(locations).join(arrival_counts)
    
    # Return new DataFrame
    return trip_counts

In [9]:
# Generate data four a selected hour
# e.g. 9 for 9 am
# e.g. 18 for 6 pm
get_trip_counts_by_hour(9).head()

Unnamed: 0_level_0,Departure Count,Start Station Latitude,Start Station Longitude,Start Station Name,Arrival Count
Start Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
72,376,40.767272,-73.993929,W 52 St & 11 Ave,211.0
79,159,40.719116,-74.006667,Franklin St & W Broadway,270.0
82,40,40.711174,-74.000165,St James Pl & Pearl St,46.0
83,52,40.683826,-73.976323,Atlantic Ave & Fort Greene Pl,99.0
116,337,40.741776,-74.001497,W 17 St & 8 Ave,398.0


## Plotting Markers for Each Station

In [10]:
# Add data to the map. 
# We’ll iterate over all the rows in the DataFrame we just created and add a marker for each row. 
# We assign a different color depending on the sign of the net departures. 
# If there are more departures than arrivals, we draw a tangerine circle, other wise we use teal.

def plot_station_counts(trip_counts):
    # Generate a new map
    folium_map = folium.Map(location=[40.738, -73.98],
                        zoom_start=13,
                        tiles="CartoDB dark_matter",
                        width='50%')

    # for each row in the data, add a cicle marker
    for index, row in trip_counts.iterrows():
        # Calculate net departures
        net_departures = (row["Departure Count"]-row["Arrival Count"])
        
        # generate the popup message that is shown on click.
        popup_text = "{}<br> total departures: {}<br> total arrivals: {}<br> net departures: {}"
        popup_text = popup_text.format(row["Start Station Name"],
                          row["Arrival Count"],
                          row["Departure Count"],
                          net_departures)
        
        # radius of circles
        radius = net_departures/20
        
        # choose the color of the marker
        if net_departures>0:
            color="#E37222" # tangerine (orange)
        else:
            color="#0A8A9F" # teal (blue)
            
        # add marker to the map
        folium.CircleMarker(location=(
                                row["Start Station Latitude"],
                                row["Start Station Longitude"]),
                                radius=radius,
                                color=color,
                                fill=True
                                      ).add_to(folium_map)
    return folium_map

## Showing Real data

In [11]:
# plot net departures at 9am
trip_counts = get_trip_counts_by_hour(9)
plot_station_counts(trip_counts)
plot_station_counts(trip_counts).save("partI-9am.html")
plot_station_counts(trip_counts)

In [12]:
# plot net departures at 6pm
trip_counts = get_trip_counts_by_hour(18)
plot_station_counts(trip_counts)
plot_station_counts(trip_counts).save("partI-6pm.html")
plot_station_counts(trip_counts)