In this notebook, I will visualize the LA Metro data in different ways

Imports

In [1]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster

Read the csv file

In [2]:
# Load the cleaned Q1 2020 trip records; the path is relative to the
# notebook's working directory.
trips_df = pd.read_csv("2020 Q1 Cleaned Bike Data.csv")
# Bare last expression so the notebook renders a preview of the frame.
trips_df

Unnamed: 0,trip_id,duration,start_time,end_time,start_station,start_lat,start_lon,end_station,end_lat,end_lon,bike_id,plan_duration,trip_route_category,passholder_type,bike_type,stop_time,hour,start_station_name,end_station_name
0,134867493,25,2020-01-01 00:16:00,1/1/2020 0:41,3063,34.048038,-118.253738,4491,34.047440,-118.247940,18419,30,One Way,Monthly Pass,electric,2020-01-01 00:41:00,0,Olive & 5th,Main & Winston
1,134867799,35,2020-01-01 00:24:00,1/1/2020 0:59,4285,,,4354,34.017681,-118.409081,15661,1,One Way,One Day Pass,smart,2020-01-01 00:59:00,0,Not Found,Venice & Glendon
2,134868104,37,2020-01-01 00:31:00,1/1/2020 1:08,4344,34.014309,-118.491341,4322,34.005871,-118.429161,15848,1,One Way,Walk-up,smart,2020-01-01 01:08:00,0,Downtown Santa Monica Expo Line Station,Venice & Inglewood
3,134868103,36,2020-01-01 00:32:00,1/1/2020 1:08,4344,34.014309,-118.491341,4322,34.005871,-118.429161,16053,1,One Way,Walk-up,smart,2020-01-01 01:08:00,0,Downtown Santa Monica Expo Line Station,Venice & Inglewood
4,134868102,35,2020-01-01 00:33:00,1/1/2020 1:08,4344,34.014309,-118.491341,4322,34.005871,-118.429161,15694,1,One Way,Walk-up,smart,2020-01-01 01:08:00,0,Downtown Santa Monica Expo Line Station,Venice & Inglewood
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76386,140275195,65,2020-03-31 22:48:00,3/31/2020 23:53,4460,33.994850,-118.417961,4460,33.994850,-118.417961,15300,30,Round Trip,Monthly Pass,smart,2020-03-31 23:53:00,22,Culver & Inglewood,Culver & Inglewood
76387,140274593,23,2020-03-31 23:00:00,3/31/2020 23:23,3076,34.040600,-118.253838,3031,34.044701,-118.252441,16811,30,One Way,Monthly Pass,electric,2020-03-31 23:23:00,23,9th & Los Angeles,Not Found
76388,140274893,14,2020-03-31 23:23:00,3/31/2020 23:37,3031,34.044701,-118.252441,3076,34.040600,-118.253838,16811,30,One Way,Monthly Pass,electric,2020-03-31 23:37:00,23,Not Found,9th & Los Angeles
76389,140275293,21,2020-03-31 23:37:00,3/31/2020 23:58,3076,34.040600,-118.253838,3056,34.037460,-118.265381,16811,30,One Way,Monthly Pass,electric,2020-03-31 23:58:00,23,9th & Los Angeles,Grand & 14th


# Part One

We will visualize the data and see what areas had more arrivals/departures within a specific hour

Grab the data for a specific hour of the day (in this case 6am)

In [3]:
def get_trip_counts_by_hour(selected_hour, trips=None):
    """Count departures and arrivals per station for one hour of the day.

    Parameters
    ----------
    selected_hour : int
        Value matched against the ``hour`` column of the trips table.
    trips : pandas.DataFrame, optional
        Trip records with the columns ``start_station``, ``end_station``,
        ``hour``, ``start_lat``, ``start_lon`` and ``start_station_name``.
        Defaults to the notebook-level ``trips_df``.

    Returns
    -------
    pandas.DataFrame
        Indexed by station id (index name ``start_station``), sorted by id,
        with the columns ``Departure Count``, ``start_lat``, ``start_lon``,
        ``start_station_name`` and ``Arrival Count``. Both counts are
        integers; a station with no departures (or no arrivals) in the
        selected hour gets 0 rather than being dropped or left as NaN.
        Stations without a known start latitude/longitude are omitted.
    """
    if trips is None:
        trips = trips_df

    # Station coordinates/name: first non-null value seen for each station id
    # among the trips that *start* there. A station that never appears as a
    # start station therefore has no location and is filtered out below.
    location_columns = ["start_lat", "start_lon", "start_station_name"]
    locations = trips.groupby("start_station")[location_columns].first()

    subset = trips[trips["hour"] == selected_hour]
    # size() counts rows per group, independent of which columns hold NaNs
    departures = subset.groupby("start_station").size()
    arrivals = subset.groupby("end_station").size()

    # Use every station active in this hour, not only those with departures,
    # so arrival-only stations are kept and missing counts become 0.
    stations = departures.index.union(arrivals.index).rename("start_station")
    trip_counts = pd.DataFrame(
        {
            "Departure Count": departures.reindex(stations, fill_value=0),
            "Arrival Count": arrivals.reindex(stations, fill_value=0),
        },
        index=stations,
    )

    trip_counts = trip_counts.join(locations)
    # Keep the column order callers have seen so far
    trip_counts = trip_counts[["Departure Count",
                               *location_columns,
                               "Arrival Count"]]

    # A marker cannot be placed without coordinates
    trip_counts = trip_counts.dropna(subset=["start_lat", "start_lon"])

    return trip_counts

# Quick check that the function works: preview the first rows for hour 6 (6am)
get_trip_counts_by_hour(6).head()

Unnamed: 0_level_0,Departure Count,start_lat,start_lon,start_station_name,Arrival Count
start_station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3005,27,34.0485,-118.258537,7th & Flower,75.0
3006,2,34.04554,-118.256668,Olive & 8th,1.0
3007,3,34.05048,-118.254593,5th & Grand,29.0
3008,4,34.046612,-118.262733,Figueroa & 9th,8.0
3011,3,34.04113,-118.267982,Figueroa & Pico,4.0


This function creates a map of LA and plots one circle per station to show its departures and arrivals for a specific hour
- Tangerine means there were more departures than arrivals
- Teal means there were more arrivals than departures (or the same number of each)

In [13]:
def plot_station_counts(trip_counts, radius_divisor=20):
    """Plot each station's net departures as a circle on a map of LA.

    Parameters
    ----------
    trip_counts : pandas.DataFrame
        One row per station with the columns ``Departure Count``,
        ``Arrival Count``, ``start_lat``, ``start_lon`` and
        ``start_station_name``. A missing (NaN) count is treated as 0.
    radius_divisor : int or float, optional
        The circle radius is ``abs(net departures) / radius_divisor``.
        Defaults to 20.

    Returns
    -------
    folium.Map
        The map with one ``CircleMarker`` per row. Tangerine circles mark
        stations with more departures than arrivals; teal circles mark all
        other stations (more arrivals, or a tie).
    """
    #Create our map of LA
    arrival_departure_map = folium.Map(location=[34.0522,-118.2437],
                                       zoom_start=11,
                                       tiles='CartoDB dark_matter')

    #Popup message for each circle
    popup_template = "{}<br> total departures: {}<br> total arrivals: {}<br> net departures: {}"

    #Add a circle marker for each row
    for _, row in trip_counts.iterrows():
        # A station with no trips in one direction may carry NaN instead of 0
        departures = 0 if pd.isna(row["Departure Count"]) else int(row["Departure Count"])
        arrivals = 0 if pd.isna(row["Arrival Count"]) else int(row["Arrival Count"])
        net_departures = departures - arrivals

        # Argument order must follow the labels in the template
        popup_text = popup_template.format(row["start_station_name"],
                                           departures,
                                           arrivals,
                                           net_departures)

        # The sign is carried by the color, so the size uses the magnitude;
        # a negative radius is not a valid circle size.
        radius = abs(net_departures) / radius_divisor

        if net_departures > 0:
            color = "#E37222" # tangerine
        else:
            color = "#0A8A9F" # teal

        #Adds the marker to the map
        folium.CircleMarker(location=(row["start_lat"],
                                      row["start_lon"]),
                            radius=radius,
                            color=color,
                            popup=popup_text,
                            fill=True).add_to(arrival_departure_map)

    return arrival_departure_map

Map at 7 am

In [14]:
# Per-station counts for hour 7 (7am), rendered as the cell output
trip_counts = get_trip_counts_by_hour(7)
plot_station_counts(trip_counts)

9am

In [15]:
# Per-station counts for hour 9 (9am), rendered as the cell output
trip_counts = get_trip_counts_by_hour(9)
plot_station_counts(trip_counts)

12 o'clock or lunchtime!

In [16]:
# Per-station counts for hour 12 (noon), rendered as the cell output
trip_counts = get_trip_counts_by_hour(12)
plot_station_counts(trip_counts)

6pm (leaving work)

In [17]:
# The "hour" column uses a 24-hour clock, so 6pm is hour 18 (hour 6 is 6am)
trip_counts = get_trip_counts_by_hour(18)
plot_station_counts(trip_counts)

8pm

In [18]:
# The "hour" column uses a 24-hour clock, so 8pm is hour 20 (hour 8 is 8am)
trip_counts = get_trip_counts_by_hour(20)
plot_station_counts(trip_counts)