#### Author : Maximiliano Lopez Salgado
#### First commit: 2023-05-11
#### Last commit: 2023-05-12
#### Description: This notebook performs exploratory data analysis (EDA) on the bike-sharing data set (station coordinates and trip history)

In [2]:
# import ML libraries
import os
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium


In [3]:
# Directory that holds the project's CSV files. It defaults to the original
# author's location, so existing behaviour is unchanged, and can be redirected
# on another machine by setting the CAPITAL_BIKE_DATA_DIR environment variable
# before the kernel is started.
DATA_DIR = Path(os.environ.get(
    'CAPITAL_BIKE_DATA_DIR',
    '/Users/maximilianolopezsalgado/data_projects/capital_bike_sharing/datasets',
))

# import station_coordinates data
# header=None: the first line of the file is read as data, not as column names
station_coordinates = pd.read_csv(DATA_DIR / 'station_coordinates.csv', header=None)

# import trip_history data
# NOTE(review): the variable is called `trip_story` although the file is
# trip_history.csv; the name is kept as-is because cells outside this view may
# reference it.
trip_story = pd.read_csv(DATA_DIR / 'trip_history.csv')

In [4]:
# Column 0 of the raw frame holds one text value per row that is split into
# station name, latitude and longitude. The split is done from the RIGHT and at
# most twice, so a comma inside a station name cannot create extra columns
# (which would make the three-name column assignment below fail).
#
# NOTE: this cell rebinds `station_coordinates` to the split frame, so column 0
# no longer exists afterwards; re-run the loading cell before re-running this one.
station_coordinates = station_coordinates[0].str.rsplit(',', n=2, expand=True)
station_coordinates.columns = ['station_name', 'station_latitude', 'station_longitude']

# Check station_coordinates data
display(station_coordinates)

Unnamed: 0,station_name,station_latitude,station_longitude
0,Marymount University / Yorktown Blvd & 26th St N,38.904252,-77.127776
1,East Falls Church Metro / Sycamore St & 19th St N,38.88532071975074,-77.15642720460892
2,E Fairfax St & S Washington St,38.880612,-77.171891
3,Inlet Ct & Wiehle Dr,38.965979,-77.334359
4,Reservoir Rd & 38th St NW,38.91261359469192,-77.07432264076488
...,...,...,...
721,Vienna Metro South,38.877415,-77.270731
722,Vaden Dr & Royal Victoria Dr/Providence Commun...,38.873261,-77.272999
723,District Ave & Strawberry Ln/Mosaic Market Garage,38.872642,-77.230632
724,13th & O St NW/ Logan Circle,38.908735,-77.029779


In [5]:
# count the missing entries in every column and show the per-column totals
null_counts = station_coordinates.isna().sum()
print(null_counts)

station_name         0
station_latitude     0
station_longitude    0
dtype: int64


In [6]:
# write the split station table to station_coordinates_cleaned.csv,
# leaving the row index out of the file
cleaned_csv_path = '/Users/maximilianolopezsalgado/data_projects/capital_bike_sharing/datasets/station_coordinates_cleaned.csv'
station_coordinates.to_csv(cleaned_csv_path, index=False)

In [7]:
# convert both coordinate columns to float, one column at a time
for coord_col in ('station_latitude', 'station_longitude'):
    station_coordinates[coord_col] = station_coordinates[coord_col].astype(float)

# confirm the resulting column dtypes
print(station_coordinates.dtypes)


station_name          object
station_latitude     float64
station_longitude    float64
dtype: object


In [8]:
# pair each station's latitude with its longitude, giving one
# (latitude, longitude) tuple per row in row order
station_coordinates_tuples = list(zip(
    station_coordinates['station_latitude'].tolist(),
    station_coordinates['station_longitude'].tolist(),
))
print(station_coordinates_tuples)

[(38.904252, -77.127776), (38.88532071975074, -77.15642720460892), (38.880612, -77.171891), (38.965979, -77.334359), (38.91261359469192, -77.07432264076488), (38.843422, -77.064016), (38.88397, -77.10783), (38.929261, -77.240654), (38.7930647, -77.049436), (38.938889, -76.997226), (38.81909, -77.01033), (38.92812111132096, -77.0237946510315), (39.105642, -77.20386), (38.888255, -77.0494365), (38.874820772513424, -77.0207878947258), (38.870823939537914, -77.01191514730453), (39.00048, -77.00077), (38.908391, -76.843263), (38.968842, -76.954171), (38.965742, -76.954803), (38.883318, -76.925315), (38.966848, -77.329282), (39.038423828518326, -77.05102041363716), (38.932242581555066, -76.9961330294609), (38.89595455700754, -76.9678448436025), (38.8368376462948, -77.05727398395538), (38.959361, -76.946361), (38.89386, -77.070862), (38.983199, -77.341823), (38.86612, -77.08787), (38.83728, -77.060899), (38.9523, -77.361466), (38.879355, -77.230681), (38.915, -77.0078), (38.864838, -77.056873

In [9]:
# build the base map
# NOTE(review): the centre point is presumably downtown Washington, D.C. - confirm
map_center = [38.9072, -77.0369]
m = folium.Map(location=map_center, zoom_start=14)

# drop one blue bicycle marker on the map for every station coordinate;
# a fresh Icon object is created for each marker, as in the original loop
for lat, lng in station_coordinates_tuples:
    bike_icon = folium.Icon(color='blue', icon='bicycle', prefix='fa')
    station_marker = folium.Marker(location=[lat, lng], icon=bike_icon)
    station_marker.add_to(m)

# leave the map as the cell's last expression so the notebook renders it
m

In [10]:
# create a heatmap with the coordinates of the stations and the number of trips in 2011
