In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import folium as fm

In [2]:
# Read the raw trip records and preview the first five rows
trips_csv = "bike-trip.csv"
bike_share = pd.read_csv(trips_csv)
bike_share.head()

Unnamed: 0,duration_sec,start_time,end_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,user_type,member_birth_year,member_gender,bike_share_for_all_trip
0,52185,2019-02-28 17:32:10.1450,2019-03-01 08:01:55.9750,21.0,Montgomery St BART Station (Market St at 2nd St),37.789625,-122.400811,13.0,Commercial St at Montgomery St,37.794231,-122.402923,4902,Customer,1984.0,Male,No
1,42521,2019-02-28 18:53:21.7890,2019-03-01 06:42:03.0560,23.0,The Embarcadero at Steuart St,37.791464,-122.391034,81.0,Berry St at 4th St,37.77588,-122.39317,2535,Customer,,,No
2,61854,2019-02-28 12:13:13.2180,2019-03-01 05:24:08.1460,86.0,Market St at Dolores St,37.769305,-122.426826,3.0,Powell St BART Station (Market St at 4th St),37.786375,-122.404904,5905,Customer,1972.0,Male,No
3,36490,2019-02-28 17:54:26.0100,2019-03-01 04:02:36.8420,375.0,Grove St at Masonic Ave,37.774836,-122.446546,70.0,Central Ave at Fell St,37.773311,-122.444293,6638,Subscriber,1989.0,Other,No
4,1585,2019-02-28 23:54:18.5490,2019-03-01 00:20:44.0740,7.0,Frank H Ogawa Plaza,37.804562,-122.271738,222.0,10th Ave at E 15th St,37.792714,-122.24878,4898,Subscriber,1974.0,Male,Yes


In [3]:
# Dimensions of the raw trip table as (rows, columns)
bike_share.shape

(183412, 16)

In [4]:
# Per-column dtypes and non-null counts, to spot columns with missing data
bike_share.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183412 entries, 0 to 183411
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   duration_sec             183412 non-null  int64  
 1   start_time               183412 non-null  object 
 2   end_time                 183412 non-null  object 
 3   start_station_id         183215 non-null  float64
 4   start_station_name       183215 non-null  object 
 5   start_station_latitude   183412 non-null  float64
 6   start_station_longitude  183412 non-null  float64
 7   end_station_id           183215 non-null  float64
 8   end_station_name         183215 non-null  object 
 9   end_station_latitude     183412 non-null  float64
 10  end_station_longitude    183412 non-null  float64
 11  bike_id                  183412 non-null  int64  
 12  user_type                183412 non-null  object 
 13  member_birth_year        175147 non-null  float64
 14  memb

In [5]:
# Number of missing values in each column
bike_share.isna().sum()

duration_sec                  0
start_time                    0
end_time                      0
start_station_id            197
start_station_name          197
start_station_latitude        0
start_station_longitude       0
end_station_id              197
end_station_name            197
end_station_latitude          0
end_station_longitude         0
bike_id                       0
user_type                     0
member_birth_year          8265
member_gender              8265
bike_share_for_all_trip       0
dtype: int64

#### Task 1

In [6]:
# Count the trips that begin at each station. The id, name and coordinates are
# all used as group keys so they are carried through to the result.
# Trips with a missing station id or name are left out, because groupby drops
# NaN keys by default.
station_keys = [
    'start_station_id',
    'start_station_name',
    'start_station_latitude',
    'start_station_longitude',
]
rides_per_start = (
    bike_share
    .groupby(station_keys)
    .size()
    .reset_index(name='ride_count')
)

# Show the per-station ride counts
rides_per_start

Unnamed: 0,start_station_id,start_station_name,start_station_latitude,start_station_longitude,ride_count
0,3.0,Powell St BART Station (Market St at 4th St),37.786375,-122.404904,2760
1,4.0,Cyril Magnin St at Ellis St,37.785881,-122.408915,610
2,5.0,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,2327
3,6.0,The Embarcadero at Sansome St,37.804770,-122.403234,2082
4,7.0,Frank H Ogawa Plaza,37.804562,-122.271738,827
...,...,...,...,...,...
324,385.0,Woolsey St at Sacramento St,37.850578,-122.278175,359
325,386.0,24th St at Bartlett St,37.752105,-122.419724,464
326,388.0,Backesto Park (Jackson St at 13th St),37.352887,-121.886050,17
327,389.0,Taylor St at 9th St,37.353062,-121.891937,13


In [7]:
# Shorten the column labels for readability. Renaming by name (rather than
# assigning a positional list) cannot mislabel columns if the group-by keys are
# ever reordered, and it is a no-op when the cell is re-run.
rides_per_start = rides_per_start.rename(columns={
    'start_station_id': 'station_id',
    'start_station_name': 'station_name',
    'start_station_latitude': 'latitude',
    'start_station_longitude': 'longitude',
})

#### Task 2

In [8]:
# Rides started at each station, ordered from busiest to quietest
# (value_counts sorts by count, descending, and ignores missing names)
rides_by_station = bike_share['start_station_name'].value_counts()

# Five busiest stations
popular_stations = rides_by_station.head(5)

# Five quietest stations
least_popular_stations = rides_by_station.tail(5)

# Mean ride count across all stations
avg_rides_per_station = rides_by_station.mean()

In [9]:
# Report the two station rankings, each under its own heading
for heading, station_counts in (
    ("Most popular stations:", popular_stations),
    ("\nLeast popular stations:", least_popular_stations),
):
    print(heading)
    print(station_counts)

# Report the overall average
print("\nAverage number of rides per station:", avg_rides_per_station)

Most popular stations:
start_station_name
Market St at 10th St                                         3904
San Francisco Caltrain Station 2  (Townsend St at 4th St)    3544
Berry St at 4th St                                           3052
Montgomery St BART Station (Market St at 2nd St)             2895
Powell St BART Station (Market St at 4th St)                 2760
Name: count, dtype: int64

Least popular stations:
start_station_name
Willow St at Vine St              9
Parker Ave at McAllister St       7
Palm St at Willow St              4
21st Ave at International Blvd    4
16th St Depot                     2
Name: count, dtype: int64

Average number of rides per station: 556.8844984802431


#### Task 3 & 4

In [10]:
# Build an interactive folium map with one colour-coded marker per start station.

# Map view: centered on the city coordinates
map_center = [37.77, -122.42]
zoom_level = 12

# Ride-count tiers used to colour the markers.
# The busiest station in this dataset has 3,904 rides, so the previous upper
# cut-off of 4,000 could never be reached and no marker was ever red.
LOW_RIDE_THRESHOLD = 1000   # fewer rides than this -> green
HIGH_RIDE_THRESHOLD = 3000  # this many rides or more -> red


def marker_color(ride_count):
    """Return the marker colour ('green', 'orange' or 'red') for a ride count."""
    if ride_count < LOW_RIDE_THRESHOLD:
        return 'green'
    if ride_count < HIGH_RIDE_THRESHOLD:
        return 'orange'
    return 'red'


# Create the folium map
bike_map = fm.Map(location = map_center, zoom_start = zoom_level)

# Add one marker per station; the popup shows the station name and ride count.
# itertuples is used instead of iterrows because it is faster and keeps each
# column's own dtype.
for station in rides_per_start.itertuples(index = False):
    fm.Marker(
        location = [station.latitude, station.longitude],
        popup = f'Station: {station.station_name}<br>Rides: {station.ride_count}',
        icon = fm.Icon(color = marker_color(station.ride_count), icon = 'info-sign')
    ).add_to(bike_map)

In [11]:
# Save the folium map as an HTML file; the relative path resolves against the
# current working directory
bike_map.save('bike_map.html')

In [12]:
# Display the map inline by leaving it as the cell's last expression
bike_map

#### Task 5

+ Most Popular and Least Popular Stations: The analysis revealed the most popular and least popular bike share stations based on the number of rides originating from each station. This information can be used by bike share operators to allocate resources more effectively. Popular stations may require additional bikes or docking space to meet the demand, while less popular stations may need promotional efforts or incentives to increase ridership.
+ Geographic Distribution of Stations: The folium map with markers provides insights into the geographic distribution of bike share stations. On the map, the majority of stations fall in the lowest tier, with fewer than 1,000 rides each.


Overall, the information gathered from the analysis and visualization of the bike share system can be used by city planners and bike share operators to optimize the system in the following ways:

+ Resource Allocation: The data can be utilized to allocate bikes, docking space, and other resources based on the popularity of stations and usage patterns, ensuring that supply meets demand.
+ Expansion Planning: Insights on station distribution and usage patterns can aid in identifying areas for expanding the bike share system, enhancing accessibility and coverage across the city.
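+ Pricing and Marketing Strategies: Understanding the relationship between gender, station, and ridership can help refine pricing strategies and design targeted marketing campaigns to attract specific demographic segments. Note that this notebook only analyzes ridership by station; a gender breakdown would require further analysis of the `member_gender` column.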
+ Operational Efficiency: Analyzing usage patterns can guide bike redistribution efforts, station maintenance scheduling, and customer support services, optimizing the system's overall efficiency and user experience.