In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
# Load the Divvy trip records into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path (with mixed
# separators); the notebook will not run elsewhere until it is made configurable.
DIVVY_TRIPS_CSV = r"C:\Users\NAjani\Jupyter/ChicagoDivvyBikeShare/202111-divvy-tripdata.csv"
data = pd.read_csv(DIVVY_TRIPS_CSV)

In [6]:
# Display the column labels of the loaded trips table as the cell output.
data.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual'],
      dtype='object')

In [8]:

# Trips per departure station. value_counts() orders the result from most to
# least frequent and, by default, leaves out rows with a missing station name.
start_station_names = data['start_station_name']
start_station_counts = start_station_names.value_counts()
print(start_station_counts)

start_station_name
Ellis Ave & 60th St             3095
University Ave & 57th St        2833
Kingsbury St & Kinzie St        2681
Clark St & Elm St               2423
Streeter Dr & Grand Ave         2165
                                ... 
Richmond St & 59th St              1
Kildare Ave & 26th St              1
Sacramento Blvd & Fulton Ave       1
Sacramento Blvd & Polk St          1
Francisco Ave & 47th St            1
Name: count, Length: 815, dtype: int64


In [10]:
# Identify the most and least popular start stations.
# idxmax()/idxmin() return only the FIRST label when several stations share the
# extreme count, so the station named as "least popular" is arbitrary whenever
# there is a tie. The tie size is computed and reported to make that explicit.
most_popular_start_station = start_station_counts.idxmax()
most_popular_start_station_count = start_station_counts.max()

least_popular_start_station = start_station_counts.idxmin()
least_popular_start_station_count = start_station_counts.min()

# Number of stations sharing the minimum trip count (includes the one named above).
least_popular_start_station_ties = int(
    (start_station_counts == least_popular_start_station_count).sum()
)

print("Most Popular Start Station:", most_popular_start_station, "with", most_popular_start_station_count, "trips")
print("Least Popular Start Station:", least_popular_start_station, "with", least_popular_start_station_count, "trips")
if least_popular_start_station_ties > 1:
    print(
        "Note:", least_popular_start_station_ties - 1,
        "other start station(s) are tied at", least_popular_start_station_count, "trip(s)",
    )

Most Popular Start Station: Ellis Ave & 60th St with 3095 trips
Least Popular Start Station: Western Ave & Lake St with 1 trips


In [11]:
# Trips per arrival station. value_counts() orders the result from most to
# least frequent and, by default, leaves out rows with a missing station name.
end_station_names = data['end_station_name']
end_station_counts = end_station_names.value_counts()
print(end_station_counts)

end_station_name
Ellis Ave & 60th St           3002
University Ave & 57th St      2966
Kingsbury St & Kinzie St      2545
Clark St & Elm St             2391
Streeter Dr & Grand Ave       2350
                              ... 
Panama Ave & Grace St            1
S Aberdeen St & W 106th St       1
Doty Ave & 111th St              1
W 103rd St & S Avers Ave         1
Francisco Ave & 47th St          1
Name: count, Length: 805, dtype: int64


In [12]:
# Identify the most and least popular end stations.
# idxmax()/idxmin() return only the FIRST label when several stations share the
# extreme count, so the station named as "least popular" is arbitrary whenever
# there is a tie. The tie size is computed and reported to make that explicit.
most_popular_end_station = end_station_counts.idxmax()
most_popular_end_station_count = end_station_counts.max()

least_popular_end_station = end_station_counts.idxmin()
least_popular_end_station_count = end_station_counts.min()

# Number of stations sharing the minimum trip count (includes the one named above).
least_popular_end_station_ties = int(
    (end_station_counts == least_popular_end_station_count).sum()
)

print("Most Popular End Station:", most_popular_end_station, "with", most_popular_end_station_count, "trips")
print("Least Popular End Station:", least_popular_end_station, "with", least_popular_end_station_count, "trips")
if least_popular_end_station_ties > 1:
    print(
        "Note:", least_popular_end_station_ties - 1,
        "other end station(s) are tied at", least_popular_end_station_count, "trip(s)",
    )

Most Popular End Station: Ellis Ave & 60th St with 3002 trips
Least Popular End Station: Roscoe & Harlem with 1 trips


In [13]:
def haversine(lat1, lon1, lat2, lon2, radius_km=6371):
    """Great-circle distance between two points via the haversine formula.

    All four coordinates are in decimal degrees. The computation is
    element-wise, so scalars, NumPy arrays, and pandas Series are accepted.

    Parameters
    ----------
    lat1, lon1 : latitude / longitude of the first point(s), in degrees.
    lat2, lon2 : latitude / longitude of the second point(s), in degrees.
    radius_km  : sphere radius; the result is in the same unit.
                 Defaults to 6371 (Earth radius in kilometers).

    Returns
    -------
    The distance along the sphere's surface, shaped like the inputs.
    A NaN in any coordinate produces NaN for that element.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN. Clamp it.
    # np.minimum/np.maximum still propagate genuine NaNs from missing inputs.
    a = np.minimum(1.0, np.maximum(0.0, a))
    c = 2 * np.arcsin(np.sqrt(a))
    return radius_km * c

In [20]:
# Add a per-trip distance column: the haversine distance between each trip's
# start and end coordinates (not the route actually ridden).
trip_start_coords = (data['start_lat'], data['start_lng'])
trip_end_coords = (data['end_lat'], data['end_lng'])
data['distance_km'] = haversine(*trip_start_coords, *trip_end_coords)

In [22]:
# Mean of the per-trip distance column; Series.mean() skips NaN entries by default.
trip_distances_km = data['distance_km']
average_distance_km = trip_distances_km.mean()

print("Average Distance Traveled:", average_distance_km, "km")

Average Distance Traveled: 1.9367633385063525 km


In [24]:
# Mean trip distance for each value of `member_casual`.
distance_by_member_type = data.groupby('member_casual')['distance_km']
average_distance_by_member_type = distance_by_member_type.mean()

print("Average Distance Traveled by Member Type:", average_distance_by_member_type, sep="\n")

Average Distance Traveled by Member Type:
member_casual
casual    2.097775
member    1.868804
Name: distance_km, dtype: float64


In [26]:
# Mean trip distance for each bike type (`rideable_type`).
distance_by_bike_type = data.groupby('rideable_type')['distance_km']
average_distance_by_rideable_type = distance_by_bike_type.mean()

print("Average Distance Traveled by Bike Type:", average_distance_by_rideable_type, sep="\n")

Average Distance Traveled by Bike Type:
rideable_type
classic_bike     1.764115
docked_bike      2.063917
electric_bike    2.065886
Name: distance_km, dtype: float64


In [27]:
# Count trips for every (start station, end station) pair.
route_key_columns = ['start_station_name', 'end_station_name']
trips_per_route = data.groupby(route_key_columns).size()
route_counts = trips_per_route.reset_index(name='count')

# Pick the row with the highest count; idxmax() returns the first such row if
# several routes are tied.
busiest_route_label = route_counts['count'].idxmax()
most_common_route = route_counts.loc[busiest_route_label]

print("Most Common Route:", most_common_route['start_station_name'], "to", most_common_route['end_station_name'])

Most Common Route: Ellis Ave & 60th St to University Ave & 57th St
