In [2]:
import quandl
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm # Using .api imports the public access version of statsmodels, which is a library that handles 
# statistical models.
import os
import warnings # This is a library that handles warnings.

warnings.filterwarnings("ignore") # Disable deprecation warnings that could indicate, for instance, a suspended library or 
# feature. These are more relevant to developers and very seldom to analysts.

plt.style.use('fivethirtyeight') # This is a styling option for how your plots will appear. More examples here:
# https://matplotlib.org/3.2.1/tutorials/introductory/customizing.html
# https://matplotlib.org/3.1.0/gallery/style_sheets/fivethirtyeight.html

In [3]:
#folder shortcut
path = r'C:\Users\hp\Documents\Citi Bike'

In [4]:
citi = pd.read_csv(os.path.join(path, '02 Data','prepared data', 'citi_8.csv'), index_col = False)

In [5]:
citi.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,day_of_week,start_hour,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,...,end_station_longitude,trip_duration,subscriber,birth_year,gender,ride_start,ride_end,Trip category,ride_start_date,ride_start_time
0,0,0,Mon,18,523,W 38 St & 8 Ave,40.754666,-73.991382,334,W 20 St & 7 Ave,...,-73.997262,993,Subscriber,1968.0,2,2013-09-09 18:18:55,2013-09-09 18:35:28,10-20 minutes,2013-09-09,2023-08-23 18:18:55
1,1,1,Thu,18,257,Lispenard St & Broadway,40.719392,-74.002472,236,St Marks Pl & 2 Ave,...,-73.98714,581,Subscriber,1983.0,1,2013-09-12 18:38:53,2013-09-12 18:48:34,under 10 minutes,2013-09-12,2023-08-23 18:38:53
2,2,2,Wed,19,479,9 Ave & W 45 St,40.760193,-73.991255,513,W 56 St & 10 Ave,...,-73.988639,361,Subscriber,1989.0,1,2013-09-18 19:44:04,2013-09-18 19:50:05,under 10 minutes,2013-09-18,2023-08-23 19:44:04
3,3,3,Sat,11,527,E 33 St & 1 Ave,40.743156,-73.974347,441,E 52 St & 2 Ave,...,-73.967416,561,Subscriber,1988.0,2,2013-09-28 11:54:37,2013-09-28 12:03:58,under 10 minutes,2013-09-28,2023-08-23 11:54:37
4,4,4,Fri,22,293,Lafayette St & E 8 St,40.730287,-73.990765,432,E 7 St & Avenue A,...,-73.983799,427,Subscriber,1986.0,2,2013-09-27 22:01:01,2013-09-27 22:08:08,under 10 minutes,2013-09-27,2023-08-23 22:01:01


In [6]:
# Define data types for specific columns
data_types = {
    'ride_start_date': 'datetime64'
}

In [7]:
file_path = os.path.join(path, '02 Data','prepared data','citi_8.csv')

In [8]:
parse_dates = ['ride_start_date']

In [9]:
citi = pd.read_csv(file_path, parse_dates=parse_dates)

In [10]:
# ride count per day
day_counts = citi.groupby( citi['ride_start_date'].dt.date)['ride_start_date'].count()

In [11]:
day_counts

ride_start_date
2013-09-01     729
2013-09-02     708
2013-09-03    1331
2013-09-04    1469
2013-09-05    1372
2013-09-06    1452
2013-09-07    1134
2013-09-08    1129
2013-09-09    1393
2013-09-10    1579
2013-09-11    1532
2013-09-12    1339
2013-09-13    1630
2013-09-14    1129
2013-09-15    1169
2013-09-16    1291
2013-09-17    1596
2013-09-18    1600
2013-09-19    1578
2013-09-20    1703
2013-09-21    1165
2013-09-22    1061
2013-09-23    1498
2013-09-24    1662
2013-09-25    1598
2013-09-26    1683
2013-09-27    1535
2013-09-28    1248
2013-09-29    1102
2013-09-30    1602
Name: ride_start_date, dtype: int64

In [12]:
result = pd.DataFrame({'date': day_counts.index, 'ride_count': day_counts.values})

In [13]:
# ride count for each day
result


Unnamed: 0,date,ride_count
0,2013-09-01,729
1,2013-09-02,708
2,2013-09-03,1331
3,2013-09-04,1469
4,2013-09-05,1372
5,2013-09-06,1452
6,2013-09-07,1134
7,2013-09-08,1129
8,2013-09-09,1393
9,2013-09-10,1579


In [14]:
day_of_week_counts = citi.groupby('day_of_week')['ride_start_date'].count()

In [15]:
day_of_week_counts

day_of_week
Fri    6320
Mon    6492
Sat    4676
Sun    5190
Thu    5972
Tue    6168
Wed    6199
Name: ride_start_date, dtype: int64

In [16]:
result_1 = pd.DataFrame({'day_of_week': day_of_week_counts.index, 'ride_count': day_of_week_counts.values})

In [17]:
# busiest day of week 
result_1

Unnamed: 0,day_of_week,ride_count
0,Fri,6320
1,Mon,6492
2,Sat,4676
3,Sun,5190
4,Thu,5972
5,Tue,6168
6,Wed,6199


In [18]:
# busiest hour
hour_counts = citi.groupby('start_hour')['ride_start_date'].count()

In [19]:
hour_counts

start_hour
0      403
1      257
2      158
3       90
4       73
5      191
6      863
7     1908
8     3023
9     2504
10    1759
11    1747
12    2258
13    2306
14    2259
15    2316
16    2866
17    4122
18    4041
19    2817
20    1848
21    1319
22    1109
23     780
Name: ride_start_date, dtype: int64

In [20]:
# rename coiumns
result_2 = pd.DataFrame({'hour': hour_counts.index, 'ride_count': hour_counts.values})

In [21]:
result_2

Unnamed: 0,hour,ride_count
0,0,403
1,1,257
2,2,158
3,3,90
4,4,73
5,5,191
6,6,863
7,7,1908
8,8,3023
9,9,2504


In [22]:
# hour counts of ride per day of week
hour_counts_on_day = citi.groupby([citi['day_of_week'], citi['start_hour']])['ride_start_date'].count()

In [23]:
hour_counts_on_day

day_of_week  start_hour
Fri          0              45
             1              33
             2              22
             3               8
             4               9
                          ... 
Wed          19            457
             20            281
             21            206
             22            181
             23             99
Name: ride_start_date, Length: 168, dtype: int64

In [24]:
hour_counts_on_day.info()

<class 'pandas.core.series.Series'>
MultiIndex: 168 entries, ('Fri', 0) to ('Wed', 23)
Series name: ride_start_date
Non-Null Count  Dtype
--------------  -----
168 non-null    int64
dtypes: int64(1)
memory usage: 2.3+ KB


In [25]:
result_3 = hour_counts_on_day.reset_index()

In [26]:
result_3

Unnamed: 0,day_of_week,start_hour,ride_start_date
0,Fri,0,45
1,Fri,1,33
2,Fri,2,22
3,Fri,3,8
4,Fri,4,9
...,...,...,...
163,Wed,19,457
164,Wed,20,281
165,Wed,21,206
166,Wed,22,181


In [27]:
result_max = result_3.groupby(['day_of_week', 'start_hour']).agg({
    'ride_start_date': 'max'
    
}).reset_index()


In [28]:
result_max


Unnamed: 0,day_of_week,start_hour,ride_start_date
0,Fri,0,45
1,Fri,1,33
2,Fri,2,22
3,Fri,3,8
4,Fri,4,9
...,...,...,...
163,Wed,19,457
164,Wed,20,281
165,Wed,21,206
166,Wed,22,181


In [29]:
result_max = result_3.groupby(['day_of_week', 'start_hour']).agg({
    'ride_start_date': 'count'
}).reset_index()

In [30]:
idx = result_max.groupby('day_of_week')['ride_start_date'].idxmax()

In [31]:
busiest_hours = result_max.loc[idx]

In [32]:
busiest_hours

Unnamed: 0,day_of_week,start_hour,ride_start_date
0,Fri,0,1
24,Mon,0,1
48,Sat,0,1
72,Sun,0,1
96,Thu,0,1
120,Tue,0,1
144,Wed,0,1


In [33]:
# trying to find the busiest hour on each day of the week and how many rides there were
result_max = result_3.groupby(['day_of_week', 'start_hour']).size().reset_index(name='ride_count')
max_hours = result_max.loc[result_max.groupby('day_of_week')['ride_count'].idxmax()]

In [34]:
max_hours

Unnamed: 0,day_of_week,start_hour,ride_count
0,Fri,0,1
24,Mon,0,1
48,Sat,0,1
72,Sun,0,1
96,Thu,0,1
120,Tue,0,1
144,Wed,0,1


In [35]:
result_3

Unnamed: 0,day_of_week,start_hour,ride_start_date
0,Fri,0,45
1,Fri,1,33
2,Fri,2,22
3,Fri,3,8
4,Fri,4,9
...,...,...,...
163,Wed,19,457
164,Wed,20,281
165,Wed,21,206
166,Wed,22,181


In [36]:
# busiest hour based on day of week 
result_max = citi.groupby(['day_of_week', 'start_hour'])['ride_start_date'].count().reset_index()
result_max = result_max.rename(columns={'ride_start_date': 'ride_count'})

max_rides = result_max.groupby('day_of_week').apply(lambda x: x[x['ride_count'] == x['ride_count'].max()]).reset_index(drop=True)

print(max_rides)

  day_of_week  start_hour  ride_count
0         Fri          17         695
1         Mon          18         720
2         Sat          15         410
3         Sun          16         447
4         Thu          18         660
5         Tue          17         663
6         Wed          18         674


In [37]:
# seeing subscribers vs non subscribers
subscriber_counts = citi.groupby(['subscriber', 'ride_start_date'])['ride_start_date'].count()

print(subscriber_counts)

subscriber  ride_start_date
Subscriber  2013-09-01          729
            2013-09-02          708
            2013-09-03         1331
            2013-09-04         1469
            2013-09-05         1372
            2013-09-06         1452
            2013-09-07         1134
            2013-09-08         1129
            2013-09-09         1393
            2013-09-10         1579
            2013-09-11         1532
            2013-09-12         1339
            2013-09-13         1630
            2013-09-14         1129
            2013-09-15         1169
            2013-09-16         1291
            2013-09-17         1596
            2013-09-18         1600
            2013-09-19         1578
            2013-09-20         1703
            2013-09-21         1165
            2013-09-22         1061
            2013-09-23         1498
            2013-09-24         1662
            2013-09-25         1598
            2013-09-26         1683
            2013-09-27         1535


In [38]:
# subscirber count
subscriber_counts = citi.groupby('subscriber')['ride_start_date'].count()

print(subscriber_counts)

subscriber
Subscriber    41017
Name: ride_start_date, dtype: int64


In [39]:
# avg age of rider by day
avg_age = citi.groupby('ride_start_date')['birth_year'].mean()

In [40]:
avg_age

ride_start_date
2013-09-01    1977.149520
2013-09-02    1976.199153
2013-09-03    1975.217130
2013-09-04    1975.466304
2013-09-05    1975.263120
2013-09-06    1975.752066
2013-09-07    1977.240741
2013-09-08    1976.562445
2013-09-09    1975.055994
2013-09-10    1975.308423
2013-09-11    1975.351828
2013-09-12    1974.932786
2013-09-13    1975.086503
2013-09-14    1976.350753
2013-09-15    1976.777588
2013-09-16    1975.967467
2013-09-17    1975.327694
2013-09-18    1974.927500
2013-09-19    1975.581749
2013-09-20    1975.631826
2013-09-21    1976.933047
2013-09-22    1977.058435
2013-09-23    1975.447931
2013-09-24    1975.161252
2013-09-25    1975.439299
2013-09-26    1975.111705
2013-09-27    1975.798046
2013-09-28    1977.386218
2013-09-29    1977.147005
2013-09-30    1975.194132
Name: birth_year, dtype: float64

In [72]:
# seeing how many rides start at each station
start_station_count = citi.groupby(['start_station_name', 'start_station_longitude', 'start_station_latitude'])['ride_start_date'].count().reset_index()



In [73]:
start_station_count

Unnamed: 0,start_station_name,start_station_longitude,start_station_latitude,ride_start_date
0,1 Ave & E 15 St,-73.981656,40.732219,191
1,1 Ave & E 18 St,-73.980243,40.734161,143
2,1 Ave & E 30 St,-73.975361,40.741444,126
3,1 Ave & E 44 St,-73.969053,40.750020,54
4,10 Ave & W 28 St,-74.001768,40.750664,140
...,...,...,...,...
325,Willoughby Ave & Hall St,-73.965369,40.691960,36
326,Willoughby Ave & Walworth St,-73.953820,40.693317,23
327,Willoughby St & Fleet St,-73.981302,40.691966,52
328,Wythe Ave & Metropolitan Ave,-73.963198,40.716887,60


In [43]:
# Finding the busiest start station
busiest_start_station = start_station_count.idxmax()
busiest_start_station_count = start_station_count.max()

In [44]:

print("The busiest start station is:", busiest_start_station)
print("Number of rides from the busiest start station:", busiest_start_station_count)

The busiest start station is: Pershing Square N
Number of rides from the busiest start station: 451


In [45]:
# Sorting stations by count to find the top 5 busiest and least busy
top_5_busiest = start_station_count.nlargest(5)
top_5_least_busy = start_station_count.nsmallest(5)

print("Top 5 busiest start stations:")
print(top_5_busiest)

print("\nTop 5 least busy start stations:")
print(top_5_least_busy)

Top 5 busiest start stations:
start_station_name
Pershing Square N        451
Lafayette St & E 8 St    446
E 17 St & Broadway       432
W 20 St & 11 Ave         424
8 Ave & W 31 St          359
Name: ride_start_date, dtype: int64

Top 5 least busy start stations:
start_station_name
Cadman Plaza E & Red Cross Pl    2
Franklin Ave & Myrtle Ave        3
7 Ave & Farragut St              4
Railroad Ave & Kay Ave           5
Park Ave & St Edwards St         7
Name: ride_start_date, dtype: int64


In [83]:
day_start_station_count = citi.groupby(['start_station_name', 'day_of_week'])['ride_start_date'].count().reset_index()

In [84]:
day_start_station_count

Unnamed: 0,start_station_name,day_of_week,ride_start_date
0,1 Ave & E 15 St,Fri,22
1,1 Ave & E 15 St,Mon,35
2,1 Ave & E 15 St,Sat,28
3,1 Ave & E 15 St,Sun,31
4,1 Ave & E 15 St,Thu,31
...,...,...,...
2267,York St & Jay St,Sat,18
2268,York St & Jay St,Sun,7
2269,York St & Jay St,Thu,11
2270,York St & Jay St,Tue,14


In [104]:


# Finding the busiest start station for each day of the week
busiest_stations_by_day = day_start_station_count.groupby('day_of_week')('ride_start_date'].count().idxmax()

# Printing the results
days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' (253284030.py, line 2)

In [105]:
busiest_stations_by_day

Unnamed: 0,day_of_week,start_station_name,ride_start_date
0,Fri,1 Ave & E 15 St,1
1,Fri,1 Ave & E 18 St,1
2,Fri,1 Ave & E 30 St,1
3,Fri,1 Ave & E 44 St,1
4,Fri,10 Ave & W 28 St,1
...,...,...,...
2267,Wed,Willoughby Ave & Hall St,1
2268,Wed,Willoughby Ave & Walworth St,1
2269,Wed,Willoughby St & Fleet St,1
2270,Wed,Wythe Ave & Metropolitan Ave,1


In [50]:
# trying to see the busiest start station based on hour
hour_start_station_count = citi.groupby(['start_station_name', 'start_hour'])['ride_start_date'].count()

In [51]:
hour_start_station_count

start_station_name  start_hour
1 Ave & E 15 St     0             6
                    1             2
                    2             3
                    3             1
                    4             2
                                 ..
York St & Jay St    19            9
                    20            4
                    21            3
                    22            6
                    23            2
Name: ride_start_date, Length: 6064, dtype: int64

In [52]:
hour_start_station_count = citi.groupby(['start_station_name', 'start_hour'])['ride_start_date'].count()

# Finding the busiest start station for each hour of the day
busiest_stations_by_hour = hour_start_station_count.groupby('start_hour').idxmax()

In [53]:
busiest_stations_by_hour

start_hour
0      (Allen St & Rivington St, 0)
1             (W 4 St & 7 Ave S, 1)
2            (E 7 St & Avenue A, 2)
3            (E 7 St & Avenue A, 3)
4              (8 Ave & W 33 St, 4)
5     (E 43 St & Vanderbilt Ave, 5)
6              (W 41 St & 8 Ave, 6)
7              (W 41 St & 8 Ave, 7)
8            (Pershing Square N, 8)
9              (W 33 St & 7 Ave, 9)
10         (E 17 St & Broadway, 10)
11      (West St & Chambers St, 11)
12           (W 4 St & 7 Ave S, 12)
13         (E 17 St & Broadway, 13)
14         (E 17 St & Broadway, 14)
15         (E 17 St & Broadway, 15)
16         (E 17 St & Broadway, 16)
17          (Pershing Square N, 17)
18          (Pershing Square N, 18)
19           (W 20 St & 11 Ave, 19)
20      (Lafayette St & E 8 St, 20)
21            (8 Ave & W 31 St, 21)
22      (Lafayette St & E 8 St, 22)
23           (W 4 St & 7 Ave S, 23)
Name: ride_start_date, dtype: object

In [54]:
result_1

Unnamed: 0,day_of_week,ride_count
0,Fri,6320
1,Mon,6492
2,Sat,4676
3,Sun,5190
4,Thu,5972
5,Tue,6168
6,Wed,6199


In [55]:
# saving to csv
result_1.to_csv(os.path.join(path, '02 Data','prepared data', 'day_of_week.csv'))

In [56]:
# saving to csv
result_2.to_csv(os.path.join(path, '02 Data','prepared data', 'hour_of_day.csv'))

In [57]:
# saving to csv
result.to_csv(os.path.join(path, '02 Data','prepared data', 'day_of_month.csv'))

In [58]:
# saving to csv
start_station_count.to_csv(os.path.join(path, '02 Data','prepared data', 'station_count.csv'))

In [59]:
# start hour and ride counts and the avg trip duration
hourly_aggregated = citi.groupby('start_hour').agg({
    'ride_start_date': 'count',          # Count of rides
    'trip_duration': 'mean'              # Average trip duration
}).reset_index()


In [60]:
hourly_aggregated

Unnamed: 0,start_hour,ride_start_date,trip_duration
0,0,403,677.868486
1,1,257,647.210117
2,2,158,620.936709
3,3,90,584.422222
4,4,73,616.589041
5,5,191,570.586387
6,6,863,598.979143
7,7,1908,668.484277
8,8,3023,705.284155
9,9,2504,668.77476


In [61]:
# how many riders there are based on birth year
age_counts = citi['birth_year'].value_counts()

In [62]:
age_counts

1985.0    1762
1981.0    1750
1982.0    1728
1984.0    1721
1983.0    1708
          ... 
1936.0       4
1934.0       2
1929.0       1
1926.0       1
1924.0       1
Name: birth_year, Length: 69, dtype: int64

In [63]:
# avg trip duration based on age
avg_trip_duration_by_age = citi.groupby('birth_year')['trip_duration'].mean() / 60

In [64]:
# saving to csv
avg_trip_duration_by_age.to_csv(os.path.join(path, '02 Data','prepared data', 'trip_dur.csv'))

In [65]:
avg_trip_duration_by_age

birth_year
1924.0     3.983333
1926.0     7.800000
1929.0    28.266667
1932.0    15.288333
1933.0     8.579167
            ...    
1993.0     9.627312
1994.0     9.556455
1995.0     9.951235
1996.0    11.956771
1997.0    14.385256
Name: trip_duration, Length: 69, dtype: float64

In [66]:
# avg birth year based on hour of ride
hour_avg_age = citi.groupby('start_hour')['birth_year'].mean()

In [67]:
hour_avg_age

start_hour
0     1979.364764
1     1979.521401
2     1978.848101
3     1981.111111
4     1974.657534
5     1975.727749
6     1974.477404
7     1974.099057
8     1974.921270
9     1974.972045
10    1974.802729
11    1974.760160
12    1975.172276
13    1975.002602
14    1975.042497
15    1974.935665
16    1974.916609
17    1975.317807
18    1976.312051
19    1977.108271
20    1978.429113
21    1978.040940
22    1978.482417
23    1978.702564
Name: birth_year, dtype: float64

In [68]:
years_to_drop = [1924, 1926, 1929]


In [69]:
# Create a list of birth years to drop
years_to_drop = [1924, 1926, 1929]

# Filter the DataFrame to exclude rows with these birth years
citi = citi[~citi['birth_year'].isin(years_to_drop)]


In [74]:
start_station_count

Unnamed: 0,start_station_name,start_station_longitude,start_station_latitude,ride_start_date
0,1 Ave & E 15 St,-73.981656,40.732219,191
1,1 Ave & E 18 St,-73.980243,40.734161,143
2,1 Ave & E 30 St,-73.975361,40.741444,126
3,1 Ave & E 44 St,-73.969053,40.750020,54
4,10 Ave & W 28 St,-74.001768,40.750664,140
...,...,...,...,...
325,Willoughby Ave & Hall St,-73.965369,40.691960,36
326,Willoughby Ave & Walworth St,-73.953820,40.693317,23
327,Willoughby St & Fleet St,-73.981302,40.691966,52
328,Wythe Ave & Metropolitan Ave,-73.963198,40.716887,60


In [76]:
# saving to csv
start_station_count.to_csv(os.path.join(path, '02 Data','prepared data', 'start_station.csv'))

In [77]:
#finding out the busiest end station 
end_station_count = citi.groupby(['end_station_name', 'end_station_longitude', 'end_station_latitude'])['ride_start_date'].count().reset_index()

In [78]:
end_station_count

Unnamed: 0,end_station_name,end_station_longitude,end_station_latitude,ride_start_date
0,1 Ave & E 15 St,-73.981656,40.732219,228
1,1 Ave & E 18 St,-73.980243,40.734161,134
2,1 Ave & E 30 St,-73.975361,40.741444,145
3,1 Ave & E 44 St,-73.969053,40.750020,58
4,10 Ave & W 28 St,-74.001768,40.750664,124
...,...,...,...,...
325,Willoughby Ave & Hall St,-73.965369,40.691960,27
326,Willoughby Ave & Walworth St,-73.953820,40.693317,27
327,Willoughby St & Fleet St,-73.981302,40.691966,40
328,Wythe Ave & Metropolitan Ave,-73.963198,40.716887,59


In [79]:
# saving to csv
end_station_count.to_csv(os.path.join(path, '02 Data','prepared data', 'end_station.csv'))

In [80]:
busiest_stations_by_day

day_of_week
Fri        (Pershing Square N, Fri)
Mon        (Pershing Square N, Mon)
Sat       (E 17 St & Broadway, Sat)
Sun    (Lafayette St & E 8 St, Sun)
Thu          (8 Ave & W 31 St, Thu)
Tue        (Pershing Square N, Tue)
Wed        (Pershing Square N, Wed)
Name: ride_start_date, dtype: object

In [87]:


busiest_stations_by_day_1 = day_start_station_count.groupby('day_of_week')('start_station_name')['ride_start_date'].idxmax()


TypeError: 'DataFrameGroupBy' object is not callable

In [86]:
busiest_stations_by_day_1

day_of_week
Fri    1578
Mon    1579
Sat     721
Sun    1337
Thu     111
Tue    1583
Wed    1584
Name: ride_start_date, dtype: int64

In [100]:
busiest_stations_by_day = day_start_station_count.groupby(['day_of_week', 'start_station_name'])['ride_start_date'].count()



In [92]:
busiest_stations_by_day_1

1578        Pershing Square N
1579        Pershing Square N
721        E 17 St & Broadway
1337    Lafayette St & E 8 St
111           8 Ave & W 31 St
1583        Pershing Square N
1584        Pershing Square N
Name: start_station_name, dtype: object

In [101]:
busiest_stations_by_day

day_of_week  start_station_name          
Fri          1 Ave & E 15 St                 1
             1 Ave & E 18 St                 1
             1 Ave & E 30 St                 1
             1 Ave & E 44 St                 1
             10 Ave & W 28 St                1
                                            ..
Wed          Willoughby Ave & Hall St        1
             Willoughby Ave & Walworth St    1
             Willoughby St & Fleet St        1
             Wythe Ave & Metropolitan Ave    1
             York St & Jay St                1
Name: ride_start_date, Length: 2272, dtype: int64

In [102]:


# Finding the busiest start station for each day of the week
busiest_stations_by_day = day_start_station_count.groupby('day_of_week')('start_station_name')('ride_start_date'].count().idxmax()

# Printing the results
days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' (1580004348.py, line 2)

In [103]:
# Group by 'day_of_week' and 'start_station_name', and calculate the total rides per station
busiest_stations_by_day = day_start_station_count.groupby(['day_of_week', 'start_station_name'])['ride_start_date'].count().reset_index()

# Find the station with the maximum rides for each day of the week
busiest_station_for_each_day = busiest_stations_by_day.groupby('day_of_week')['ride_start_date'].idxmax()

# Get the corresponding station name for each day of the week
result = busiest_stations_by_day.loc[busiest_station_for_each_day, ['day_of_week', 'start_station_name']]

# Printing the results
print(result)


     day_of_week start_station_name
0            Fri    1 Ave & E 15 St
327          Mon    1 Ave & E 15 St
654          Sat    1 Ave & E 15 St
977          Sun    1 Ave & E 15 St
1299         Thu    1 Ave & E 15 St
1623         Tue    1 Ave & E 15 St
1946         Wed    1 Ave & E 15 St
