## 11.1 Introducing the Timestamp object

### 11.1.1 How Python works with datetimes

In [100]:
import datetime as dt
import pandas as pd

In [101]:
# The two lines below are equivalent
birthday = dt.date(1991, 4, 12)
birthday = dt.date(year = 1991, month = 4, day = 12)
birthday

datetime.date(1991, 4, 12)

In [102]:
birthday.year

1991

In [103]:
birthday.month

4

In [104]:
birthday.day

12

**NOTE**: I've commented out the code below so that the Notebook can run without raising an error.

In [105]:
# birthday.month = 10

In [106]:
# The two lines below are equivalent
alarm_clock = dt.time(6, 43, 25)
alarm_clock = dt.time(hour = 6, minute = 43, second = 25)
alarm_clock

datetime.time(6, 43, 25)

In [107]:
dt.time()

datetime.time(0, 0)

In [108]:
dt.time(hour = 9, second = 42)

datetime.time(9, 0, 42)

In [109]:
dt.time(hour = 19, minute = 43, second = 22)

datetime.time(19, 43, 22)

In [110]:
alarm_clock.hour

6

In [111]:
alarm_clock.minute

43

In [112]:
alarm_clock.second

25

In [113]:
# The two lines below are equivalent
moon_landing = dt.datetime(1969, 7, 20, 22, 56, 20)
moon_landing = dt.datetime(
    year = 1969,
    month = 7,
    day = 20,
    hour = 22,
    minute = 56,
    second = 20
)
moon_landing

datetime.datetime(1969, 7, 20, 22, 56, 20)

In [114]:
dt.datetime(2020, 1, 1)

datetime.datetime(2020, 1, 1, 0, 0)

In [115]:
dt.timedelta(
    weeks = 8,
    days = 6,
    hours = 3,
    minutes = 58,
    seconds = 12
)

datetime.timedelta(days=62, seconds=14292)

### 11.1.2 How `pandas` works with datetimes

In [116]:
# The two lines below are equivalent
pd.Timestamp(1991, 4, 12)
pd.Timestamp(year = 1991, month = 4, day = 12)

Timestamp('1991-04-12 00:00:00')

In [117]:
(pd.Timestamp(year = 1991, month = 4, day = 12)
 == dt.date(year = 1991, month = 4, day = 12))

False

In [118]:
(pd.Timestamp(year = 1991, month = 4, day = 12, minute = 2)
 == dt.datetime(year = 1991, month = 4, day = 12, minute = 2))

True

In [119]:
(pd.Timestamp(year = 1991, month = 4, day = 12, minute = 2)
 == dt.datetime(year = 1991, month = 4, day = 12, minute = 1))

False

In [120]:
pd.Timestamp("2015-03-31")

Timestamp('2015-03-31 00:00:00')

In [121]:
pd.Timestamp("2015/03/31")

Timestamp('2015-03-31 00:00:00')

In [122]:
pd.Timestamp("03/31/2015")

Timestamp('2015-03-31 00:00:00')

In [123]:
pd.Timestamp("2021-03-08 08:35:15")

Timestamp('2021-03-08 08:35:15')

In [124]:
pd.Timestamp("2021-03-08 6:13:29 PM")

Timestamp('2021-03-08 18:13:29')

In [125]:
pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))

Timestamp('2000-02-03 21:35:22')

In [126]:
my_time = pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))
print(my_time.year)
print(my_time.month)
print(my_time.day)
print(my_time.hour)
print(my_time.minute)
print(my_time.second)

2000
2
3
21
35
22


## 11.2 Storing Multiple Timestamps in a DatetimeIndex

In [127]:
pd.Series([1, 2, 3]).index

RangeIndex(start=0, stop=3, step=1)

In [128]:
pd.Series([1, 2, 3], index = ["A", "B", "C"]).index

Index(['A', 'B', 'C'], dtype='object')

In [129]:
timestamps = [
    pd.Timestamp("2020-01-01"),
    pd.Timestamp("2020-02-01"),
    pd.Timestamp("2020-03-01"),
]

pd.Series([1, 2, 3], index = timestamps).index

DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

In [130]:
datetimes = [
    dt.datetime(2020, 1, 1),
    dt.datetime(2020, 2, 1),
    dt.datetime(2020, 3, 1),
]

pd.Series([1, 2, 3], index = datetimes).index

DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

In [131]:
string_dates = ["2018/01/02", "2016/04/12", "2009/09/07"]
pd.DatetimeIndex(string_dates)

DatetimeIndex(['2018-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [132]:
mixed_dates = [
    dt.date(2018, 1, 2),
    "2016/04/12",
    pd.Timestamp(2009, 9, 7)
]

dt_index = pd.DatetimeIndex(mixed_dates)
dt_index

DatetimeIndex(['2018-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [133]:
s = pd.Series(data = [100, 200, 300], index = dt_index)
s

Unnamed: 0,0
2018-01-02,100
2016-04-12,200
2009-09-07,300


In [134]:
s.sort_index()

Unnamed: 0,0
2009-09-07,300
2016-04-12,200
2018-01-02,100


In [135]:
morning = pd.Timestamp("2020-01-01 11:23:22 AM")
evening = pd.Timestamp("2020-01-01 11:23:22 PM")

morning < evening

True

## 11.3	Converting Column or Index Values to Datetimes

In [136]:
disney = pd.read_csv("disney.csv")
disney.head()

Unnamed: 0,Date,High,Low,Open,Close
0,1962-01-02,0.096026,0.092908,0.092908,0.092908
1,1962-01-03,0.094467,0.092908,0.092908,0.094155
2,1962-01-04,0.094467,0.093532,0.094155,0.094155
3,1962-01-05,0.094779,0.093844,0.094155,0.094467
4,1962-01-08,0.095714,0.092285,0.094467,0.094155


In [137]:
disney.dtypes

Unnamed: 0,0
Date,object
High,float64
Low,float64
Open,float64
Close,float64


In [138]:
disney = pd.read_csv("disney.csv", parse_dates = ["Date"])

In [139]:
string_dates = ["2015-01-01", "2016-02-02", "2017-03-03"]
dt_index = pd.to_datetime(string_dates)
dt_index

DatetimeIndex(['2015-01-01', '2016-02-02', '2017-03-03'], dtype='datetime64[ns]', freq=None)

In [140]:
pd.to_datetime(disney["Date"]).head()

Unnamed: 0,Date
0,1962-01-02
1,1962-01-03
2,1962-01-04
3,1962-01-05
4,1962-01-08


In [141]:
disney["Date"] = pd.to_datetime(disney["Date"])

In [142]:
disney.dtypes

Unnamed: 0,0
Date,datetime64[ns]
High,float64
Low,float64
Open,float64
Close,float64


## 11.4 Using the DatetimeProperties Object

In [143]:
disney["Date"].dt

<pandas.core.indexes.accessors.DatetimeProperties object at 0x79bbaa37efc0>

In [144]:
disney["Date"].head(3)

Unnamed: 0,Date
0,1962-01-02
1,1962-01-03
2,1962-01-04


In [145]:
disney["Date"].dt.day.head(3)

Unnamed: 0,Date
0,2
1,3
2,4


In [146]:
disney["Date"].dt.month.head(3)

Unnamed: 0,Date
0,1
1,1
2,1


In [147]:
disney["Date"].dt.year.head(3)

Unnamed: 0,Date
0,1962
1,1962
2,1962


In [148]:
disney["Date"].dt.dayofweek.head()

Unnamed: 0,Date
0,1
1,2
2,3
3,4
4,0


In [149]:
disney["Date"].dt.day_name().head()

Unnamed: 0,Date
0,Tuesday
1,Wednesday
2,Thursday
3,Friday
4,Monday


In [150]:
disney["Day of Week"] = disney["Date"].dt.day_name()

In [151]:
group = disney.groupby("Day of Week")

In [152]:
group.mean()

Unnamed: 0_level_0,Date,High,Low,Open,Close
Day of Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Friday,1991-04-22 07:43:12.391304320,23.767304,23.318898,23.552872,23.554498
Monday,1991-01-07 17:19:32.688477952,23.377271,22.930606,23.161392,23.162543
Thursday,1991-04-18 19:51:05.226198528,23.770234,23.288687,23.534561,23.540359
Tuesday,1991-04-28 15:55:03.487213568,23.791234,23.335267,23.571755,23.562907
Wednesday,1991-06-05 20:20:29.619746432,23.842743,23.355419,23.605618,23.609873


In [153]:
disney["Date"].dt.month_name().head()

Unnamed: 0,Date
0,January
1,January
2,January
3,January
4,January


In [154]:
disney["Date"].dt.is_quarter_start.tail()

Unnamed: 0,Date
14722,False
14723,False
14724,False
14725,True
14726,False


In [155]:
disney[disney["Date"].dt.is_quarter_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
189,1962-10-01,0.064849,0.062355,0.063913,0.062355,Monday
314,1963-04-01,0.087989,0.086704,0.087025,0.086704,Monday
377,1963-07-01,0.096338,0.095053,0.096338,0.095696,Monday
441,1963-10-01,0.110467,0.107898,0.107898,0.110467,Tuesday
565,1964-04-01,0.116248,0.112394,0.112394,0.116248,Wednesday


In [156]:
disney[disney["Date"].dt.is_quarter_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday
440,1963-09-30,0.109825,0.105972,0.108541,0.107577,Monday
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday
564,1964-03-31,0.115605,0.112394,0.114963,0.112394,Tuesday
628,1964-06-30,0.101476,0.100191,0.101476,0.100834,Tuesday


In [157]:
disney[disney["Date"].dt.is_month_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
22,1962-02-01,0.096338,0.093532,0.093532,0.094779,Thursday
41,1962-03-01,0.095714,0.093532,0.093532,0.095714,Thursday
83,1962-05-01,0.087296,0.085426,0.085738,0.086673,Tuesday
105,1962-06-01,0.079814,0.077943,0.079814,0.079814,Friday
147,1962-08-01,0.06859,0.068278,0.06859,0.06859,Wednesday


In [158]:
disney[disney["Date"].dt.is_month_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
21,1962-01-31,0.093844,0.092908,0.093532,0.093532,Wednesday
40,1962-02-28,0.094779,0.09322,0.094155,0.09322,Wednesday
82,1962-04-30,0.087608,0.085738,0.087608,0.085738,Monday
104,1962-05-31,0.082308,0.079814,0.079814,0.079814,Thursday
146,1962-07-31,0.069214,0.068278,0.068278,0.06859,Tuesday


In [159]:
disney[disney["Date"].dt.is_year_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week


In [160]:
disney[disney["Date"].dt.is_year_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday
755,1964-12-31,0.117853,0.11689,0.11689,0.11689,Thursday
1007,1965-12-31,0.154141,0.150929,0.153498,0.152214,Friday
1736,1968-12-31,0.439301,0.431594,0.434163,0.436732,Tuesday


## 11.5 Adding and Subtracting Durations of Time

In [161]:
pd.DateOffset(years = 3, months = 4, days = 5)

<DateOffset: days=5, months=4, years=3>

In [162]:
disney["Date"].head()

Unnamed: 0,Date
0,1962-01-02
1,1962-01-03
2,1962-01-04
3,1962-01-05
4,1962-01-08


In [163]:
(disney["Date"] + pd.DateOffset(days = 5)).head()

Unnamed: 0,Date
0,1962-01-07
1,1962-01-08
2,1962-01-09
3,1962-01-10
4,1962-01-13


In [164]:
(disney["Date"] - pd.DateOffset(days = 3)).head()

Unnamed: 0,Date
0,1961-12-30
1,1961-12-31
2,1962-01-01
3,1962-01-02
4,1962-01-05


In [165]:
(disney["Date"] + pd.DateOffset(days = 10, hours = 6)).head()

Unnamed: 0,Date
0,1962-01-12 06:00:00
1,1962-01-13 06:00:00
2,1962-01-14 06:00:00
3,1962-01-15 06:00:00
4,1962-01-18 06:00:00


In [166]:
(
    disney["Date"]
    - pd.DateOffset(
        years = 1, months = 3, days = 10, hours = 6, minutes = 3
    )
).head()

Unnamed: 0,Date
0,1960-09-21 17:57:00
1,1960-09-22 17:57:00
2,1960-09-23 17:57:00
3,1960-09-24 17:57:00
4,1960-09-27 17:57:00


## 11.6 Date Offsets

In [167]:
disney["Date"].tail()

Unnamed: 0,Date
14722,2020-06-26
14723,2020-06-29
14724,2020-06-30
14725,2020-07-01
14726,2020-07-02


In [168]:
(disney["Date"] + pd.offsets.MonthEnd()).tail()

Unnamed: 0,Date
14722,2020-06-30
14723,2020-06-30
14724,2020-07-31
14725,2020-07-31
14726,2020-07-31


In [169]:
(disney["Date"] - pd.offsets.MonthEnd()).tail()

Unnamed: 0,Date
14722,2020-05-31
14723,2020-05-31
14724,2020-05-31
14725,2020-06-30
14726,2020-06-30


In [170]:
(disney["Date"] + pd.offsets.MonthBegin()).tail()

Unnamed: 0,Date
14722,2020-07-01
14723,2020-07-01
14724,2020-07-01
14725,2020-08-01
14726,2020-08-01


In [171]:
(disney["Date"] - pd.offsets.MonthBegin()).tail()

Unnamed: 0,Date
14722,2020-06-01
14723,2020-06-01
14724,2020-06-01
14725,2020-06-01
14726,2020-07-01


In [172]:
may_dates = ["2020-05-28", "2020-05-29", "2020-05-30"]
end_of_may = pd.Series(pd.to_datetime(may_dates))
end_of_may

Unnamed: 0,0
0,2020-05-28
1,2020-05-29
2,2020-05-30


In [173]:
end_of_may + pd.offsets.MonthEnd()

Unnamed: 0,0
0,2020-05-31
1,2020-05-31
2,2020-05-31


In [174]:
end_of_may + pd.offsets.BMonthEnd()

Unnamed: 0,0
0,2020-05-29
1,2020-06-30
2,2020-06-30


## 11.7 The `timedelta` Object

In [175]:
duration = pd.Timedelta(
    days = 8,
    hours = 7,
    minutes = 6,
    seconds = 5
)

duration

Timedelta('8 days 07:06:05')

In [176]:
pd.to_timedelta("3 hours, 5 minutes, 12 seconds")

Timedelta('0 days 03:05:12')

In [177]:
pd.to_timedelta(5, unit = "hour")

Timedelta('0 days 05:00:00')

In [178]:
pd.to_timedelta([5, 10, 15], unit = "day")

TimedeltaIndex(['5 days', '10 days', '15 days'], dtype='timedelta64[ns]', freq=None)

In [179]:
pd.Timestamp("1999-02-05") - pd.Timestamp("1998-05-24")

Timedelta('257 days 00:00:00')

In [180]:
deliveries = pd.read_csv("deliveries.csv")
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,5/24/98,2/5/99
1,4/22/92,3/6/98
2,2/10/91,8/26/92
3,7/21/92,11/20/97
4,9/2/93,6/10/98


In [181]:
deliveries["order_date"] = pd.to_datetime(
    deliveries["order_date"]
)

deliveries["delivery_date"] = pd.to_datetime(
    deliveries["delivery_date"]
)

  deliveries["order_date"] = pd.to_datetime(
  deliveries["delivery_date"] = pd.to_datetime(


In [182]:
for column in ["order_date", "delivery_date"]:
    deliveries[column] = pd.to_datetime(deliveries[column])

In [183]:
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,1998-05-24,1999-02-05
1,1992-04-22,1998-03-06
2,1991-02-10,1992-08-26
3,1992-07-21,1997-11-20
4,1993-09-02,1998-06-10


In [184]:
(deliveries["delivery_date"] - deliveries["order_date"]).head()

Unnamed: 0,0
0,257 days
1,2144 days
2,563 days
3,1948 days
4,1742 days


In [185]:
deliveries["duration"] = (
    deliveries["delivery_date"] - deliveries["order_date"]
)
deliveries.head()

Unnamed: 0,order_date,delivery_date,duration
0,1998-05-24,1999-02-05,257 days
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days


In [186]:
deliveries.dtypes

Unnamed: 0,0
order_date,datetime64[ns]
delivery_date,datetime64[ns]
duration,timedelta64[ns]


In [187]:
(deliveries["delivery_date"] - deliveries["duration"]).head()

Unnamed: 0,0
0,1998-05-24
1,1992-04-22
2,1991-02-10
3,1992-07-21
4,1993-09-02


In [188]:
(deliveries["delivery_date"] + deliveries["duration"]).head()

Unnamed: 0,0
0,1999-10-20
1,2004-01-18
2,1994-03-12
3,2003-03-22
4,2003-03-18


In [189]:
deliveries.sort_values("duration")

Unnamed: 0,order_date,delivery_date,duration
454,1990-05-24,1990-06-01,8 days
294,1994-08-11,1994-08-20,9 days
10,1998-05-10,1998-05-19,9 days
499,1993-06-03,1993-06-13,10 days
143,1997-09-20,1997-10-06,16 days
...,...,...,...
152,1990-09-18,1999-12-19,3379 days
62,1990-04-02,1999-08-16,3423 days
458,1990-02-13,1999-11-15,3562 days
145,1990-03-07,1999-12-25,3580 days


In [190]:
deliveries["duration"].max()

Timedelta('3583 days 00:00:00')

In [191]:
deliveries["duration"].min()

Timedelta('8 days 00:00:00')

In [192]:
deliveries["duration"].mean()

Timedelta('1217 days 22:53:53.532934128')

In [193]:
# The two lines below are equivalent
(deliveries["duration"] > pd.Timedelta(days = 365)).head()
(deliveries["duration"] > "365 days").head()

Unnamed: 0,duration
0,False
1,True
2,True
3,True
4,True


In [194]:
deliveries[deliveries["duration"] > "365 days"].head()

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days
6,1990-01-25,1994-10-02,1711 days


In [195]:
long_time = (
    deliveries["duration"] > "2000 days, 8 hours, 4 minutes"
)

deliveries[long_time].head()

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
7,1992-02-23,1998-12-30,2502 days
11,1992-10-17,1998-10-06,2180 days
12,1992-05-30,1999-08-15,2633 days
15,1990-01-20,1998-07-24,3107 days


## 11.8 Coding Challenge

### 11.8.1 Problems

In [196]:
citi_bike = pd.read_csv("citibike.csv")
citi_bike.head()

Unnamed: 0,start_time,stop_time
0,2020-06-01 00:00:03.3720,2020-06-01 00:17:46.2080
1,2020-06-01 00:00:03.5530,2020-06-01 01:03:33.9360
2,2020-06-01 00:00:09.6140,2020-06-01 00:17:06.8330
3,2020-06-01 00:00:12.1780,2020-06-01 00:03:58.8640
4,2020-06-01 00:00:21.2550,2020-06-01 00:24:18.9650


In [197]:
citi_bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1882273 entries, 0 to 1882272
Data columns (total 2 columns):
 #   Column      Dtype 
---  ------      ----- 
 0   start_time  object
 1   stop_time   object
dtypes: object(2)
memory usage: 28.7+ MB


### 11.8.2. Solutions

In [198]:
for column in ["start_time", "stop_time"]:
    citi_bike[column] = pd.to_datetime(citi_bike[column])

In [199]:
citi_bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1882273 entries, 0 to 1882272
Data columns (total 2 columns):
 #   Column      Dtype         
---  ------      -----         
 0   start_time  datetime64[ns]
 1   stop_time   datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 28.7 MB


In [200]:
citi_bike["start_time"].dt.day_name().head()

Unnamed: 0,start_time
0,Monday
1,Monday
2,Monday
3,Monday
4,Monday


In [201]:
citi_bike["start_time"].dt.day_name().value_counts()

Unnamed: 0_level_0,count
start_time,Unnamed: 1_level_1
Tuesday,305833
Sunday,301482
Monday,292690
Saturday,285966
Friday,258479
Wednesday,222647
Thursday,215176


In [202]:
citi_bike["start_time"].dt.dayofweek.head()

Unnamed: 0,start_time
0,0
1,0
2,0
3,0
4,0


In [203]:
days_away_from_monday = citi_bike["start_time"].dt.dayofweek

In [204]:
citi_bike["start_time"] - pd.to_timedelta(
    days_away_from_monday, unit = "day"
)

Unnamed: 0,start_time
0,2020-06-01 00:00:03.372
1,2020-06-01 00:00:03.553
2,2020-06-01 00:00:09.614
3,2020-06-01 00:00:12.178
4,2020-06-01 00:00:21.255
...,...
1882268,2020-06-29 23:59:41.116
1882269,2020-06-29 23:59:46.426
1882270,2020-06-29 23:59:47.477
1882271,2020-06-29 23:59:53.395


In [205]:
dates_rounded_to_monday = citi_bike[
    "start_time"
] - pd.to_timedelta(days_away_from_monday, unit = "day")

In [206]:
dates_rounded_to_monday.value_counts().head()

Unnamed: 0_level_0,count
start_time,Unnamed: 1_level_1
2020-06-08 18:28:49.897,3
2020-06-08 15:29:16.897,3
2020-06-22 15:32:50.931,3
2020-06-08 16:50:44.596,3
2020-06-22 17:43:44.188,3


In [207]:
dates_rounded_to_monday.dt.date.head()

Unnamed: 0,start_time
0,2020-06-01
1,2020-06-01
2,2020-06-01
3,2020-06-01
4,2020-06-01


In [208]:
dates_rounded_to_monday.dt.date.value_counts()

Unnamed: 0_level_0,count
start_time,Unnamed: 1_level_1
2020-06-15,481211
2020-06-08,471384
2020-06-22,465412
2020-06-01,337590
2020-06-29,126676


In [209]:
citi_bike["duration"] = (
    citi_bike["stop_time"] - citi_bike["start_time"]
)

citi_bike.head()

Unnamed: 0,start_time,stop_time,duration
0,2020-06-01 00:00:03.372,2020-06-01 00:17:46.208,0 days 00:17:42.836000
1,2020-06-01 00:00:03.553,2020-06-01 01:03:33.936,0 days 01:03:30.383000
2,2020-06-01 00:00:09.614,2020-06-01 00:17:06.833,0 days 00:16:57.219000
3,2020-06-01 00:00:12.178,2020-06-01 00:03:58.864,0 days 00:03:46.686000
4,2020-06-01 00:00:21.255,2020-06-01 00:24:18.965,0 days 00:23:57.710000


In [210]:
citi_bike["duration"].mean()

Timedelta('0 days 00:27:19.590506853')

In [211]:
citi_bike["duration"].sort_values(ascending = False).head()

Unnamed: 0,duration
50593,32 days 15:01:54.940000
98339,31 days 01:47:20.632000
52306,30 days 19:32:20.696000
15171,30 days 04:26:48.424000
149761,28 days 09:24:50.696000


In [212]:
citi_bike.nlargest(n = 5, columns = "duration")

Unnamed: 0,start_time,stop_time,duration
50593,2020-06-01 21:30:17.759,2020-07-04 12:32:12.699,32 days 15:01:54.940000
98339,2020-06-02 19:41:39.659,2020-07-03 21:29:00.291,31 days 01:47:20.632000
52306,2020-06-01 22:17:10.908,2020-07-02 17:49:31.604,30 days 19:32:20.696000
15171,2020-06-01 13:01:41.942,2020-07-01 17:28:30.366,30 days 04:26:48.424000
149761,2020-06-04 14:36:53.482,2020-07-03 00:01:44.178,28 days 09:24:50.696000


## 11.9 Summary