# 11.1 Timestamp 객체

## 11.1.1 파이썬의 날짜/시간

In [1]:
import datetime as dt
import pandas as pd

In [3]:
# 다음 두 줄은 결과가 동일합니다.
birthday = dt.date(1991, 4, 12)
birthday = dt.date(year = 1991, month = 4, day = 12)
birthday

datetime.date(1991, 4, 12)

In [4]:
birthday.year

1991

In [5]:
birthday.month

4

In [6]:
birthday.day

12

In [7]:
birthday.month = 10

AttributeError: attribute 'month' of 'datetime.date' objects is not writable

In [8]:
# 다음 두 줄을 결과가 동일합니다.
alarm_clock = dt.time(6, 43, 25)
alarm_clock = dt.time(hour = 6, minute = 43, second = 25)
alarm_clock

datetime.time(6, 43, 25)

In [9]:
dt.time()

datetime.time(0, 0)

In [10]:
dt.time(hour = 9, second = 42)

datetime.time(9, 0, 42)

In [11]:
dt.time(hour = 19, minute = 43, second = 22)

datetime.time(19, 43, 22)

In [12]:
alarm_clock.hour

6

In [13]:
alarm_clock.minute

43

In [14]:
alarm_clock.second

25

In [15]:
# 다음 두 줄은 결과가 동일합니다.
moon_landing = dt.datetime(1969, 7, 20, 22, 56, 20)
moon_landing = dt.datetime(
    year = 1969,
    month = 7,
    day = 20,
    hour = 22,
    minute = 56,
    second = 20
)

moon_landing

datetime.datetime(1969, 7, 20, 22, 56, 20)

In [16]:
dt.datetime(2020, 1, 1)

datetime.datetime(2020, 1, 1, 0, 0)

In [19]:
dt.timedelta(
    weeks = 8,
    days = 6,
    hours = 3,
    minutes = 58,
    seconds = 12
)

datetime.timedelta(days=62, seconds=14292)

# 11.1.2 판다스의 날짜/시간

In [20]:
# 다음 두 줄은 결과가 동일합니다.
pd.Timestamp(1991, 4, 12)
pd.Timestamp(year = 1991, month = 4, day = 12)

Timestamp('1991-04-12 00:00:00')

In [21]:
(pd.Timestamp(year = 1991, month = 4, day = 12) == dt.date(year = 1991, month = 4, day = 12))

  (pd.Timestamp(year = 1991, month = 4, day = 12) == dt.date(year = 1991, month = 4, day = 12))


True

In [22]:
(pd.Timestamp(year = 1991, month = 4, day = 12, minute = 2) == dt.datetime(year = 1991, month = 4, day = 12, minute = 2))

True

In [23]:
(pd.Timestamp(year = 1991, month = 4, day = 12, minute = 2) == dt.datetime(year = 1991, month = 4, day = 12, minute = 1))

False

In [24]:
pd.Timestamp('2015-03-31')

Timestamp('2015-03-31 00:00:00')

In [25]:
pd.Timestamp('2015/03/31')

Timestamp('2015-03-31 00:00:00')

In [26]:
pd.Timestamp('03/31/2015')

Timestamp('2015-03-31 00:00:00')

In [27]:
pd.Timestamp('2021-03-08 08:35:15')

Timestamp('2021-03-08 08:35:15')

In [28]:
pd.Timestamp('2021-03-08 6:13:29 PM')

Timestamp('2021-03-08 18:13:29')

In [29]:
pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))

Timestamp('2000-02-03 21:35:22')

In [30]:
my_time = pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))
print(my_time.year)
print(my_time.month)
print(my_time.day)
print(my_time.hour)
print(my_time.minute)
print(my_time.second)

2000
2
3
21
35
22


# 11.2 다중 Timestamp를 저장하는 DatetimeIndex

In [31]:
pd.Series([1, 2, 3]).index

RangeIndex(start=0, stop=3, step=1)

In [32]:
pd.Series([1, 2, 3], index = ['A', 'B', 'C']).index

Index(['A', 'B', 'C'], dtype='object')

In [33]:
timestamps = [
    pd.Timestamp('2020-01-01'),
    pd.Timestamp('2020-02-01'),
    pd.Timestamp('2020-03-01')
]
pd.Series([1, 2, 3], index = timestamps).index

DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

In [34]:
datetimes = [
    dt.datetime(2020, 1, 1),
    dt.datetime(2020, 2, 1),
    dt.datetime(2020, 3, 1)
]
pd.Series([1, 2, 3], index = datetimes).index

DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

In [35]:
string_dates = ['2018/01/02', '2016/04/12', '2009/09/07']
pd.DatetimeIndex(string_dates)

DatetimeIndex(['2018-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [36]:
mixed_dates = [
    dt.date(2018, 1, 2),
    '2016/04/12',
    pd.Timestamp(2009, 9, 7)
]
dt_index = pd.DatetimeIndex(mixed_dates)
dt_index

DatetimeIndex(['2018-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [38]:
s = pd.Series(data = [100, 200, 300], index = dt_index)
s

2018-01-02    100
2016-04-12    200
2009-09-07    300
dtype: int64

In [39]:
s.sort_index()

2009-09-07    300
2016-04-12    200
2018-01-02    100
dtype: int64

In [41]:
morning = pd.Timestamp('2020-01-01 11:23:22 AM')
evening = pd.Timestamp('2020-01-01 11:23:22 PM')

morning < evening

True

# 11.3 열 또는 인덱스 값을 날짜/시간으로 변환

In [42]:
disney = pd.read_csv('disney.csv')
disney.head()

Unnamed: 0,Date,High,Low,Open,Close
0,1962-01-02,0.096026,0.092908,0.092908,0.092908
1,1962-01-03,0.094467,0.092908,0.092908,0.094155
2,1962-01-04,0.094467,0.093532,0.094155,0.094155
3,1962-01-05,0.094779,0.093844,0.094155,0.094467
4,1962-01-08,0.095714,0.092285,0.094467,0.094155


In [44]:
disney.dtypes

Date      object
High     float64
Low      float64
Open     float64
Close    float64
dtype: object

In [45]:
disney = pd.read_csv('disney.csv', parse_dates = ['Date'])

In [46]:
string_dates = ['2015-01-01', '2016-02-02', '2017-03-03']
dt_index = pd.to_datetime(string_dates)
dt_index

DatetimeIndex(['2015-01-01', '2016-02-02', '2017-03-03'], dtype='datetime64[ns]', freq=None)

In [47]:
pd.to_datetime(disney['Date']).head()

0   1962-01-02
1   1962-01-03
2   1962-01-04
3   1962-01-05
4   1962-01-08
Name: Date, dtype: datetime64[ns]

In [48]:
disney['Date'] = pd.to_datetime(disney['Date'])

In [49]:
disney.dtypes

Date     datetime64[ns]
High            float64
Low             float64
Open            float64
Close           float64
dtype: object

# 11.4 DatetimeProperties 객체

In [50]:
disney['Date'].dt

<pandas.core.indexes.accessors.DatetimeProperties object at 0x0000013EFD640160>

In [51]:
disney['Date'].head(3)

0   1962-01-02
1   1962-01-03
2   1962-01-04
Name: Date, dtype: datetime64[ns]

In [52]:
disney['Date'].dt.day.head()

0    2
1    3
2    4
3    5
4    8
Name: Date, dtype: int64

In [53]:
disney['Date'].dt.month.head(3)

0    1
1    1
2    1
Name: Date, dtype: int64

In [54]:
disney['Date'].dt.year.head(3)

0    1962
1    1962
2    1962
Name: Date, dtype: int64

In [55]:
disney['Date'].dt.dayofweek.head()

0    1
1    2
2    3
3    4
4    0
Name: Date, dtype: int64

In [56]:
disney['Date'].dt.day_name().head()

0      Tuesday
1    Wednesday
2     Thursday
3       Friday
4       Monday
Name: Date, dtype: object

In [57]:
disney['Day of Week'] = disney['Date'].dt.day_name()

In [58]:
group = disney.groupby('Day of Week')

In [59]:
group.mean()

Unnamed: 0_level_0,High,Low,Open,Close
Day of Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Friday,23.767304,23.318898,23.552872,23.554498
Monday,23.377271,22.930606,23.161392,23.162543
Thursday,23.770234,23.288687,23.534561,23.540359
Tuesday,23.791234,23.335267,23.571755,23.562907
Wednesday,23.842743,23.355419,23.605618,23.609873


In [60]:
disney['Date'].dt.month_name().head()

0    January
1    January
2    January
3    January
4    January
Name: Date, dtype: object

In [61]:
disney['Date'].dt.is_quarter_start.tail()

14722    False
14723    False
14724    False
14725     True
14726    False
Name: Date, dtype: bool

In [62]:
disney[disney['Date'].dt.is_quarter_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
189,1962-10-01,0.064849,0.062355,0.063913,0.062355,Monday
314,1963-04-01,0.087989,0.086704,0.087025,0.086704,Monday
377,1963-07-01,0.096338,0.095053,0.096338,0.095696,Monday
441,1963-10-01,0.110467,0.107898,0.107898,0.110467,Tuesday
565,1964-04-01,0.116248,0.112394,0.112394,0.116248,Wednesday


In [63]:
disney[disney['Date'].dt.is_quarter_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday
440,1963-09-30,0.109825,0.105972,0.108541,0.107577,Monday
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday
564,1964-03-31,0.115605,0.112394,0.114963,0.112394,Tuesday
628,1964-06-30,0.101476,0.100191,0.101476,0.100834,Tuesday


In [64]:
disney[disney['Date'].dt.is_month_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
22,1962-02-01,0.096338,0.093532,0.093532,0.094779,Thursday
41,1962-03-01,0.095714,0.093532,0.093532,0.095714,Thursday
83,1962-05-01,0.087296,0.085426,0.085738,0.086673,Tuesday
105,1962-06-01,0.079814,0.077943,0.079814,0.079814,Friday
147,1962-08-01,0.06859,0.068278,0.06859,0.06859,Wednesday


In [65]:
disney[disney['Date'].dt.is_month_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
21,1962-01-31,0.093844,0.092908,0.093532,0.093532,Wednesday
40,1962-02-28,0.094779,0.09322,0.094155,0.09322,Wednesday
82,1962-04-30,0.087608,0.085738,0.087608,0.085738,Monday
104,1962-05-31,0.082308,0.079814,0.079814,0.079814,Thursday
146,1962-07-31,0.069214,0.068278,0.068278,0.06859,Tuesday


In [66]:
disney[disney['Date'].dt.is_year_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week


In [67]:
disney[disney['Date'].dt.is_year_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday
755,1964-12-31,0.117853,0.11689,0.11689,0.11689,Thursday
1007,1965-12-31,0.154141,0.150929,0.153498,0.152214,Friday
1736,1968-12-31,0.439301,0.431594,0.434163,0.436732,Tuesday


# 11.5 일정 시간의 덧셈과 뺄셈

In [68]:
pd.DateOffset(year = 3, month = 4, days = 5)

<DateOffset: days=5, month=4, year=3>

In [69]:
disney['Date'].head()

0   1962-01-02
1   1962-01-03
2   1962-01-04
3   1962-01-05
4   1962-01-08
Name: Date, dtype: datetime64[ns]

In [70]:
(disney['Date'] + pd.DateOffset(days = 5)).head()

0   1962-01-07
1   1962-01-08
2   1962-01-09
3   1962-01-10
4   1962-01-13
Name: Date, dtype: datetime64[ns]

In [71]:
(disney['Date'] - pd.DateOffset(days = 3)).head()

0   1961-12-30
1   1961-12-31
2   1962-01-01
3   1962-01-02
4   1962-01-05
Name: Date, dtype: datetime64[ns]

In [72]:
(disney['Date'] + pd.DateOffset(days = 10, hours = 6)).head()

0   1962-01-12 06:00:00
1   1962-01-13 06:00:00
2   1962-01-14 06:00:00
3   1962-01-15 06:00:00
4   1962-01-18 06:00:00
Name: Date, dtype: datetime64[ns]

In [73]:
(disney['Date'] - pd.DateOffset(years = 1, months = 3, days = 10, hours = 6, minutes = 3)).head()

0   1960-09-21 17:57:00
1   1960-09-22 17:57:00
2   1960-09-23 17:57:00
3   1960-09-24 17:57:00
4   1960-09-27 17:57:00
Name: Date, dtype: datetime64[ns]

# 11.6 날짜 오프셋

In [97]:
disney['Date'].tail()

14722   2020-06-26
14723   2020-06-29
14724   2020-06-30
14725   2020-07-01
14726   2020-07-02
Name: Date, dtype: datetime64[ns]

In [99]:
(disney['Date'] + pd.offsets.MonthEnd()).tail()

14722   2020-06-30
14723   2020-06-30
14724   2020-07-31
14725   2020-07-31
14726   2020-07-31
Name: Date, dtype: datetime64[ns]

In [100]:
(disney['Date'] - pd.offsets.MonthEnd()).tail()

14722   2020-05-31
14723   2020-05-31
14724   2020-05-31
14725   2020-06-30
14726   2020-06-30
Name: Date, dtype: datetime64[ns]

In [102]:
(disney['Date'] + pd.offsets.MonthBegin()).tail()

14722   2020-07-01
14723   2020-07-01
14724   2020-07-01
14725   2020-08-01
14726   2020-08-01
Name: Date, dtype: datetime64[ns]

In [103]:
(disney['Date'] - pd.offsets.MonthBegin()).tail()

14722   2020-06-01
14723   2020-06-01
14724   2020-06-01
14725   2020-06-01
14726   2020-07-01
Name: Date, dtype: datetime64[ns]

In [105]:
may_dates = ['2020-05-28', '2020-05-29', '2020-05-30']
end_of_may = pd.Series(pd.to_datetime(may_dates))
end_of_may

0   2020-05-28
1   2020-05-29
2   2020-05-30
dtype: datetime64[ns]

In [106]:
end_of_may + pd.offsets.MonthEnd()

0   2020-05-31
1   2020-05-31
2   2020-05-31
dtype: datetime64[ns]

In [107]:
end_of_may + pd.offsets.BMonthEnd()

0   2020-05-29
1   2020-06-30
2   2020-06-30
dtype: datetime64[ns]

# 11.7 Timedelta 객체

In [74]:
duration = pd.Timedelta(
    days = 8,
    hours = 7,
    minutes = 6,
    seconds = 5
)
duration

Timedelta('8 days 07:06:05')

In [75]:
pd.to_timedelta('3 hours, 5 minutes, 12 seconds')

Timedelta('0 days 03:05:12')

In [76]:
pd.to_timedelta(5, unit = 'hour')

Timedelta('0 days 05:00:00')

In [78]:
pd.to_timedelta([5, 10, 15], unit = 'day')

TimedeltaIndex(['5 days', '10 days', '15 days'], dtype='timedelta64[ns]', freq=None)

In [79]:
pd.Timestamp('1999-02-05') - pd.Timestamp('1998-05-24')

Timedelta('257 days 00:00:00')

In [80]:
deliveries = pd.read_csv('deliveries.csv')
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,5/24/98,2/5/99
1,4/22/92,3/6/98
2,2/10/91,8/26/92
3,7/21/92,11/20/97
4,9/2/93,6/10/98


In [81]:
deliveries['order_date'] = pd.to_datetime(deliveries['order_date'])
deliveries['delivery_date'] = pd.to_datetime(deliveries['delivery_date'])

In [83]:
for column in ['order_date', 'delivery_date']:
    deliveries[column] = pd.to_datetime(deliveries[column])

In [84]:
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,1998-05-24,1999-02-05
1,1992-04-22,1998-03-06
2,1991-02-10,1992-08-26
3,1992-07-21,1997-11-20
4,1993-09-02,1998-06-10


In [85]:
(deliveries['delivery_date'] - deliveries['order_date']).head()

0    257 days
1   2144 days
2    563 days
3   1948 days
4   1742 days
dtype: timedelta64[ns]

In [86]:
deliveries['duration'] = (deliveries['delivery_date'] - deliveries['order_date'])
deliveries.head()

Unnamed: 0,order_date,delivery_date,duration
0,1998-05-24,1999-02-05,257 days
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days


In [87]:
deliveries.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
duration         timedelta64[ns]
dtype: object

In [88]:
(deliveries['delivery_date'] - deliveries['duration']).head()

0   1998-05-24
1   1992-04-22
2   1991-02-10
3   1992-07-21
4   1993-09-02
dtype: datetime64[ns]

In [89]:
(deliveries['delivery_date'] + deliveries['duration']).head()

0   1999-10-20
1   2004-01-18
2   1994-03-12
3   2003-03-22
4   2003-03-18
dtype: datetime64[ns]

In [90]:
deliveries.sort_values('duration')

Unnamed: 0,order_date,delivery_date,duration
454,1990-05-24,1990-06-01,8 days
294,1994-08-11,1994-08-20,9 days
10,1998-05-10,1998-05-19,9 days
499,1993-06-03,1993-06-13,10 days
143,1997-09-20,1997-10-06,16 days
...,...,...,...
152,1990-09-18,1999-12-19,3379 days
62,1990-04-02,1999-08-16,3423 days
458,1990-02-13,1999-11-15,3562 days
145,1990-03-07,1999-12-25,3580 days


In [91]:
deliveries['duration'].max()

Timedelta('3583 days 00:00:00')

In [92]:
deliveries['duration'].min()

Timedelta('8 days 00:00:00')

In [93]:
deliveries['duration'].mean()

Timedelta('1217 days 22:53:53.532934128')

In [94]:
# 다음 두 줄을 결과가 동일합니다.
(deliveries['duration'] > pd.Timedelta(days = 365)).head()
(deliveries['duration'] > '365 days').head()

0    False
1     True
2     True
3     True
4     True
Name: duration, dtype: bool

In [95]:
deliveries[deliveries['duration'] > '365 days'].head()

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days
6,1990-01-25,1994-10-02,1711 days


In [96]:
long_time = (deliveries['duration'] > '2000 days, 8 hours, 4 minutes')
deliveries[long_time].head()

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
7,1992-02-23,1998-12-30,2502 days
11,1992-10-17,1998-10-06,2180 days
12,1992-05-30,1999-08-15,2633 days
15,1990-01-20,1998-07-24,3107 days
