In [1]:
import datetime as dt
import pandas as pd

In [101]:
# Build a datetime from year, month and day; the time part defaults to midnight.
birthday = dt.datetime(year=2003, month=8, day=12)
birthday

datetime.datetime(2003, 8, 12, 0, 0)

In [102]:
# Year component of the datetime.
birthday.year

2003

In [103]:
# Month component of the datetime (1-12).
birthday.month

8

In [104]:
# Day-of-month component of the datetime.
birthday.day

12

In [105]:
# A time of day (no date attached): hour, minute, second.
alarm_clock = dt.time(hour=6, minute=43, second=25)
alarm_clock

datetime.time(6, 43, 25)

In [106]:
# A datetime can carry both the date and the time of day.
moon_landing = dt.datetime(
    year=2003, month=8, day=12,
    hour=13, minute=22, second=23,
)
moon_landing

datetime.datetime(2003, 8, 12, 13, 22, 23)

In [108]:
# Format the datetime as text; the format directives choose which fields appear.
moon_landing.strftime('%Y-%m-%d %H:%M:%S')

'2003-08-12 13:22:23'

In [109]:
# A timedelta normalizes its arguments into days and seconds:
# 8 weeks + 6 days = 62 days, and 3 h 58 min 2 s = 14282 seconds.
dt.timedelta(weeks=8,
             days=6,
             hours=3,
             minutes=58,
             seconds=2)

datetime.timedelta(days=62, seconds=14282)

In [110]:
# pandas provides the same functionality through its own Timestamp type,
# which accepts year, month, day just like datetime.
pd.Timestamp(2003, 8, 12)

Timestamp('2003-08-12 00:00:00')

In [111]:
# A Timestamp can also be built from a dash-separated date string.
pd.Timestamp('2003-08-12')

Timestamp('2003-08-12 00:00:00')

In [112]:
# Slash-separated, year-first strings are parsed to the same date.
pd.Timestamp('2003/08/12')

Timestamp('2003-08-12 00:00:00')

In [113]:
# Month/day/year strings are read month first (result: 2003-08-12).
pd.Timestamp('8/12/2003')

Timestamp('2003-08-12 00:00:00')

In [114]:
# A 12-hour clock time with AM/PM is converted to 24-hour time (04:24:35 PM -> 16:24:35).
pd.Timestamp('2003-08-12 04:24:35 PM')

Timestamp('2003-08-12 16:24:35')

In [115]:
# Three consecutive days in December 2024, used as the index of a Series;
# pandas stores such an index as a DatetimeIndex.
timestamps = [pd.Timestamp(2024, 12, day) for day in (1, 2, 3)]
pd.Series([1, 2, 3], index=timestamps).index

DatetimeIndex(['2024-12-01', '2024-12-02', '2024-12-03'], dtype='datetime64[ns]', freq=None)

In [116]:
# Timestamps are comparable, so a plain list of them can be sorted in place.
timestamps.sort()
timestamps

[Timestamp('2024-12-01 00:00:00'),
 Timestamp('2024-12-02 00:00:00'),
 Timestamp('2024-12-03 00:00:00')]

In [117]:
# Load the Disney stock prices without any date handling and preview the first rows.
disney = pd.read_csv('disney.csv')
disney.head()

Unnamed: 0,Date,High,Low,Open,Close
0,1962-01-02,0.096026,0.092908,0.092908,0.092908
1,1962-01-03,0.094467,0.092908,0.092908,0.094155
2,1962-01-04,0.094467,0.093532,0.094155,0.094155
3,1962-01-05,0.094779,0.093844,0.094155,0.094467
4,1962-01-08,0.095714,0.092285,0.094467,0.094155


In [118]:
# Inspect the column dtypes: without date parsing, Date is loaded as object dtype.
disney.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14727 entries, 0 to 14726
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    14727 non-null  object 
 1   High    14727 non-null  float64
 2   Low     14727 non-null  float64
 3   Open    14727 non-null  float64
 4   Close   14727 non-null  float64
dtypes: float64(4), object(1)
memory usage: 575.4+ KB


In [119]:
# Re-read the file, asking read_csv to parse the Date column;
# info() now reports it as datetime64.
disney = pd.read_csv('disney.csv',
                     parse_dates=['Date'])
disney.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14727 entries, 0 to 14726
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    14727 non-null  datetime64[ns]
 1   High    14727 non-null  float64       
 2   Low     14727 non-null  float64       
 3   Open    14727 non-null  float64       
 4   Close   14727 non-null  float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 575.4 KB


In [120]:
# Alternative: convert an existing column with pd.to_datetime after loading.
# (Here the column was already parsed by read_csv, so the dtype is unchanged.)
disney['Date'] = pd.to_datetime(disney['Date'])
disney.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14727 entries, 0 to 14726
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    14727 non-null  datetime64[ns]
 1   High    14727 non-null  float64       
 2   Low     14727 non-null  float64       
 3   Open    14727 non-null  float64       
 4   Close   14727 non-null  float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 575.4 KB


In [121]:
# The .dt accessor exposes datetime components; here, only the day of the month.
disney['Date'].dt.day

0         2
1         3
2         4
3         5
4         8
         ..
14722    26
14723    29
14724    30
14725     1
14726     2
Name: Date, Length: 14727, dtype: int32

In [122]:
# day_of_week gives the weekday as an integer: 0 = Monday, 1 = Tuesday, ...
a = pd.Timestamp(2024, 12, 3)
a.day_of_week

1

In [123]:
# The same weekday number (0 = Monday) for every row, via the .dt accessor.
disney['Date'].dt.dayofweek

0        1
1        2
2        3
3        4
4        0
        ..
14722    4
14723    0
14724    1
14725    2
14726    3
Name: Date, Length: 14727, dtype: int32

In [124]:
# day_name() returns the weekday names in English.
disney['Date'].dt.day_name()

0          Tuesday
1        Wednesday
2         Thursday
3           Friday
4           Monday
           ...    
14722       Friday
14723       Monday
14724      Tuesday
14725    Wednesday
14726     Thursday
Name: Date, Length: 14727, dtype: object

In [125]:
# Add the weekday name to the existing DataFrame as a new column.
disney['Day of Week'] = disney['Date'].dt.day_name()
disney.head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
0,1962-01-02,0.096026,0.092908,0.092908,0.092908,Tuesday
1,1962-01-03,0.094467,0.092908,0.092908,0.094155,Wednesday
2,1962-01-04,0.094467,0.093532,0.094155,0.094155,Thursday
3,1962-01-05,0.094779,0.093844,0.094155,0.094467,Friday
4,1962-01-08,0.095714,0.092285,0.094467,0.094155,Monday


In [126]:
# Average of the numeric columns for each weekday
# (groups are listed alphabetically by weekday name, not in calendar order).
disney.groupby('Day of Week').mean(numeric_only=True)

Unnamed: 0_level_0,High,Low,Open,Close
Day of Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Friday,23.767304,23.318898,23.552872,23.554498
Monday,23.377271,22.930606,23.161392,23.162543
Thursday,23.770234,23.288687,23.534561,23.540359
Tuesday,23.791234,23.335267,23.571755,23.562907
Wednesday,23.842743,23.355419,23.605618,23.609873


In [127]:
# month_name() gives the month name of each date; store it as a new column.
disney['Month'] = disney['Date'].dt.month_name()
disney.head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week,Month
0,1962-01-02,0.096026,0.092908,0.092908,0.092908,Tuesday,January
1,1962-01-03,0.094467,0.092908,0.092908,0.094155,Wednesday,January
2,1962-01-04,0.094467,0.093532,0.094155,0.094155,Thursday,January
3,1962-01-05,0.094779,0.093844,0.094155,0.094467,Friday,January
4,1962-01-08,0.095714,0.092285,0.094467,0.094155,Monday,January


In [128]:
# Average of the numeric columns for each month, rounded to two decimals
# (groups are listed alphabetically by month name).
disney.groupby('Month').mean(numeric_only=True).round(2)

Unnamed: 0_level_0,High,Low,Open,Close
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
April,24.29,23.8,24.06,24.07
August,23.06,22.62,22.86,22.84
December,23.67,23.23,23.45,23.44
February,24.16,23.67,23.92,23.93
January,23.19,22.73,22.96,22.97
July,23.69,23.28,23.49,23.48
June,25.0,24.54,24.78,24.77
March,23.8,23.28,23.57,23.56
May,24.93,24.45,24.7,24.7
November,23.57,23.08,23.31,23.34


In [129]:
# Quarters begin in January, April, July and October; is_quarter_start is True
# only on the 1st of those months. Use it as a boolean mask to filter rows.
disney[disney['Date'].dt.is_quarter_start]

Unnamed: 0,Date,High,Low,Open,Close,Day of Week,Month
189,1962-10-01,0.064849,0.062355,0.063913,0.062355,Monday,October
314,1963-04-01,0.087989,0.086704,0.087025,0.086704,Monday,April
377,1963-07-01,0.096338,0.095053,0.096338,0.095696,Monday,July
441,1963-10-01,0.110467,0.107898,0.107898,0.110467,Tuesday,October
565,1964-04-01,0.116248,0.112394,0.112394,0.116248,Wednesday,April
...,...,...,...,...,...,...,...
14409,2019-04-01,112.870003,111.379997,111.589996,112.510002,Monday,April
14472,2019-07-01,141.949997,139.220001,140.449997,141.649994,Monday,July
14536,2019-10-01,131.779999,129.509995,130.800003,129.550003,Tuesday,October
14662,2020-04-01,97.400002,92.559998,93.500000,94.919998,Wednesday,April


In [130]:
# is_quarter_end is True on the last calendar day of a quarter
# (Mar 31, Jun 30, Sep 30, Dec 31) -- not on every month end.
disney[disney['Date'].dt.is_quarter_end]

Unnamed: 0,Date,High,Low,Open,Close,Day of Week,Month
251,1962-12-31,0.074501,0.071290,0.074501,0.072253,Monday,December
440,1963-09-30,0.109825,0.105972,0.108541,0.107577,Monday,September
502,1963-12-31,0.101476,0.096980,0.097622,0.101476,Tuesday,December
564,1964-03-31,0.115605,0.112394,0.114963,0.112394,Tuesday,March
628,1964-06-30,0.101476,0.100191,0.101476,0.100834,Tuesday,June
...,...,...,...,...,...,...,...
14347,2018-12-31,109.669998,107.550003,109.110001,109.650002,Monday,December
14535,2019-09-30,130.860001,129.820007,130.350006,130.320007,Monday,September
14599,2019-12-31,144.770004,143.259995,143.669998,144.630005,Tuesday,December
14661,2020-03-31,103.160004,96.160004,100.220001,96.599998,Tuesday,March


In [131]:
# The stock market does not trade on New Year's Day, so no row is dated
# January 1 and the result is empty.
disney[disney['Date'].dt.is_year_start]

Unnamed: 0,Date,High,Low,Open,Close,Day of Week,Month


In [132]:
# Keep only the rows dated December 31, the last day of the year
# (years with no row on that date do not appear).
disney[disney['Date'].dt.is_year_end]

Unnamed: 0,Date,High,Low,Open,Close,Day of Week,Month
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday,December
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday,December
755,1964-12-31,0.117853,0.11689,0.11689,0.11689,Thursday,December
1007,1965-12-31,0.154141,0.150929,0.153498,0.152214,Friday,December
1736,1968-12-31,0.439301,0.431594,0.434163,0.436732,Tuesday,December
1986,1969-12-31,0.694275,0.678219,0.678219,0.688495,Wednesday,December
2240,1970-12-31,0.732168,0.721892,0.732168,0.726388,Thursday,December
2493,1971-12-31,1.430939,1.392404,1.392404,1.412956,Friday,December
2996,1973-12-31,0.983931,0.955672,0.973655,0.971086,Monday,December
3249,1974-12-31,0.44187,0.421318,0.421318,0.439301,Tuesday,December


In [133]:
# To find the date 3 years, 4 months and 3 days after an existing date,
# describe the shift with a DateOffset.
pd.DateOffset(years=3,
              months=4,
              days=3)

<DateOffset: days=3, months=4, years=3>

In [67]:
# First few dates, as a reference point for the offset arithmetic.
disney['Date'].head()

0   1962-01-02
1   1962-01-03
2   1962-01-04
3   1962-01-05
4   1962-01-08
Name: Date, dtype: datetime64[ns]

In [69]:
# Adding a DateOffset shifts every date forward, here by 5 days.
disney['Date']+pd.DateOffset(days=5)

0       1962-01-07
1       1962-01-08
2       1962-01-09
3       1962-01-10
4       1962-01-13
           ...    
14722   2020-07-01
14723   2020-07-04
14724   2020-07-05
14725   2020-07-06
14726   2020-07-07
Name: Date, Length: 14727, dtype: datetime64[ns]

In [70]:
# Subtracting a DateOffset shifts every date backward, here by 3 days.
disney['Date']-pd.DateOffset(days=3)

0       1961-12-30
1       1961-12-31
2       1962-01-01
3       1962-01-02
4       1962-01-05
           ...    
14722   2020-06-23
14723   2020-06-26
14724   2020-06-27
14725   2020-06-28
14726   2020-06-29
Name: Date, Length: 14727, dtype: datetime64[ns]

In [71]:
# Offsets can combine units: 10 days and 6 hours later (a time part now appears).
disney['Date']+pd.DateOffset(days=10, hours=6)

0       1962-01-12 06:00:00
1       1962-01-13 06:00:00
2       1962-01-14 06:00:00
3       1962-01-15 06:00:00
4       1962-01-18 06:00:00
                ...        
14722   2020-07-06 06:00:00
14723   2020-07-09 06:00:00
14724   2020-07-10 06:00:00
14725   2020-07-11 06:00:00
14726   2020-07-12 06:00:00
Name: Date, Length: 14727, dtype: datetime64[ns]

In [72]:
# Last few dates, as a reference point for the month-based offsets.
disney['Date'].tail()

14722   2020-06-26
14723   2020-06-29
14724   2020-06-30
14725   2020-07-01
14726   2020-07-02
Name: Date, dtype: datetime64[ns]

In [135]:
# pandas ships ready-made offsets in pd.offsets. MonthEnd needs no day argument:
# it moves each date forward to the end of its month.
# Caution: a date that is already a month end rolls to the end of the NEXT month.
disney['Date']+pd.offsets.MonthEnd()

0       1962-01-31
1       1962-01-31
2       1962-01-31
3       1962-01-31
4       1962-01-31
           ...    
14722   2020-06-30
14723   2020-06-30
14724   2020-07-31
14725   2020-07-31
14726   2020-07-31
Name: Date, Length: 14727, dtype: datetime64[ns]

In [136]:
# Subtracting MonthEnd moves each date back to the end of the previous month.
disney['Date']-pd.offsets.MonthEnd()

0       1961-12-31
1       1961-12-31
2       1961-12-31
3       1961-12-31
4       1961-12-31
           ...    
14722   2020-05-31
14723   2020-05-31
14724   2020-05-31
14725   2020-06-30
14726   2020-06-30
Name: Date, Length: 14727, dtype: datetime64[ns]

In [76]:
# MonthBegin moves each date forward to the next first-of-month;
# a date already on the 1st moves to the 1st of the following month.
disney['Date']+pd.offsets.MonthBegin()

0       1962-02-01
1       1962-02-01
2       1962-02-01
3       1962-02-01
4       1962-02-01
           ...    
14722   2020-07-01
14723   2020-07-01
14724   2020-07-01
14725   2020-08-01
14726   2020-08-01
Name: Date, Length: 14727, dtype: datetime64[ns]

In [77]:
# Three dates near the end of May 2020, parsed from strings into a datetime Series.
may_dates = ['2020-05-28', '2020-05-29', '2020-05-30']
end_of_may = pd.to_datetime(pd.Series(may_dates))
end_of_may

0   2020-05-28
1   2020-05-29
2   2020-05-30
dtype: datetime64[ns]

In [78]:
# All three dates roll forward to May 31, the calendar month end.
end_of_may+pd.offsets.MonthEnd()

0   2020-05-31
1   2020-05-31
2   2020-05-31
dtype: datetime64[ns]

In [137]:
# The inputs fall on Thu, Fri and Sat; the results are Fri 2020-05-29,
# Tue 2020-06-30 and Tue 2020-06-30.
# BMonthEnd works on business days (Mon-Fri): the last business day of May 2020
# is Friday the 29th, so dates on or after it roll to the last business day of June.
end_of_may+pd.offsets.BMonthEnd()

0   2020-05-29
1   2020-06-30
2   2020-06-30
dtype: datetime64[ns]

In [138]:
# pandas' own duration type: 8 days, 7 hours, 6 minutes and 5 seconds.
duration = pd.Timedelta(days=8, hours=7, minutes=6, seconds=5)
duration

Timedelta('8 days 07:06:05')

In [139]:
# to_timedelta parses a descriptive duration string into a Timedelta.
pd.to_timedelta('3hours, 5minutes, 12seconds')

Timedelta('0 days 03:05:12')

In [140]:
# ...or builds one from a number plus a unit.
pd.to_timedelta(5, unit='hour')

Timedelta('0 days 05:00:00')

In [141]:
# Load the deliveries data as-is; the dates are month/day/two-digit-year text.
deliveries = pd.read_csv('deliveries.csv')
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,5/24/98,2/5/99
1,4/22/92,3/6/98
2,2/10/91,8/26/92
3,7/21/92,11/20/97
4,9/2/93,6/10/98


In [142]:
# Parse both date columns while reading. The file stores dates as
# month/day/two-digit-year text (e.g. 5/24/98); stating the format explicitly
# skips pandas' format inference (which emitted a warning for this call) and
# makes the month/day order unambiguous. date_format requires pandas >= 2.0.
deliveries = pd.read_csv(filepath_or_buffer='deliveries.csv',
                         parse_dates=['order_date', 'delivery_date'],
                         date_format='%m/%d/%y')
deliveries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 501 entries, 0 to 500
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   order_date     501 non-null    datetime64[ns]
 1   delivery_date  501 non-null    datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 8.0 KB


  deliveries = pd.read_csv(filepath_or_buffer='deliveries.csv',
  deliveries = pd.read_csv(filepath_or_buffer='deliveries.csv',


In [143]:
# Subtracting two datetime columns gives the elapsed time per row (timedelta64).
deliveries['delivery_date']-deliveries['order_date']

0      257 days
1     2144 days
2      563 days
3     1948 days
4     1742 days
         ...   
496   1684 days
497   2394 days
498   2719 days
499     10 days
500    637 days
Length: 501, dtype: timedelta64[ns]

In [144]:
# Store the order-to-delivery time as a new timedelta column and check the dtypes.
elapsed = deliveries['delivery_date'].sub(deliveries['order_date'])
deliveries['duration'] = elapsed
deliveries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 501 entries, 0 to 500
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype          
---  ------         --------------  -----          
 0   order_date     501 non-null    datetime64[ns] 
 1   delivery_date  501 non-null    datetime64[ns] 
 2   duration       501 non-null    timedelta64[ns]
dtypes: datetime64[ns](2), timedelta64[ns](1)
memory usage: 11.9 KB


In [145]:
# Sort the durations from shortest to longest.
deliveries['duration'].sort_values()

454      8 days
294      9 days
10       9 days
499     10 days
143     16 days
         ...   
152   3379 days
62    3423 days
458   3562 days
145   3580 days
448   3583 days
Name: duration, Length: 501, dtype: timedelta64[ns]

In [147]:
# Keep deliveries that took longer than 365 days; a timedelta column can be
# compared directly against a Timedelta.
deliveries[deliveries['duration']>pd.Timedelta(days=365)]

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days
6,1990-01-25,1994-10-02,1711 days
...,...,...,...
495,1990-12-10,1992-12-16,737 days
496,1991-06-24,1996-02-02,1684 days
497,1991-09-09,1998-03-30,2394 days
498,1990-11-16,1998-04-27,2719 days
