<a href="https://colab.research.google.com/github/JakeOh/202011_itw_bd21/blob/main/lab_da/da19_datetime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*   Python의 datetime 모듈
    *   datetime 클래스: 날짜(date), 시간(time)과 관련된 정보들을 저장, 표현할 수 있는 메서드를 포함.
    *   https://docs.python.org/3/library/datetime.html?highlight=datetime#module-datetime 참고

In [1]:
import datetime  # datetime 모듈을 import

In [2]:
datetime.datetime.now()
# Calls the now() method of the datetime class that lives inside the datetime module.
# NOTE: this spelling only works while the name `datetime` is bound to the module;
# after a later cell runs `from datetime import datetime`, re-running this cell fails.

datetime.datetime(2021, 3, 2, 4, 53, 3, 366816)

In [3]:
from datetime import datetime  # datetime 모듈에서 작성된 datetime 클래스를 import

In [4]:
datetime.now()  # call the now() method directly on the imported datetime class

datetime.datetime(2021, 3, 2, 4, 56, 32, 307108)

*   datetime 클래스의 메서드

In [7]:
# datetime.now(): returns the current time in the time zone of the OS (operating system).
now = datetime.now()  #> current time in the time zone configured on the Google Colab VM (UTC)
print(now)

2021-03-02 05:01:38.132302


*   UTC(Coordinated Universal Time): 협정 세계시. 그리니치 평균시(GMT)를 계승한 국제 표준 시간.
*   KST(Korea Standard Time): 한국 표준시(UTC+09:00)

In [8]:
type(now)  #> an object of the datetime class defined in the datetime module

datetime.datetime

In [9]:
# Date attributes (properties) of a datetime.datetime object:
# year, month and day, printed one per line.
print(now.year, now.month, now.day, sep='\n')

2021
3
2


In [10]:
# Time attributes joined with ':'; the raw integers are shown, so they are not zero-padded.
print(now.hour, now.minute, now.second, sep=':')

5:1:38


In [12]:
# datetime constructor: builds a datetime object for a specific date/time.
# Only the date is given here, so the time part prints as 00:00:00.
yesterday = datetime(year=2021, month=3, day=1)
print(yesterday)

2021-03-01 00:00:00


In [13]:
# Same constructor with hour and minute supplied; seconds are omitted.
tomorrow = datetime(year=2021, month=3, day=3, hour=14, minute=13)
print(tomorrow)

2021-03-03 14:13:00


In [15]:
# Difference between two dates: subtracting one datetime from another
# prints as an interval of days plus hours:minutes:seconds.
print(tomorrow - yesterday)

2 days, 14:13:00


In [18]:
from datetime import timedelta  # datetime 모듈의 timedelta 클래스를 import

In [19]:
# Capture the current date/time once; the timedelta cells that follow add to and subtract from it.
current_time = datetime.now()
print(current_time)

2021-03-02 05:38:35.626358


In [21]:
one_week = timedelta(weeks=1)  # a timedelta (time interval) object spanning one week
# One week before and one week after current_time, one result per line.
print(
    current_time - one_week,
    current_time + one_week,
    sep='\n',
)

2021-02-23 05:38:35.626358
2021-03-09 05:38:35.626358


In [22]:
one_day = timedelta(days=1)  # a timedelta (time interval) object spanning one day
# The day before and the day after current_time, one result per line.
print(
    current_time - one_day,
    current_time + one_day,
    sep='\n',
)

2021-03-01 05:38:35.626358
2021-03-03 05:38:35.626358


*   datetime(날짜/시간) 객체 <---> str(문자열) 객체
    *   `datetime.strptime(문자열, 포맷문자열)`: 문자열(str)을 datetime 객체로 변환(parsing)
    *   `datetime.strftime(포맷문자열)`: datetime 객체를 문자열로 변환(formatting)


In [24]:
current_time = datetime.now()  # datetime object holding the current date/time
print(current_time)
# datetime ---> str: the same date rendered with '/' and then with '-' separators.
print(
    current_time.strftime('%Y/%m/%d'),
    current_time.strftime('%Y-%m-%d'),
    sep='\n',
)

2021-03-02 05:48:10.259939
2021/03/02
2021-03-02


*   날짜 포맷 문자열
    *   연도(year): %Y(4자리 연도),  %y(2자리 연도)
    *   월(month): %m(2자리 숫자), %B(월 이름), %b(월 이름 약자)
    *   일(day): %d(2자리 숫자)
    *   시(hour): %H(24시간 형식. 0 ~ 23), %I(12시간 형식. 1 ~ 12), %p(오전/오후 표시. AM/PM)
    *   분(minute): %M(2자리 숫자. 0 ~ 59)
    *   초(second): %S(2자리 숫자. 0 ~ 59)

In [26]:
# Abbreviated month name with a 2-digit year, then full month name with a 4-digit year.
print(
    current_time.strftime('%b. %d, %y'),
    current_time.strftime('%B %d, %Y'),
    sep='\n',
)

Mar. 02, 21
March 02, 2021


In [29]:
# A fixed afternoon time, so the 24-hour and 12-hour renderings visibly differ.
current_time = datetime(year=2021, month=3, day=2, hour=15, minute=1, second=30)
print(
    current_time.strftime('%H:%M:%S'),     # 24-hour clock
    current_time.strftime('%I:%M:%S %p'),  # 12-hour clock followed by the AM/PM marker
    sep='\n',
)

15:01:30
03:01:30 PM


In [33]:
# str ---> datetime
print(
    datetime.strptime('2021-3-2', '%Y-%m-%d'),  # month/day without leading zeros still parse
    datetime.strptime('21-03-02', '%y-%m-%d'),  # read as year-month-day
    datetime.strptime('21-03-02', '%d-%m-%y'),  # same text read as day-month-year
    sep='\n',
)

2021-03-02 00:00:00
2021-03-02 00:00:00
2002-03-21 00:00:00


*   timestamp(UNIX time, POSIX time, epoch time, ...): 1970년 1월 1일 0시 0분 0초(UTC)를 시작으로 해서 1초마다 1씩 증가하는 숫자.
*   `datetime.timestamp()`: datetime 객체 ---> timestamp(숫자)
*   `datetime.fromtimestamp(숫자)`: timestamp(숫자) ---> datetime 객체


In [35]:
current_time = datetime.now()
# The datetime itself, then the same instant as a number (datetime ---> number).
print(
    current_time,
    current_time.timestamp(),
    sep='\n',
)

2021-03-02 06:18:20.242223
1614665900.242223


In [37]:
print(datetime.fromtimestamp(1614665900))  # number ---> datetime

2021-03-02 06:18:20


In [38]:
import pandas as pd

In [39]:
# Location of the MovieLens ratings file and the column names to assign on load
# (the read_csv call uses header=None, so names are supplied by hand).
file_path = 'https://github.com/wesm/pydata-book/raw/2nd-edition/datasets/movielens/ratings.dat'
col_names = [
    'user_id',
    'movie_id',
    'rating',
    'timestamp',
]

In [40]:
# Load the ratings file: fields are separated by '::', there is no header row,
# and the column names come from col_names.
ratings = pd.read_csv(
    file_path,
    sep='::',
    header=None,
    names=col_names,
    encoding='cp1252',
    engine='python',  # NOTE(review): presumably needed for the multi-character separator — confirm in pandas docs
)

In [41]:
# Preview the first rows to check the parsed columns.
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [42]:
# Convert the timestamp of the first ratings row (978300760) to a datetime as a spot check.
datetime.fromtimestamp(978300760)

datetime.datetime(2000, 12, 31, 22, 12, 40)

In [44]:
# Add a derived column 'dt' to the ratings DataFrame: every timestamp value
# converted to a datetime object, one value at a time, with datetime.fromtimestamp.
# NOTE(review): fromtimestamp() is documented to use the machine's local time zone,
# so on a non-UTC host these values would differ from the recorded (UTC) output — confirm.
dates = [datetime.fromtimestamp(ts) for ts in ratings['timestamp']]

# Wrap the list in a pd.Series and attach it as the derived column.
# The Series reuses ratings' own index: column assignment aligns on index labels,
# so a fresh 0..n-1 index would misplace values if ratings were ever filtered or re-indexed.
ratings['dt'] = pd.Series(dates, index=ratings.index)

In [45]:
# First rows again, now including the new 'dt' column.
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,dt
0,1,1193,5,978300760,2000-12-31 22:12:40
1,1,661,3,978302109,2000-12-31 22:35:09
2,1,914,3,978301968,2000-12-31 22:32:48
3,1,3408,4,978300275,2000-12-31 22:04:35
4,1,2355,5,978824291,2001-01-06 23:38:11


In [46]:
# Last rows of the frame, to check the conversion at the other end of the data.
ratings.tail()

Unnamed: 0,user_id,movie_id,rating,timestamp,dt
1000204,6040,1091,1,956716541,2000-04-26 02:35:41
1000205,6040,1094,5,956704887,2000-04-25 23:21:27
1000206,6040,562,5,956704746,2000-04-25 23:19:06
1000207,6040,1096,4,956715648,2000-04-26 02:20:48
1000208,6040,1097,4,956715569,2000-04-26 02:19:29


In [47]:
# Summary of the derived datetime column.
# NOTE(review): the recorded output is preceded by what looks like the tail of a warning
# and reports count/unique/top/freq/first/last — presumably behaviour specific to the
# pandas version used at the time; TODO confirm against the installed version.
ratings['dt'].describe()

  """Entry point for launching an IPython kernel.


count                 1000209
unique                 458455
top       2000-11-29 20:06:42
freq                       30
first     2000-04-25 23:05:32
last      2003-02-28 17:49:50
Name: dt, dtype: object

In [50]:
# pd.to_datetime() function: converts the whole timestamp column in one call
# (unit='s' is passed because the integers count seconds).
ratings['dt2'] = pd.to_datetime(ratings['timestamp'], unit='s')

In [51]:
# Compare the loop-built 'dt' column with the pd.to_datetime 'dt2' column side by side.
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,dt,dt2
0,1,1193,5,978300760,2000-12-31 22:12:40,2000-12-31 22:12:40
1,1,661,3,978302109,2000-12-31 22:35:09,2000-12-31 22:35:09
2,1,914,3,978301968,2000-12-31 22:32:48,2000-12-31 22:32:48
3,1,3408,4,978300275,2000-12-31 22:04:35,2000-12-31 22:04:35
4,1,2355,5,978824291,2001-01-06 23:38:11,2001-01-06 23:38:11


In [52]:
# Column dtypes and non-null counts; used here to inspect the dtype of 'dt' and 'dt2'.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000209 entries, 0 to 1000208
Data columns (total 6 columns):
 #   Column     Non-Null Count    Dtype         
---  ------     --------------    -----         
 0   user_id    1000209 non-null  int64         
 1   movie_id   1000209 non-null  int64         
 2   rating     1000209 non-null  int64         
 3   timestamp  1000209 non-null  int64         
 4   dt         1000209 non-null  datetime64[ns]
 5   dt2        1000209 non-null  datetime64[ns]
dtypes: datetime64[ns](2), int64(4)
memory usage: 45.8 MB


In [53]:
# The reason for converting the timestamp (integer) type to a datetime type is to make
# information such as year/month/day easy to extract through the .dt accessor.
ratings['year'], ratings['month'], ratings['quarter'] = (
    ratings['dt'].dt.year,
    ratings['dt'].dt.month,
    ratings['dt'].dt.quarter,
)

In [54]:
# First ten rows, now including the derived year/month/quarter columns.
ratings.head(10)

Unnamed: 0,user_id,movie_id,rating,timestamp,dt,dt2,year,month,quarter
0,1,1193,5,978300760,2000-12-31 22:12:40,2000-12-31 22:12:40,2000,12,4
1,1,661,3,978302109,2000-12-31 22:35:09,2000-12-31 22:35:09,2000,12,4
2,1,914,3,978301968,2000-12-31 22:32:48,2000-12-31 22:32:48,2000,12,4
3,1,3408,4,978300275,2000-12-31 22:04:35,2000-12-31 22:04:35,2000,12,4
4,1,2355,5,978824291,2001-01-06 23:38:11,2001-01-06 23:38:11,2001,1,1
5,1,1197,3,978302268,2000-12-31 22:37:48,2000-12-31 22:37:48,2000,12,4
6,1,1287,5,978302039,2000-12-31 22:33:59,2000-12-31 22:33:59,2000,12,4
7,1,2804,5,978300719,2000-12-31 22:11:59,2000-12-31 22:11:59,2000,12,4
8,1,594,4,978302268,2000-12-31 22:37:48,2000-12-31 22:37:48,2000,12,4
9,1,919,4,978301368,2000-12-31 22:22:48,2000-12-31 22:22:48,2000,12,4


In [56]:
# Extract only the rows of the ratings DataFrame that fall in the first quarter of 2003,
# using the derived 'year' and 'quarter' columns.
ratings.loc[
    ratings['year'].eq(2003)
    & ratings['quarter'].eq(1)
]

Unnamed: 0,user_id,movie_id,rating,timestamp,dt,dt2,year,month,quarter
8696,59,1175,4,1041967123,2003-01-07 19:18:43,2003-01-07 19:18:43,2003,1,1
8700,59,1252,4,1041967475,2003-01-07 19:24:35,2003-01-07 19:24:35,2003,1,1
8702,59,1183,4,1041963129,2003-01-07 18:12:09,2003-01-07 18:12:09,2003,1,1
8704,59,2997,4,1041962568,2003-01-07 18:02:48,2003-01-07 18:02:48,2003,1,1
8706,59,3871,4,1041968282,2003-01-07 19:38:02,2003-01-07 19:38:02,2003,1,1
...,...,...,...,...,...,...,...,...,...
984842,5950,3317,3,1046369439,2003-02-27 18:10:39,2003-02-27 18:10:39,2003,2,1
984847,5950,3328,3,1046369090,2003-02-27 18:04:50,2003-02-27 18:04:50,2003,2,1
984849,5950,111,5,1046368241,2003-02-27 17:50:41,2003-02-27 17:50:41,2003,2,1
984861,5950,3363,5,1046367948,2003-02-27 17:45:48,2003-02-27 17:45:48,2003,2,1


In [57]:
# Same first-quarter-of-2003 selection, computed straight from the 'dt' column
# through the .dt accessor instead of the derived year/quarter columns.
ratings.loc[
    ratings['dt'].dt.year.eq(2003)
    & ratings['dt'].dt.quarter.eq(1)
]

Unnamed: 0,user_id,movie_id,rating,timestamp,dt,dt2,year,month,quarter
8696,59,1175,4,1041967123,2003-01-07 19:18:43,2003-01-07 19:18:43,2003,1,1
8700,59,1252,4,1041967475,2003-01-07 19:24:35,2003-01-07 19:24:35,2003,1,1
8702,59,1183,4,1041963129,2003-01-07 18:12:09,2003-01-07 18:12:09,2003,1,1
8704,59,2997,4,1041962568,2003-01-07 18:02:48,2003-01-07 18:02:48,2003,1,1
8706,59,3871,4,1041968282,2003-01-07 19:38:02,2003-01-07 19:38:02,2003,1,1
...,...,...,...,...,...,...,...,...,...
984842,5950,3317,3,1046369439,2003-02-27 18:10:39,2003-02-27 18:10:39,2003,2,1
984847,5950,3328,3,1046369090,2003-02-27 18:04:50,2003-02-27 18:04:50,2003,2,1
984849,5950,111,5,1046368241,2003-02-27 17:50:41,2003-02-27 17:50:41,2003,2,1
984861,5950,3363,5,1046367948,2003-02-27 17:45:48,2003-02-27 17:45:48,2003,2,1
