In [2]:
"""
Native Python dates and times
"""

# Build a date using the datetime type

from datetime import datetime

t = datetime(year=2021, month=10, day=27)
print(t)
# 설정한대로 나옴
print(type(t))

2021-10-27 00:00:00
<class 'datetime.datetime'>


In [3]:
# Parse a date from a string

from dateutil import parser

date = parser.parse("27th of October, 2021")
print(date)

2021-10-27 00:00:00


In [4]:
# Print the day of the week

date.strftime('%A')


'Wednesday'

In [9]:
"""
Types Arrays of Times: NumPy’s datetime64
"""
import numpy as np
# Create datetime64 using array()
date = np.array(['2021-10-27','2021-10-28'], dtype=np.datetime64)
print(type(date[0]))

print(date)

# Use datetune64()
date = np.datetime64('2021-10-27')
print(date)

<class 'numpy.datetime64'>
['2021-10-27' '2021-10-28']
2021-10-27


In [10]:
# We can quickly do vectorized operations on datetime64

date + np.arange(7)
# 단일 스칼라값 + [0, 1, ..., 6] 배열

array(['2021-10-27', '2021-10-28', '2021-10-29', '2021-10-30',
       '2021-10-31', '2021-11-01', '2021-11-02'], dtype='datetime64[D]')

In [11]:
# Minite-based datetime

# 값 자체를 입력
t = np.datetime64('2021-10-27 09:27')
print(t)


2021-10-27T09:27


In [12]:
# Nanosecond-based datetime

t = np.datetime64('2021-10-27 09:27:10.20', 'ns')
print(t)

2021-10-27T09:27:10.200000000


In [13]:
"""
Dates and Times in Pandas
"""
import pandas as pd
# Parse a string date in pandas

date = pd.to_datetime("27th of October, 2021")
# 문자열 -> parser같은 역할을 하는 pandas의 to_datetime
print(date)

2021-10-27 00:00:00


In [14]:
# Output the day of the week

print(date.strftime('%A'))

Wednesday


In [15]:
# NumPy-style vectorized operations

date + pd.to_timedelta(np.arange(7))
# 27일 00시를 기준으로 1_6시까지(?)
# 스칼라+벡터, 스칼라+배열 간 연산 => 또 다른 배열 결과

# pandas에서 타임스탬프는 우리가 분석하고자 하는 데이터의 인덱스로 사용됨

DatetimeIndex([          '2021-10-27 00:00:00',
               '2021-10-27 00:00:00.000000001',
               '2021-10-27 00:00:00.000000002',
               '2021-10-27 00:00:00.000000003',
               '2021-10-27 00:00:00.000000004',
               '2021-10-27 00:00:00.000000005',
               '2021-10-27 00:00:00.000000006'],
              dtype='datetime64[ns]', freq=None)

In [20]:
"""
Pandas Time Seires: Indexing by Time
"""

# Indexing by timestamps

ind = pd.DatetimeIndex(['2021-10-27', '2021-10-28', '2021-10-29', '2021-10-30',
                        '2021-10-31', '2021-11-01', '2021-11-02'])
# 인덱스로 설정하고 
print(ind)

# 인덱스를 적용해줄 데이터
ser = pd.Series([0, 1, 2, 3, 4, 5, 6], index=ind) # ind를 인덱스로 사용하겠다
print(ser)

ser['2021-10-27':'2021-10-31']

DatetimeIndex(['2021-10-27', '2021-10-28', '2021-10-29', '2021-10-30',
               '2021-10-31', '2021-11-01', '2021-11-02'],
              dtype='datetime64[ns]', freq=None)
2021-10-27    0
2021-10-28    1
2021-10-29    2
2021-10-30    3
2021-10-31    4
2021-11-01    5
2021-11-02    6
dtype: int64


2021-10-27    0
2021-10-28    1
2021-10-29    2
2021-10-30    3
2021-10-31    4
dtype: int64

In [23]:
# Indexing patterns

print(ser['2021-10-27':'2021-10-31'], '\n')

print(ser['2021-11'], '\n')

print(ser['2021'])

2021-10-27    0
2021-10-28    1
2021-10-29    2
2021-10-30    3
2021-10-31    4
dtype: int64 

2021-11-01    5
2021-11-02    6
dtype: int64 

2021-10-27    0
2021-10-28    1
2021-10-29    2
2021-10-30    3
2021-10-31    4
2021-11-01    5
2021-11-02    6
dtype: int64


In [24]:
"""
Pandas Time Series Data Structures
"""

# Timestamp and DatetimeIndex

# 배열 내 데이터들이 시간을 표현하는 방식이 저마다 다름
dates = pd.to_datetime([datetime(2015, 7, 3),
                      "4th of July, 2015",
                      '2015-Jul-6',
                      '07-07-2015',
                      '20150708'])

print(dates) # 잘 이해해서 통일된 형태로 보여줌

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
               '2015-07-08'],
              dtype='datetime64[ns]', freq=None)


In [25]:
# DatetimeIndex -> PeriodIndex with to_period()

dates.to_period('D') # '시간단위'

PeriodIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
             '2015-07-08'],
            dtype='period[D]', freq='D')

In [26]:
# TimedeltaIndex

dates - dates[0]

TimedeltaIndex(['0 days', '1 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

In [27]:
"""
Regular Sequences
"""

# pd.date_range(). default freq: day

pd.date_range('2021-10-27', '2021-10-31')
# default옵션이 '일별'로 지정되어있음

DatetimeIndex(['2021-10-27', '2021-10-28', '2021-10-29', '2021-10-30',
               '2021-10-31'],
              dtype='datetime64[ns]', freq='D')

In [28]:
# Use startpoint and num. of periods

pd.date_range('2021-10-27', periods=5)

DatetimeIndex(['2021-10-27', '2021-10-28', '2021-10-29', '2021-10-30',
               '2021-10-31'],
              dtype='datetime64[ns]', freq='D')

In [29]:
# Change the frequency

pd.date_range('2021-10-27', periods=12, freq='H') # 시간단위

DatetimeIndex(['2021-10-27 00:00:00', '2021-10-27 01:00:00',
               '2021-10-27 02:00:00', '2021-10-27 03:00:00',
               '2021-10-27 04:00:00', '2021-10-27 05:00:00',
               '2021-10-27 06:00:00', '2021-10-27 07:00:00',
               '2021-10-27 08:00:00', '2021-10-27 09:00:00',
               '2021-10-27 10:00:00', '2021-10-27 11:00:00'],
              dtype='datetime64[ns]', freq='H')

In [30]:
# pd.period_range()

pd.period_range('2021-10', periods=12, freq='M')
# 시간차, 기간 -> 인덱스로 활용하겠다

PeriodIndex(['2021-10', '2021-11', '2021-12', '2022-01', '2022-02', '2022-03',
             '2022-04', '2022-05', '2022-06', '2022-07', '2022-08', '2022-09'],
            dtype='period[M]', freq='M')

In [31]:
# pd.timedelta_range()

pd.timedelta_range(0, periods=10, freq='H')

TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',
                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',
                '0 days 06:00:00', '0 days 07:00:00', '0 days 08:00:00',
                '0 days 09:00:00'],
               dtype='timedelta64[ns]', freq='H')