# PANDAS DATE-TIME 다루기

## 날짜시간 관련 PANDAS

 - 관련 자료형 : timestamp, datetime64, period
 
 - 관련 메서드: to_datetime(). date_range(), to_period(), period_range()

In [73]:
# 모듈 로딩
import pandas as pd

# DF 객체 생성
df=pd.DataFrame({'year':[2021,2022],
                 'month':[11,12],
                 'day':[1,24]})

df.info

<bound method DataFrame.info of    year  month  day
0  2021     11    1
1  2022     12   24>

In [74]:
# DF 객체 타입 정보
df.dtypes

year     int64
month    int64
day      int64
dtype: object

In [75]:
# DateTime으로 변환 => pandas.to_datetime()
result=pd.to_datetime(df)

print(type(result), result,sep='\n')

<class 'pandas.core.series.Series'>
0   2021-11-01
1   2022-12-24
dtype: datetime64[ns]


In [76]:
one=result[0]
print(type(one),one,sep='\n')

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
2021-11-01 00:00:00


In [77]:
one.day, one.month, one.month_name, one.hour

(1, 11, <function Timestamp.month_name>, 0)

# 타임 및 다른 포맷 변환 메서드들..... to_변환타입()

In [78]:
result.to_frame()

Unnamed: 0,0
0,2021-11-01
1,2022-12-24


In [79]:
result.to_string()

'0   2021-11-01\n1   2022-12-24'

In [80]:
# 파일 생성

In [81]:
result.to_csv('result.csv',index=False)

In [82]:
result.to_json('result.json')

## CSV 파일 활용 실습

 - 날짜 데이터가 있는 CSV 파일 처리하기

In [83]:
# CSV FILE ==> DataFrame으로 가져오기
vis=pd.read_csv('../Data/survey_visited.csv')

# 결측치 처리
vis.fillna(method='ffill',inplace=True)

vis.info(), vis

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ident   8 non-null      int64 
 1   site    8 non-null      object
 2   dated   8 non-null      object
dtypes: int64(1), object(2)
memory usage: 320.0+ bytes


(None,
    ident   site       dated
 0    619   DR-1  1927-02-08
 1    622   DR-1  1927-02-10
 2    734   DR-3  1939-01-07
 3    735   DR-3  1930-01-12
 4    751   DR-3  1930-02-26
 5    752   DR-3  1930-02-26
 6    837  MSK-4  1932-01-14
 7    844   DR-1  1932-03-22)

In [84]:
# 날짜 관련 데이터 ==> 자료형 변환하기
vis['dated']=pd.to_datetime(vis['dated'])
vis['dated']

0   1927-02-08
1   1927-02-10
2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [85]:
vis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ident   8 non-null      int64         
 1   site    8 non-null      object        
 2   dated   8 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 320.0+ bytes


In [94]:
vis['dated'][0], vis['dated'][0].year, vis['dated'][0].month, vis['dated'][0].day

(Timestamp('1927-02-08 00:00:00'), 1927, 2, 8)

In [96]:
type(vis['dated']), vis['dated'].dtype

(pandas.core.series.Series, dtype('<M8[ns]'))

In [97]:
vis['dated'].dt.year

0    1927
1    1927
2    1939
3    1930
4    1930
5    1930
6    1932
7    1932
Name: dated, dtype: int64

In [98]:
vis['dated'].dt.month

0    2
1    2
2    1
3    1
4    2
5    2
6    1
7    3
Name: dated, dtype: int64

In [102]:
dateSR=vis['dated']

In [103]:
dateSR

0   1927-02-08
1   1927-02-10
2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [101]:
dateSR.dt.year>=1930

0    False
1    False
2     True
3     True
4     True
5     True
6     True
7     True
Name: dated, dtype: bool

In [104]:
dateSR[dateSR.dt.year>=1930]

2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [105]:
dateSR[dateSR.dt.month>=2]

0   1927-02-08
1   1927-02-10
4   1930-02-26
5   1930-02-26
7   1932-03-22
Name: dated, dtype: datetime64[ns]