<a href="https://colab.research.google.com/github/OHTORO87/ai_class/blob/main/colab_d16_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## pandas 날짜처리

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

In [4]:
# Parse a compact 'YYYYMMDD' string into a pandas Timestamp. The format is stated
# explicitly so the result does not depend on pandas' format inference.
d1 = pd.to_datetime('20210526', format='%Y%m%d')

In [5]:
# Show the parsed value: a Timestamp at midnight, since the input carried no time part.
d1

Timestamp('2021-05-26 00:00:00')

In [6]:
# Parse a compact 'YYYYMMDDHHMMSS' string. A bare 14-digit string gives the parser
# no separators to go on, so the layout is spelled out with an explicit format.
d2 = pd.to_datetime('20210526095412', format='%Y%m%d%H%M%S')

In [7]:
# Show the parsed value, including the time-of-day component.
d2

Timestamp('2021-05-26 09:54:12')

In [9]:
# Confirm that a scalar input to pd.to_datetime yields a pandas Timestamp object.
type(d2)

pandas._libs.tslibs.timestamps.Timestamp

## 데이터프레임으로 만들어서 dtype이 적용되게 만들어보자

In [11]:
# Sample birthdays written as compact 'YYYYMMDDHHMMSS' strings.
tt = [
    '20201230121622',
    '20100412031244',
    '19990312091642',
]

In [12]:
# Wrap the list of strings in a single-column DataFrame (column label defaults to 0).
pd1 = pd.DataFrame(data=tt)

In [13]:
# Display the frame: one column labelled 0 holding the raw strings.
pd1

Unnamed: 0,0
0,20201230121622
1,20100412031244
2,19990312091642


In [14]:
# Inspect column dtypes: the values are still plain strings (dtype object), not datetimes.
pd1.dtypes

0    object
dtype: object

In [18]:
# Give the single column a meaningful label.
pd1.columns = pd.Index(['birthday'])

In [24]:
# Convert the compact 'YYYYMMDDHHMMSS' strings to datetimes. The explicit format
# makes every row parse the same, unambiguous way instead of relying on inference.
pd1['birthday'] = pd.to_datetime(pd1['birthday'], format='%Y%m%d%H%M%S')

In [25]:
# Display the frame after conversion; the values now render as timestamps.
pd1

Unnamed: 0,birthday
0,2020-12-30 12:16:22
1,2010-04-12 03:12:44
2,1999-03-12 09:16:42


## 날짜를 처리할 수 있는 datetime으로 바뀌었다

In [26]:
# Confirm the column dtype changed from object to a datetime64 type.
pd1.dtypes

birthday    datetime64[ns]
dtype: object

In [27]:
# Calendar year of each timestamp, extracted through the .dt accessor.
pd1['birthday'].dt.year

0    2020
1    2010
2    1999
Name: birthday, dtype: int64

In [28]:
# Month number (1-12) of each timestamp.
pd1['birthday'].dt.month

0    12
1     4
2     3
Name: birthday, dtype: int64

In [29]:
# Day of the month of each timestamp.
pd1['birthday'].dt.day

0    30
1    12
2    12
Name: birthday, dtype: int64

In [31]:
# Day of the week as an integer, counting Monday=0 through Sunday=6.
pd1['birthday'].dt.dayofweek

0    2
1    0
2    4
Name: birthday, dtype: int64

## 날짜끼리 선후비교
### 더하기 빼기 다 가능하다

In [32]:
# Element-wise "is later than" test against the cutoff 2015-11-22 12:33:44.
# The cutoff is parsed with an explicit format into a Timestamp, rather than
# handing pandas a bare compact string to interpret during the comparison.
pd1['birthday'] > pd.to_datetime('20151122123344', format='%Y%m%d%H%M%S')

0     True
1    False
2    False
Name: birthday, dtype: bool

## 오름차순 정렬

In [33]:
# Oldest first: ascending order is the default, stated here for clarity.
pd1.sort_values('birthday', ascending=True)

Unnamed: 0,birthday
2,1999-03-12 09:16:42
1,2010-04-12 03:12:44
0,2020-12-30 12:16:22


## 내림차순 정렬

In [37]:
# Newest first.
pd1.sort_values('birthday', ascending=False)

Unnamed: 0,birthday
0,2020-12-30 12:16:22
1,2010-04-12 03:12:44
2,1999-03-12 09:16:42


## 특정한 시점부터 날짜 생성

In [38]:
# Three consecutive dates from 2002-01-01; freq is omitted, so it defaults to daily.
pd.date_range('2002-01-01', periods=3)

DatetimeIndex(['2002-01-01', '2002-01-02', '2002-01-03'], dtype='datetime64[ns]', freq='D')

In [39]:
# Same range with the daily frequency written out explicitly.
pd.date_range('2002-01-01', periods=3, freq='D')

DatetimeIndex(['2002-01-01', '2002-01-02', '2002-01-03'], dtype='datetime64[ns]', freq='D')

In [40]:
# Three month-end dates starting from 2002-01-01.
# An offset object is used instead of the string alias 'M': that alias is
# deprecated in pandas 2.2+ (renamed 'ME'), while MonthEnd() works on old and
# new pandas alike and yields the same dates.
pd.date_range(start='2002-01-01', periods=3, freq=pd.offsets.MonthEnd())

DatetimeIndex(['2002-01-31', '2002-02-28', '2002-03-31'], dtype='datetime64[ns]', freq='M')

In [41]:
# Three year-end dates starting from 2002-01-01.
# An offset object is used instead of the string alias 'Y': that alias is
# deprecated in pandas 2.2+ (renamed 'YE'), while YearEnd() works on old and
# new pandas alike and yields the same dates.
pd.date_range(start='2002-01-01', periods=3, freq=pd.offsets.YearEnd())

DatetimeIndex(['2002-12-31', '2003-12-31', '2004-12-31'], dtype='datetime64[ns]', freq='A-DEC')

In [42]:
# Weekly frequency: 'W' anchors on Sunday, so the first date is the first Sunday on or after the start.
pd.date_range('2002-01-01', periods=3, freq='W')

DatetimeIndex(['2002-01-06', '2002-01-13', '2002-01-20'], dtype='datetime64[ns]', freq='W-SUN')

In [None]:
# Generate 12 dates starting from 2002-06-01 (the start date used by the call in the next cell)
# at weekly frequency

In [43]:
# Twelve weekly (Sunday-anchored) dates beginning on or after 2002-06-01.
pd.date_range('2002-06-01', periods=12, freq='W')

DatetimeIndex(['2002-06-02', '2002-06-09', '2002-06-16', '2002-06-23',
               '2002-06-30', '2002-07-07', '2002-07-14', '2002-07-21',
               '2002-07-28', '2002-08-04', '2002-08-11', '2002-08-18'],
              dtype='datetime64[ns]', freq='W-SUN')

## Bokeh import 하기

In [44]:
# Bokeh notebook setup: import the display helpers, then enable inline notebook output.

from bokeh.io import output_notebook, show
output_notebook()

In [47]:
# Grouped bar chart with interactive hover: mean MPG for each
# (cylinder count, manufacturer) pair in Bokeh's bundled auto-mpg sample data.

from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

# Cast to str so the values act as categorical factors. Bracket assignment is
# used because it always writes a column, whereas attribute assignment only
# does so when the column already exists.
df['cyl'] = df['cyl'].astype(str)
df['yr'] = df['yr'].astype(str)

group = df.groupby(by=['cyl', 'mfr'])
# Built from the GroupBy object; the glyph and tooltips below read the
# 'cyl_mfr' and 'mpg_mean' columns from this source.
source = ColumnDataSource(group)

# `width`/`height` replace `plot_width`/`plot_height`, which Bokeh 3.0 removed;
# the newer names are also accepted by Bokeh 2.x.
p = figure(width=800, height=300, title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2

# Colour bars by cylinder count: end=1 restricts the mapping to the first
# level of the (cyl, mfr) factor.
index_cmap = factor_cmap('cyl_mfr', palette=['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c'],
                         factors=sorted(df['cyl'].unique()), end=1)

p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap,
       hover_line_color="darkgrey", hover_fill_color=index_cmap)

p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]))

show(p)

In [46]:
# Embed Bokeh's hosted "sliders" demo application inside the notebook output.

from IPython.display import IFrame

IFrame(src='https://demo.bokeh.org/sliders', width=900, height=500)

In [48]:
import numpy as np
import pandas as pd
import seaborn as sns
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

output_notebook()
# Empty 600x400 canvas. `width`/`height` replace `plot_width`/`plot_height`,
# which Bokeh 3.0 removed; the newer names are also accepted by Bokeh 2.x.
p = figure(width=600, height=400)
show(p)



In [59]:
x = [1, 2, 3, 4]
y = [1, 4, 9, 16]
# Build the figure inside this cell so it no longer depends on `p` from another
# cell, and so re-running it does not stack another copy of the glyph on the
# same figure.
p = figure(width=600, height=400)
# scatter(marker='circle') draws the same size-based markers as circle(size=...),
# which newer Bokeh releases deprecate.
p.scatter(x, y, marker='circle', size=20, line_color='red', fill_color='white', fill_alpha=0.9)
show(p)