### Plotly 소개
 * plotly.js(d3.js 기반)를 이용하여 그래프를 interactive하게 보여주는 시각화 라이브러리이다.

### 사전 파이썬 버전 확인
```
(base) C:\Users\toto>python --version
Python 3.7.7
```

### plotly를 pandas와 함께 사용하는 법
 * cufflinks 설정과 .iplot()을 활용. pandas의 DataFrame.plot()처럼 판다스 데이터를 바로 시각화
 * plotly.express 라이브러리 활용

### cufflinks란 무엇인가
 * 판다스 데이터 프레임과 Plotly를 연결하여 사용자가 판다스로부터 직접 시각화를 할 수 있는 라이브러리

### 01 시작하기 - 설치(Plotly and Cufflinks)
 * pip install plotly
 * pip install cufflinks

In [8]:
import plotly
import cufflinks as cf
import pandas as pd
import numpy as np

### 버전 확인

In [9]:
# Report the installed version of each library used in this notebook,
# one per line, in the order plotly, cufflinks, pandas, numpy.
print(
    plotly.__version__,
    cf.__version__,
    pd.__version__,
    np.__version__,
    sep="\n",
)

4.2.1
0.17.3
1.0.3
1.18.4


In [10]:
# Enable offline mode so interactive plots are rendered locally.
from plotly.offline import (
    download_plotlyjs,
    init_notebook_mode,
    iplot,
    plot,
)

# Initialise plotly's notebook integration, then switch cufflinks to offline use.
init_notebook_mode(connected=True)
cf.go_offline()

### 데이터 생성 및 plot

In [15]:
# Build the demo frame: 100 rows x 4 columns (A-D) drawn with np.random.randn.
df = pd.DataFrame(
    data=np.random.randn(100, 4),
    columns=list('ABCD'),
)

# Show the dimensions, then preview the first rows as the cell's output.
print(df.shape)
df.head()

(100, 4)


Unnamed: 0,A,B,C,D
0,-1.024738,-1.351902,2.043107,1.429486
1,0.017411,-1.385388,-2.597137,-0.096264
2,1.307811,-1.087958,0.04508,-0.703776
3,-0.600373,1.449807,-0.620434,-0.405395
4,0.03958,1.413396,-0.536769,-1.35044


In [22]:
# Small labelled frame: one row per item with its numeric value.
df2 = pd.DataFrame(
    [('bag', 32), ('apple', 43), ('cap', 50)],
    columns=['items', 'Values'],
)
df2

Unnamed: 0,items,Values
0,bag,32
1,apple,43
2,cap,50


### Line Plot

In [23]:
# Plot every column of df with iplot's default settings
# (the notebook section labels this the line plot).
df.iplot()

### Scatter Plot

In [24]:
# Scatter plot of column B against column A, drawn as markers only with size 20.
df.iplot(kind='scatter', x='A',y='B',mode='markers',size=20)

### Bar Plot

In [26]:
# Bar chart of df2: item names on the x axis, their 'Values' as bar heights.
df2.iplot(kind='bar',x='items',y='Values')

In [29]:
# Rebind df to a 10 x 4 frame (columns A-D) of np.random.rand samples in [0, 1).
df = pd.DataFrame(
    data=np.random.rand(10, 4),
    columns=list('ABCD'),
)
df.head()

Unnamed: 0,A,B,C,D
0,0.186949,0.389418,0.278265,0.896099
1,0.6096,0.272783,0.940511,0.494151
2,0.12707,0.290569,0.754079,0.11931
3,0.004488,0.646259,0.105027,0.128515
4,0.368267,0.430466,0.852968,0.375185


In [30]:
# Bar chart of all columns of df.
df.iplot(kind='bar')

### A컬럼만 보기

In [32]:
# Bar chart of column A only: select the Series first, then plot it.
df['A'].iplot(kind='bar')

### stack plot

In [31]:
# Bar chart of all columns with barmode='stack' (stacked rather than the default layout).
df.iplot(kind='bar', barmode='stack')

In [33]:
# Same stacked chart using kind='barh' (horizontal bars).
df.iplot(kind='barh', barmode='stack')

### Box Plot

In [27]:
# Box plot of df's columns.
df.iplot(kind='box')

### 3D Surface Plot

In [21]:
# Five-row frame with integer columns x, y and z, used as input for the surface plot.
df3 = pd.DataFrame(
    {
        'x': list(range(1, 6)),
        'y': [10, 20, 30, 40, 60],
        'z': list(range(5, 0, -1)),
    }
)
df3

Unnamed: 0,x,y,z
0,1,10,5
1,2,20,4
2,3,30,3
3,4,40,2
4,5,60,1


In [28]:
# Surface plot of df3 using the 'rdylbu' colour scale.
# NOTE(review): presumably the frame's values are treated as the height grid — confirm.
df3.iplot(kind='surface',colorscale='rdylbu')

### Line Charts

In [5]:
# Rebind df to sample data from cufflinks' datagen helper and preview the first rows.
df = cf.datagen.lines()
df.head()

Unnamed: 0,ZIA.PT,FMH.FS,ZWE.TA,HGH.HM,QGS.VB
2015-01-01,0.701895,1.0884,1.365448,-0.435339,-1.419703
2015-01-02,0.535656,0.212881,2.054125,0.365692,-0.781627
2015-01-03,-1.603971,-0.324437,2.102971,0.050048,-2.20036
2015-01-04,-2.497942,-2.212996,3.199429,1.071881,-1.603421
2015-01-05,-1.061824,-4.055288,3.540398,2.77725,-1.668406


In [6]:
# Line chart of every column in df.
df.iplot(kind='line')

In [38]:
# Print df's dimensions and display its first 10 rows.
# NOTE(review): the recorded output is the 10x4 A-D frame, not the datagen
# frame assigned in the cell just above in file order — the cells were
# presumably executed out of order (see the In [..] counters).
print(df.shape)
df.head(10)

(10, 4)


Unnamed: 0,A,B,C,D
0,0.186949,0.389418,0.278265,0.896099
1,0.6096,0.272783,0.940511,0.494151
2,0.12707,0.290569,0.754079,0.11931
3,0.004488,0.646259,0.105027,0.128515
4,0.368267,0.430466,0.852968,0.375185
5,0.384756,0.933615,0.839619,0.988816
6,0.607046,0.693675,0.736342,0.687044
7,0.31393,0.042654,0.391986,0.593443
8,0.113184,0.387231,0.209746,0.348256
9,0.420626,0.092529,0.09553,0.446285


### Plot Styling

### 테마(Theme) 설정

In [39]:
# Fetch the theme names reported by cufflinks and display them.
themes = cf.getThemes()
themes

['ggplot', 'pearl', 'solar', 'space', 'white', 'polar', 'henanigans']

In [40]:
# Draw the same bar chart of the values 0-9 once per theme, using the theme
# name as the chart title so the themes can be compared visually.
data = pd.Series(range(10))
for theme in themes:
    data.iplot(kind='bar', theme=theme, title=theme)

### 기본 테마 설정

In [41]:
# Set 'pearl' as the theme in the cufflinks configuration.
# NOTE(review): the name suggests this writes a config file, so the setting
# may persist beyond this session — confirm.
cf.set_config_file(theme='pearl')

## Plotly express 사용한 시각화

* cufflinks보다 좀 더 다양하며, 사용방법은 seaborn과 비슷함.
* plotly.express(구 plotly_express) 이용. plotly 4.1 부터는 plotly에 포함되어 별도 설치 없어도 됨. 3.8.1의 경우 plotly_express 별도 설치 필요

In [42]:
import plotly.express as px

In [43]:
# Load the iris dataset: print the loader's docstring, then preview the first rows.
print(px.data.iris.__doc__)
px.data.iris().head()


    Each row represents a flower.

    https://en.wikipedia.org/wiki/Iris_flower_data_set

    Returns:
        A `pandas.DataFrame` with 150 rows and the following columns: `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species',
       'species_id']`.
    


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


### 산점도(scatter plot) and Line Plots(선 그래프)

In [45]:
# Basic scatter of the iris data: sepal width on x, sepal length on y.
import plotly.express as px

df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
)
fig.show()

In [46]:
# Same iris scatter, with points coloured by the "species" column.
import plotly.express as px

df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
)
fig.show()

In [47]:
# Iris scatter coloured by species, with marginal plots on both axes
# (box along x, violin along y) and an "ols" trendline.
# NOTE(review): trendline="ols" presumably needs statsmodels installed — confirm.
import plotly.express as px

df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    marginal_x="box",
    marginal_y="violin",
    trendline="ols",
)
fig.show()

In [50]:
# Scatter-plot matrix over the four iris measurement columns, coloured by species.
import plotly.express as px

df = px.data.iris()
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=[
        "sepal_width",
        "sepal_length",
        "petal_width",
        "petal_length",
    ],
    color="species",
)
fig.show()

In [51]:
# Parallel-categories diagram of the tips data, coloured by the "size"
# column using the sequential Inferno colour scale.
import plotly.express as px

df = px.data.tips()
fig = px.parallel_categories(
    data_frame=df,
    color="size",
    color_continuous_scale=px.colors.sequential.Inferno,
)
fig.show()

In [57]:
# Load the gapminder data and report, one item per line: its shape,
# its column index, and the loader's docstring.
df = px.data.gapminder()
print(
    df.shape,
    df.columns,
    px.data.gapminder.__doc__,
    sep="\n",
)

(1704, 8)
Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num'],
      dtype='object')

    Each row represents a country on a given year.

    https://www.gapminder.org/data/

    Returns:
        A `pandas.DataFrame` with 1704 rows and the following columns: `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num']`.
    


In [52]:
# Bubble chart of the 2007 gapminder rows: GDP per capita (log-scaled x)
# against life expectancy, bubble size from population (capped at 60),
# coloured by continent, with the country name shown on hover.
import plotly.express as px

df = px.data.gapminder()
fig = px.scatter(
    data_frame=df[df["year"] == 2007],
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=60,
    color="continent",
    hover_name="country",
    log_x=True,
)
fig.show()

In [58]:
# Animated gapminder bubble chart: one animation frame per year with
# countries tracked across frames, one facet column per continent,
# log-scaled x axis and fixed axis ranges.
import plotly.express as px

df = px.data.gapminder()
fig = px.scatter(
    data_frame=df,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=45,
    color="continent",
    hover_name="country",
    facet_col="continent",
    animation_frame="year",
    animation_group="country",
    log_x=True,
    range_x=[100,100000],
    range_y=[25,90],
)
fig.show()

### Barplot

In [63]:
# Grouped bar chart of the tips data: total bill by sex, with bars
# coloured by smoker status and placed in groups (barmode="group").
import plotly.express as px

df = px.data.tips()
fig = px.bar(
    data_frame=df,
    x="sex",
    y="total_bill",
    color="smoker",
    barmode="group",
)
fig.show()

### 3D

In [65]:
# Load the election data and report, one item per line: its shape,
# the first rows, its column index, and the loader's docstring.
df = px.data.election()
print(
    df.shape,
    df.head(),
    df.columns,
    px.data.election.__doc__,
    sep="\n",
)

(58, 7)
                district  Coderre  Bergeron  Joly  total    winner     result
0     101-Bois-de-Liesse     2481      1829  3024   7334      Joly  plurality
1  102-Cap-Saint-Jacques     2525      1163  2675   6363      Joly  plurality
2   11-Sault-au-Récollet     3348      2770  2532   8650   Coderre  plurality
3           111-Mile-End     1734      4782  2514   9030  Bergeron   majority
4         112-DeLorimier     1770      5933  3044  10747  Bergeron   majority
Index(['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result'], dtype='object')

    Each row represents voting results for an electoral district in the 2013 Montreal mayoral election.

    Returns:
        A `pandas.DataFrame` with 58 rows and the following columns: `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result']`.
    


In [66]:
# 3D line plot of the election data: one axis per candidate's vote count,
# with both line colour and dash style keyed on the winner.
import plotly.express as px

df = px.data.election()
fig = px.line_3d(
    data_frame=df,
    x="Joly",
    y="Coderre",
    z="Bergeron",
    color="winner",
    line_dash="winner",
)
fig.show()

### Maps

In [68]:
# Animated map scatter of the gapminder data: points located by the
# iso_alpha column, sized by population, coloured by continent,
# one animation frame per year, on a "natural earth" projection.
import plotly.express as px

df = px.data.gapminder()
fig = px.scatter_geo(
    data_frame=df,
    locations="iso_alpha",
    color="continent",
    hover_name="country",
    size="pop",
    animation_frame="year",
    projection="natural earth",
)
fig.show()

In [69]:
# Map line plot of the 2007 gapminder rows: locations from the iso_alpha
# column, coloured by continent, on an orthographic projection.
import plotly.express as px

df = px.data.gapminder()
fig = px.line_geo(
    data_frame=df[df["year"] == 2007],
    locations="iso_alpha",
    color="continent",
    projection="orthographic",
)
fig.show()

### REF
 * cufflinks.datagen module
   * https://jpoles1.github.io/cufflinks/html/cufflinks.datagen.html
 * Plotly Express in Python
   * https://plot.ly/python/plotly-express/#plotly-express