# Plotly进阶

- 组合图
- 其他一些常用复杂图表
- 3D

In [1]:
import plotly.express as px
import plotly.graph_objects as go

In [2]:
import pandas as pd
import numpy as np

In [3]:
from plotly.subplots import make_subplots

## 数据准备

- iris
- tips

In [4]:
iris = px.data.iris()
iris.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1


In [13]:
tips = px.data.tips()
tips.head(3)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3


## 组合图

- 原生组合
- 混合

### 原生组合

In [5]:
# 列出所有可能的组合的图的关系图

px.scatter_matrix(
    iris,
    # dimensions, 指定维度
    dimensions=iris.columns.values[0:-2],
    width=720,
    height=480
)

In [6]:
px.scatter(
    iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    marginal_x="histogram",
    marginal_y="rug",
    width=720,
    height=360
)

### 混合

有别于, 更为传统的不同的图表的组合

In [9]:
tmp = pd.DataFrame(
    {
        "Predicted": np.sort(np.random.uniform(3, 15, 4)),
        "real": np.sort(np.random.uniform(3, 15, 4)),
        "Category": ["A", "B", "C", "D"],
        "new_val": np.random.uniform(3, 15, 4),
    }
)

tmp

Unnamed: 0,Predicted,real,Category,new_val
0,3.70631,5.18923,A,6.397145
1,6.320171,6.492118,B,8.894634
2,6.692354,11.698428,C,10.994224
3,8.74498,14.727857,D,6.179522


In [10]:
# 组合图的实现方式 1

fig = px.bar(
    tmp, x="Category", 
    y=["Predicted", "real", "new_val"], 
    title="test", width=720, height=360
).add_traces(
    px.line(
    tmp, 
    x="Category", 
    y="real", text='real'
    ).update_traces(showlegend=True, name="real").data
).add_traces(
    px.line(
    tmp, 
    x="Category", 
    y="Predicted"
    ).update_traces(showlegend=True, name="Predicted").data
)

# 打印
# print(fig.data)可以看到各个对象的配置
# 对其中的线条颜色进行调整
fig.data[3].line.color = 'orange'
fig.show()

In [11]:
fig = go.Figure()
for e in ["Predicted", "real", "new_val"]:
    fig.add_trace(go.Bar(x=tmp['Category'], 
                        y=tmp[e], 
                        name='bar_' + e))
    
fig.add_trace(go.Scatter(x=tmp['Category'], y=tmp['real'], name='real', ))

fig.update_layout({"title": 'Post Test', 'barmode': 'relative'})

fig.add_trace(go.Scatter(
    x=tmp['Category'],
    y=tmp['Predicted'],
    name="predicted",
    mode='lines+text',
    text=[str(e) for e in range(4)]
))
fig.update_layout({'width': 720, 'height': 360})
fig.show()

## 其他

一些相对复杂的操作

### 热力图

- px.imshow
- density_heatmap

In [14]:
data=[[1, 25, 30, 50, 1], [20, 1, 60, 80, 30], [30, 60, 1, 5, 20]]
fig = px.imshow(data,
                labels=dict(x="Day of Week", 
                            y="Time of Day", 
                            color="Productivity"
                            ),
                x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
                y=['Morning', 'Afternoon', 'Evening']
               )
fig.update_xaxes(side="top")
fig.show()

In [13]:
fig = go.Figure(
    data=go.Histogram2d(
        x=tips['total_bill'], 
        y=tips['tip'],
        z=tips['size'],
        histfunc='sum',
        customdata=[tips['smoker']]
    )
)

fig.update_traces(
    hovertemplate='<br>'.join([
        'Bill $: %{x}',
        'Tip $: %{y}',
        'Size: %{z}',
        'Smokes: %{customdata[0]}'
    ])
)

fig.show()

In [16]:
fig = px.density_heatmap(
    tips, 
    x="total_bill",
    y="tip",
    marginal_x="histogram",
    marginal_y="histogram"
)
fig.show()

## 3D

- scatter_3d

In [None]:
fig = px.scatter_3d(
  iris,
  x="sepal_length",
  y="sepal_width",
  z="petal_width",
  color="species",
  height=360,
  width=720
)

fig.show()

In [None]:
fig = px.scatter_3d(
  iris,
  x="sepal_length",
  y="sepal_width",
  z="petal_width",
  size="petal_length",
  # 散点最大值
  size_max=18,  
  opacity=0.7,
  symbol="species",
  height=360,
  width=720
)
fig.update_layout(margin=dict(l=0,r=0,b=0,t=0))

fig.show()

In [None]:
features = [
    np.random.rand(4,2).tolist(), 
    np.random.rand(5,2).tolist(), 
    np.random.rand(6,2).tolist(), 
    np.random.rand(5,2).tolist(), 
    np.random.rand(9,2).tolist()
]

labels = [[1, 1, 1, 1],
          [1, 1, 1, 1, 1],
          [2, 2, 2, 2, 2, 2],
          [2, 2, 2, 2, 2],
          [0, 0, 0, 0, 0, 0, 0, 0, 0]]

fig = go.Figure()
for i, feat in enumerate(features):
    feat = np.array(feat)
    fig.add_trace(
        go.Scatter3d(
            x=np.arange(len(feat)),
            y=feat[:,0],
            z=feat[:,1],
            mode='lines',
            hovertext=labels[i]
        )
    )
fig.show()

## 曲面

- go.Surface

非常消耗性能

In [None]:
x = np.arange(-5, 6)
y = np.arange(-5, 6)

In [None]:
data = np.meshgrid(x, y)

In [None]:
z = data[0] ** 2 + data[1] ** 2

In [None]:
fig = go.Figure(go.Surface(
    x=data[0],
    y=data[1],
    z=z
))

fig.update_traces(
    # 轮廓设置
    contours_z=dict(
        # 开启是否显示
        show=True,  
        usecolormap=True,
        # 颜色设置
        highlightcolor="mistyrose",
        # 高亮
        project_z=True
    )
)

fig.update_layout(
    title='带有轮廓的3D平面图',
    autosize=False,
    # 视角
    scene_camera_eye=dict(x=1.87, y=0.88, z=-0.64),
    width=600, 
    height=500,
    margin=dict(l=65, r=50, b=65, t=90)
)

## 滑块

- rangeslider

In [4]:
x = [1, 2, 3, 4, 5]
y = [10, 20, 30, 40, 50]

trace1 = go.Scatter(x=x, y=y, mode='markers')

fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scatter'}]])

fig.add_trace(trace1, row=1, col=1)
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1, label="1月", step="month", stepmode="backward"),
                dict(count=6, label="6月", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1年", step="year", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    )
)

fig.show()

## 动态效果

- animation_frame

In [5]:
from vega_datasets import data

In [6]:
dis = data.disasters()
dis.head(3)

Unnamed: 0,Entity,Year,Deaths
0,All natural disasters,1900,1267360
1,All natural disasters,1901,200018
2,All natural disasters,1902,46037


In [8]:
dy = dis[dis.Year > 1990]

In [9]:
dy.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 257 entries, 90 to 802
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Entity  257 non-null    object
 1   Year    257 non-null    int64 
 2   Deaths  257 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 8.0+ KB


In [11]:
fig = px.bar(
    dy,
    y="Entity",
    x="Deaths",
    animation_frame="Year",
    orientation='h',
    range_x=[0, dy.Deaths.max()],
    color="Entity"
)
fig.update_layout(
    width=720,
    height=600,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    title_text='Evolution of Natural Disasters',
    showlegend=False
)

fig.update_xaxes(title_text='Number of Deaths')
fig.update_yaxes(title_text='')
fig.show()

In [14]:

fig = go.Figure(go.Sunburst(labels=[
    "Female", "Male", "Dinner", "Lunch", 'Dinner ', 'Lunch ', 'Fri', 'Sat',
    'Sun', 'Thu', 'Fri ', 'Thu ', 'Fri  ', 'Sat  ', 'Sun  ', 'Fri   ', 'Thu   '
],
                            parents=[
                                "", "", "Female", "Female", 'Male', 'Male',
                                'Dinner', 'Dinner', 'Dinner', 'Dinner',
                                'Lunch', 'Lunch', 'Dinner ', 'Dinner ',
                                'Dinner ', 'Lunch ', 'Lunch '
                            ],
                            values=np.append(
                                np.append(
                                    tips.groupby('sex').tip.mean().values,
                                    tips.groupby(['sex',
                                                'time']).tip.mean().values,
                                ),
                                tips.groupby(['sex', 'time',
                                            'day']).tip.mean().values),
                            marker=dict(colors=px.colors.sequential.Emrld)),
                layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)',
                                 plot_bgcolor='rgba(0,0,0,0)'))
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0),
                  title_text='Tipping Habbits Per Gender, Time and Day')

fig.show()

In [33]:
fig = go.Figure(go.Indicator(
    domain = {'x': [0, 1], 'y': [0, 1]},
    value = 4.3,
    mode = "gauge+number+delta",
    title = {'text': "Success Metric"},
    delta = {'reference': 3.9},
    gauge = {'bar': {'color': "lightgreen"},
        'axis': {'range': [None, 5]},
             'steps' : [
                 {'range': [0, 2.5], 'color': "lightgray"},
                 {'range': [2.5, 4], 'color': "gray"}],
          })).update_layout(width=720, height=360)
fig.show()

In [22]:
movies = data.movies()
tmp = movies.dropna()

In [23]:
tmp.Major_Genre.factorize()

(array([ 0,  0,  0,  1,  2,  3,  4,  4,  3,  5,  3,  4,  0,  0,  0,  3,  2,
         0,  4,  5,  0,  3,  0,  3,  0,  5,  5,  4,  2,  3,  4,  2,  4,  4,
         2,  3,  0,  4,  2,  0,  3,  0,  6,  7,  2,  2,  8,  4,  3,  0,  4,
         3,  3,  0,  3,  3,  3,  3,  3,  0,  8,  4,  5,  7,  2,  4,  2,  4,
         8,  5,  5,  5,  5,  4,  5,  5,  5,  4,  0,  8,  3,  0,  4,  5,  8,
         0,  2,  3,  2,  2,  4,  3,  6,  3,  3,  3,  8,  3,  4,  4,  3,  9,
         3,  3,  5,  4,  0,  5,  3,  3,  3,  2,  3,  5,  3,  3,  5,  5,  6,
         3,  3,  8,  2,  3,  4,  3,  6,  3, 10,  0,  3,  2,  4,  2,  0,  6,
         6,  6,  4,  5,  2,  4,  3,  4,  2,  2,  4,  5,  5,  5,  4,  2,  3,
         0,  5,  0,  0,  6,  4,  3,  5,  2,  3,  4,  3,  4,  5,  3,  0,  3,
         4,  4,  4,  4], dtype=int64),
 Index(['Action', 'Western', 'Thriller/Suspense', 'Drama', 'Comedy',
        'Adventure', 'Horror', 'Musical', 'Romantic Comedy', 'Black Comedy',
        'Documentary'],
       dtype='object'))

In [24]:
tmp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 174 entries, 1064 to 3196
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Title                   174 non-null    object 
 1   US_Gross                174 non-null    float64
 2   Worldwide_Gross         174 non-null    float64
 3   US_DVD_Sales            174 non-null    float64
 4   Production_Budget       174 non-null    float64
 5   Release_Date            174 non-null    object 
 6   MPAA_Rating             174 non-null    object 
 7   Running_Time_min        174 non-null    float64
 8   Distributor             174 non-null    object 
 9   Source                  174 non-null    object 
 10  Major_Genre             174 non-null    object 
 11  Creative_Type           174 non-null    object 
 12  Director                174 non-null    object 
 13  Rotten_Tomatoes_Rating  174 non-null    float64
 14  IMDB_Rating             174 non-null  

In [20]:
pd.factorize(['b', 'b', 'a', 'c', 'b'])

(array([0, 0, 1, 2, 0], dtype=int64), array(['b', 'a', 'c'], dtype=object))

**pd.factorize**

Encode the object as an enumerated type or categorical variable.

This method is useful for obtaining a numeric representation of an array when all that matters is identifying distinct values

In [35]:
tmp.loc[:,'Genre_id'] = tmp.loc[:,'Major_Genre'].factorize()[0]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [30]:
tmp.loc[:,'Genre_id']

1064    0
1074    0
1090    0
1095    1
1107    2
       ..
3158    3
3181    4
3183    4
3195    4
3196    4
Name: Genre_id, Length: 174, dtype: int64

In [31]:
tmp.head(3)

Unnamed: 0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,Release_Date,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes,Genre_id
1064,12 Rounds,12234694.0,18184083.0,8283859.0,20000000.0,Mar 27 2009,PG-13,108.0,20th Century Fox,Original Screenplay,Action,Contemporary Fiction,Renny Harlin,28.0,5.4,8914.0,0
1074,2012,166112167.0,766812167.0,50736023.0,200000000.0,Nov 13 2009,PG-13,158.0,Sony Pictures,Original Screenplay,Action,Science Fiction,Roland Emmerich,39.0,6.2,396.0,0
1090,300,210614939.0,456068181.0,261252400.0,60000000.0,Mar 09 2007,R,117.0,Warner Bros.,Based on Comic/Graphic Novel,Action,Historical Fiction,Zack Snyder,60.0,7.8,235508.0,0


In [32]:

fig = px.parallel_categories(
    tmp,
    dimensions=['MPAA_Rating', 'Creative_Type', 'Major_Genre'],
    color="Genre_id",
    color_continuous_scale=px.colors.sequential.Emrld,
)
fig.show()