Reference: https://plotly.com/python/violin/

In [59]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.colors import n_colors

# px.violin

## Basic

In [3]:
# Two 50-point normal samples, shifted to be centred at -1 and +1.
# A seeded Generator is used so the figures built from these samples are
# identical on every Restart & Run All.
rng = np.random.default_rng(42)
y0 = rng.standard_normal(50) - 1
y1 = rng.standard_normal(50) + 1

In [4]:
# Vertical violins: both samples are handed to `y` as a list of arrays.
samples = [y0, y1]
fig = px.violin(y=samples)
fig.show()

## Horizontal Violin Plot

In [5]:
# Same two samples, this time handed to `x` instead of `y`.
samples = [y0, y1]
fig = px.violin(x=samples)
fig.show()

## DataFrame

In [6]:
# Load the tips example dataset shipped with plotly express and preview it.
tips = px.data.tips()
tips.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [7]:
# Distribution of total_bill, mapped to the y axis.
fig = px.violin(tips, y="total_bill")
fig.show()

In [8]:
# Distribution of total_bill, mapped to the x axis.
fig = px.violin(tips, x="total_bill")
fig.show()

## Violin plot with box and data points

In [10]:
# `points` accepts one of: 'all', 'outliers', 'suspectedoutliers', False.
violin_options = {
    "y": "total_bill",
    "box": True,  # draw box plot inside the violin
    "points": "all",
}
fig = px.violin(tips, **violin_options)
fig.show()

In [11]:
# `points` accepts one of: 'all', 'outliers', 'suspectedoutliers', False.
violin_options = {
    "y": "total_bill",
    "box": True,  # draw box plot inside the violin
    "points": "outliers",
}
fig = px.violin(tips, **violin_options)
fig.show()

In [12]:
# `points` accepts one of: 'all', 'outliers', 'suspectedoutliers', False.
violin_options = {
    "y": "total_bill",
    "box": True,  # draw box plot inside the violin
    "points": "suspectedoutliers",
}
fig = px.violin(tips, **violin_options)
fig.show()

## Multiple Violin Plots (grouped by color)

In [18]:
# tip by smoker status, with `color="sex"` used as the grouping column.
fig = px.violin(
    tips,
    x="smoker",
    y="tip",
    color="sex",
    points="all",
    box=True,  # draw box plot inside the violin
    hover_data=tips.columns,
)
fig.show()

In [21]:
# violinmode accepts 'group' or 'overlay'; 'overlay' is used here so the
# violins are displayed overlapping each other.
grouping = {"x": "smoker", "y": "tip", "color": "sex"}
fig = px.violin(
    tips,
    points="all",
    hover_data=tips.columns,
    violinmode="overlay",
    **grouping,
)
fig.show()

# go.Violin

## Basic

In [23]:
# Download the violin example dataset and preview the first rows.
violin_csv_url = (
    "https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv"
)
df = pd.read_csv(violin_csv_url)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [24]:
# Build the single violin trace first, then wrap it in a figure.
total_bill_violin = go.Violin(
    y=df["total_bill"],
    x0="Total Bill",
    box_visible=True,  # show inside box
    meanline_visible=True,
    line_color="black",
    fillcolor="lightseagreen",
    opacity=0.6,
)
fig = go.Figure(data=total_bill_violin)

fig.update_layout(yaxis_zeroline=False)
fig.show()

## Grouped Violin Plot (plotting by group)

In [45]:
# Download the violin example dataset and preview the first rows.
violin_csv_url = (
    "https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv"
)
df = pd.read_csv(violin_csv_url)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [48]:
fig = go.Figure()

# One trace per sex: (value in the "sex" column, short label, line colour).
# The short label is used as trace name, legend group and scale group alike.
sex_styles = [("Male", "M", "blue"), ("Female", "F", "orange")]
for sex, label, color in sex_styles:
    rows = df["sex"] == sex
    fig.add_trace(
        go.Violin(
            x=df["day"][rows],
            y=df["total_bill"][rows],
            legendgroup=label,
            scalegroup=label,
            name=label,
            line_color=color,
        )
    )

# Settings shared by both traces.
fig.update_traces(box_visible=True, meanline_visible=True)
fig.update_layout(violinmode="group")
fig.show()

## Multiple Traces

In [None]:
# Download the violin example dataset and preview the first rows.
violin_csv_url = (
    "https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv"
)
df = pd.read_csv(violin_csv_url)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:
fig = go.Figure()

days = ["Thur", "Fri", "Sat", "Sun"]

# Add four traces, one violin per day.
for day in days:
    on_day = df["day"] == day
    day_violin = go.Violin(
        x=df.loc[on_day, "day"],
        y=df.loc[on_day, "total_bill"],
        name=day,
        box_visible=True,
        meanline_visible=True,
    )
    fig.add_trace(day_violin)

fig.show()

## Split Violin Plot

In [49]:
# Download the violin example dataset and preview the first rows.
violin_csv_url = (
    "https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv"
)
df = pd.read_csv(violin_csv_url)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [55]:
fig = go.Figure()

# (value in the "smoker" column, half of the violin to draw, line colour).
# "negative" is the left half and "positive" is the right half.
halves = [("Yes", "negative", "blue"), ("No", "positive", "orange")]
for smoker, side, color in halves:
    rows = df["smoker"] == smoker
    fig.add_trace(
        go.Violin(
            x=df["day"][rows],
            y=df["total_bill"][rows],
            legendgroup=smoker,
            scalegroup=smoker,
            name=smoker,
            side=side,
            line_color=color,
        )
    )
fig.update_traces(meanline_visible=True)
fig.update_layout(violingap=0, violinmode="overlay")
fig.show()

In [56]:
fig = go.Figure()

# (value in the "smoker" column, line colour).
# `side` ('negative' = left, 'positive' = right) is intentionally not set on
# these traces, so they keep whatever plotly uses when `side` is omitted.
smoker_colors = [("Yes", "blue"), ("No", "orange")]
for smoker, color in smoker_colors:
    rows = df["smoker"] == smoker
    fig.add_trace(
        go.Violin(
            x=df["day"][rows],
            y=df["total_bill"][rows],
            legendgroup=smoker,
            scalegroup=smoker,
            name=smoker,
            line_color=color,
        )
    )
fig.update_traces(meanline_visible=True)
fig.update_layout(violingap=0, violinmode="overlay")
fig.show()

## Advanced Violin Plot

In [57]:
# Download the violin example dataset and preview the first rows.
violin_csv_url = (
    "https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv"
)
df = pd.read_csv(violin_csv_url)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [58]:
# Split violins per day: male bills on the negative (left) side, female bills
# on the positive (right) side.
# pointpos gives, per day, where the sample points sit relative to the violin
# (per the plotly reference: negative = left, positive = right for vertical
# violins).
pointpos_male = [-0.9, -1.1, -0.6, -0.3]
pointpos_female = [0.45, 0.55, 1, 0.4]
# Only the traces of the first day request a legend entry; the rest share the
# same legend group and are flagged showlegend=False.
show_legend = [True, False, False, False]

fig = go.Figure()

# Compute the day labels and the per-sex masks once, instead of re-evaluating
# pd.unique(df["day"]) and the comparisons on every access inside the loop.
days = pd.unique(df["day"])
is_male = df["sex"] == "Male"
is_female = df["sex"] == "Female"

for i, day in enumerate(days):
    on_day = df["day"] == day
    male_rows = is_male & on_day
    female_rows = is_female & on_day

    fig.add_trace(
        go.Violin(
            x=df["day"][male_rows],
            y=df["total_bill"][male_rows],
            legendgroup="M",
            scalegroup="M",
            name="M",
            side="negative",
            pointpos=pointpos_male[i],  # where to position points (left side here)
            line_color="lightseagreen",
            showlegend=show_legend[i],
        )
    )
    fig.add_trace(
        go.Violin(
            x=df["day"][female_rows],
            y=df["total_bill"][female_rows],
            legendgroup="F",
            scalegroup="F",
            name="F",
            side="positive",
            pointpos=pointpos_female[i],  # where to position points (right side here)
            line_color="mediumpurple",
            showlegend=show_legend[i],
        )
    )

# update characteristics shared by all traces
fig.update_traces(
    meanline_visible=True,
    points="all",  # show all points
    jitter=0.05,  # add some jitter on points for better visibility
    scalemode="count",  # scale violin plot area with total count
)
fig.update_layout(
    # The <i> tag is now closed; the original title left it open.
    title_text="Total bill distribution<br><i>scaled by number of bills per gender</i>",
    violingap=0,
    violingroupgap=0,
    violinmode="overlay",
)
fig.show()

## Ridgeline plot

In [61]:
# 12 sets of normally distributed random data, with increasing mean and
# standard deviation. A seeded Generator replaces the unseeded global
# np.random calls so the figure is identical on every Restart & Run All.
rng = np.random.default_rng(0)
data = (
    np.linspace(1, 2, 12)[:, np.newaxis] * rng.standard_normal((12, 200))
    + (np.arange(12) + 2 * rng.random(12))[:, np.newaxis]
)

# One colour per data row, interpolated between the two RGB endpoints.
colors = n_colors("rgb(5, 200, 200)", "rgb(200, 10, 10)", 12, colortype="rgb")

fig = go.Figure()
for data_line, color in zip(data, colors):
    fig.add_trace(go.Violin(x=data_line, line_color=color))

# Show only the positive side of each violin.
fig.update_traces(orientation="h", side="positive", width=3, points=False)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.show()

In [62]:
# 12 sets of normally distributed random data, with increasing mean and
# standard deviation. A seeded Generator replaces the unseeded global
# np.random calls so the figure is identical on every Restart & Run All.
rng = np.random.default_rng(0)
data = (
    np.linspace(1, 2, 12)[:, np.newaxis] * rng.standard_normal((12, 200))
    + (np.arange(12) + 2 * rng.random(12))[:, np.newaxis]
)

# One colour per data row, interpolated between the two RGB endpoints.
colors = n_colors("rgb(5, 200, 200)", "rgb(200, 10, 10)", 12, colortype="rgb")

fig = go.Figure()
for data_line, color in zip(data, colors):
    fig.add_trace(go.Violin(x=data_line, line_color=color))

# The "show only the positive side" override
#   update_traces(orientation='h', side='positive', width=3, points=False)
# is deliberately not applied in this cell, so the traces keep their
# orientation/side/width/points as created above.
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.show()

# px.strip

## Violin Plot With Only Points

In [63]:
# Load the tips example dataset shipped with plotly express and preview it.
tips = px.data.tips()
tips.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [64]:
# Strip plot of tip by day.
fig = px.strip(tips, x="day", y="tip")
fig.show()