https://plotly.com/python/distplot/

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go

# Visualizing the distribution

In [2]:
# Load the example "tips" dataset that ships with plotly express and preview
# its first five rows.
tips = px.data.tips()
tips.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
# Histogram of total_bill, colored by sex, with a rug plot as the marginal.
# `marginal` can be "rug", "box" or "violin".
fig = px.histogram(
    tips,
    x="total_bill",
    color="sex",
    hover_data=tips.columns,  # pass every column of the frame as hover data
    marginal="rug",
)
fig.show()

In [4]:
# Same histogram as before, this time with a box plot as the marginal.
# `marginal` can be "rug", "box" or "violin".
fig = px.histogram(
    tips,
    x="total_bill",
    color="sex",
    hover_data=tips.columns,  # pass every column of the frame as hover data
    marginal="box",
)
fig.show()

In [5]:
# Same histogram again, now with a violin plot as the marginal.
# `marginal` can be "rug", "box" or "violin".
fig = px.histogram(
    tips,
    x="total_bill",
    color="sex",
    hover_data=tips.columns,  # pass every column of the frame as hover data
    marginal="violin",
)
fig.show()

# ff.create_distplot

## Basic

In [6]:
# A single dataset: 1000 draws from numpy's standard-normal generator.
x = np.random.randn(1000)

# create_distplot is given a list of datasets plus a parallel list of names,
# so even a single dataset is wrapped in a list.
hist_data, group_labels = [x], ["distplot"]

fig = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
fig.show()

## Plot Multiple Datasets

In [7]:
# Draw four independent samples of 200 standard-normal values (in order
# x1..x4), then shift three of them so the groups sit at different locations.
x1, x2, x3, x4 = (np.random.randn(200) for _ in range(4))
x1, x3, x4 = x1 - 2, x3 + 2, x4 + 4

# Datasets and their display names, in matching order.
hist_data = [x1, x2, x3, x4]
group_labels = ["Group 1", "Group 2", "Group 3", "Group 4"]

In [8]:
# Plot the grouped samples together, using a single custom bin size of 0.2.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=0.2,
)
fig.show()

## Use Multiple Bin Sizes

In [9]:
# Create a distplot where each group uses a different bin size: bin_size is
# given as a list instead of a single number.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=[0.1, 0.25, 0.5, 1],  # one bin size per group
)
fig.show()

## Customize Rug Text, Colors & Title

In [10]:
# Two small samples of 26 points each, so that every point can be paired with
# one letter-based label below.
x1 = np.random.randn(26)
x2 = np.random.randn(26) + 0.5

group_labels = ["2014", "2015"]

# Per-point labels: "a".."z" for the first sample and "aa".."zz" for the
# second. They are derived rather than typed out so the two lists cannot
# drift apart.
rug_text_one = list("abcdefghijklmnopqrstuvwxyz")
rug_text_two = [letter * 2 for letter in rug_text_one]

rug_text = [rug_text_one, rug_text_two]  # for hover in rug plot
colors = ["rgb(0, 0, 100)", "rgb(0, 200, 200)"]

# Create the distplot with custom rug hover text and trace colors, in
# addition to a custom bin size.
fig = ff.create_distplot(
    hist_data=[x1, x2],
    group_labels=group_labels,
    bin_size=0.2,
    colors=colors,
    rug_text=rug_text,
)

# Add title
fig.update_layout(title_text="Customized Distplot")
fig.show()

## Plot Normal Curve

In [11]:
# Two samples of 200 standard-normal draws; the second is shifted by +2.
x1 = np.random.randn(200)
x2 = np.random.randn(200) + 2

group_labels = ["Group 1", "Group 2"]
colors = ["slategray", "magenta"]

# Request the "normal" curve type instead of the default "kde".
fig = ff.create_distplot(
    hist_data=[x1, x2],
    group_labels=group_labels,
    colors=colors,
    bin_size=0.5,
    curve_type="normal",
)

# Add title
fig.update_layout(title_text="Distplot with Normal Distribution")
fig.show()

## Plot Only Curve and Rug

In [12]:
# Three samples of 200 standard-normal draws, shifted by -1, 0 and +1.
x1 = np.random.randn(200) - 1
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 1

# Datasets, their display names and their colors, in matching order.
hist_data = [x1, x2, x3]
group_labels = ["Group 1", "Group 2", "Group 3"]
colors = ["#333F44", "#37AA9C", "#94F3E4"]

# Create distplot with the histogram switched off (show_hist=False), leaving
# only the curve and the rug.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    show_hist=False,
    colors=colors,
)

# Add title
fig.update_layout(title_text="Curve and Rug Plot")
fig.show()

## Plot Only Hist and Rug

In [13]:
colors = ["#835AF1", "#7FA6EE", "#B8F7D4"]

# Create distplot with the curve switched off (show_curve=False), leaving only
# the histogram and the rug. hist_data and group_labels are not defined in
# this cell; they must already exist from an earlier one.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=0.25,
    show_curve=False,
    colors=colors,
)

# Add title
fig.update_layout(title_text="Hist and Rug Plot")
fig.show()

## Plot Only Hist and Curve

In [14]:
# Draw three samples of 200 standard-normal values (in order x1, x2, x3),
# then shift the first by -2 and the third by +2.
x1, x2, x3 = (np.random.randn(200) for _ in range(3))
x1, x3 = x1 - 2, x3 + 2

# Datasets, their display names and their colors, in matching order.
hist_data = [x1, x2, x3]
group_labels = ["Group 1", "Group 2", "Group 3"]
colors = ["#A56CC1", "#A6ACEC", "#63F5EF"]

# Create distplot with the rug switched off (show_rug=False), leaving only the
# histogram and the curve.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=0.2,
    show_rug=False,
    colors=colors,
)

# Add title
fig.update_layout(title_text="Hist and Curve Plot")
fig.show()

## Plot Hist and Rug with Different Bin Sizes

In [15]:
# Three samples of 200 standard-normal draws, shifted by -2, 0 and +2.
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2

hist_data = [x1, x2, x3]

group_labels = ["Group 1", "Group 2", "Group 3"]
colors = ["#393E46", "#2BCDC1", "#F66095"]

# Histogram and rug only (show_curve=False), with a separate bin size for each
# of the three groups.
fig = ff.create_distplot(
    hist_data,
    group_labels,
    colors=colors,
    bin_size=[0.3, 0.2, 0.1],
    show_curve=False,
)

# Add title -- uses update_layout(title_text=...) like every other cell in the
# notebook, instead of the equivalent fig.update(layout_title_text=...).
fig.update_layout(title_text="Hist and Rug Plot")
fig.show()

## Distplot with Pandas

In [17]:
# Build a two-column frame of 200 standard-normal draws per column; the "2013"
# column is shifted by +1. `pd` comes from the notebook's top import cell.
df = pd.DataFrame(
    {
        "2012": np.random.randn(200),
        "2013": np.random.randn(200) + 1,
    }
)
df.head()

Unnamed: 0,2012,2013
0,0.366837,1.232015
1,2.096536,0.897901
2,0.650585,0.951161
3,-0.88013,-0.067528
4,0.542561,2.294966


In [18]:
# Pass each DataFrame column as its own dataset and use the column names as
# the group labels.
fig = ff.create_distplot(
    hist_data=[df[col] for col in df.columns],
    group_labels=df.columns,
    bin_size=0.25,
)
fig.show()