https://plotly.com/python/histograms/

In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# px.histogram

## Continuous values (连续值)

In [2]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
# Histogram of a continuous column, using the default binning.
fig = px.histogram(tips, x="total_bill")
fig.show()

## Discrete values / categories (离散值, 类别)

In [4]:
# The x column here holds categorical data ("day") rather than numbers.
fig = px.histogram(tips, x="day")
fig.show()

## Choosing the number of bins

In [5]:
# Same column as before, but with the number of bins requested via nbins.
fig = px.histogram(tips, x="total_bill", nbins=10)
fig.show()

## Histograms on Date Data

In [6]:
# Load the stocks sample dataset from px.data and preview its first rows.
stocks = px.data.stocks()
stocks.head()

Unnamed: 0,date,GOOG,AAPL,AMZN,FB,NFLX,MSFT
0,2018-01-01,1.0,1.0,1.0,1.0,1.0,1.0
1,2018-01-08,1.018172,1.011943,1.061881,0.959968,1.053526,1.015988
2,2018-01-15,1.032008,1.019771,1.05324,0.970243,1.04986,1.020524
3,2018-01-22,1.066783,0.980057,1.140676,1.016858,1.307681,1.066561
4,2018-01-29,1.008773,0.917143,1.163374,1.018357,1.273537,1.040708


In [7]:
# Histogram over the "date" column of the stocks data.
fig = px.histogram(stocks, x="date")
# bargap sets the spacing between adjacent bars.
fig.update_layout(bargap=0.2)
fig.show()

## Histograms on Categorical Data

In [8]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [9]:
# category_orders fixes the order in which the "day" categories appear on the axis.
weekday_order = {"day": ["Thur", "Fri", "Sat", "Sun"]}
fig = px.histogram(tips, x="day", category_orders=weekday_order)
fig.show()

## Accessing the counts (y-axis) values

In [10]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [11]:
# Compute the histogram with numpy so the per-bin counts are available as data:
# bin edges run from 0 to 55 in steps of 5.
counts, edges = np.histogram(tips.total_bill, bins=range(0, 60, 5))
# Place each bar at the midpoint of its bin (average of left and right edge).
bins = 0.5 * (edges[:-1] + edges[1:])

axis_labels = {'x': 'total_bill', 'y': 'count'}
fig = px.bar(x=bins, y=counts, labels=axis_labels)
fig.show()

## Type of normalization

In [12]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [13]:
# histnorm selects the normalization applied to the bar heights;
# here the bars are scaled as a probability density.
fig = px.histogram(tips, x="total_bill", histnorm='probability density')
fig.show()

## Aspect of the histogram plot

In [14]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [15]:
# Cosmetic options for the histogram, collected up front for readability.
axis_labels = {'total_bill': 'total bill'}  # one display label per data-frame column
bar_colors = ['indianred']                  # colour sequence used for the bars

fig = px.histogram(
    tips,
    x="total_bill",
    title='Histogram of bills',
    labels=axis_labels,
    opacity=0.8,
    log_y=True,  # draw the y axis on a log scale
    color_discrete_sequence=bar_colors,
)
fig.show()

## Colour by group (分组颜色)

In [16]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [17]:
# color="sex" draws a separate histogram for each category of that column.
fig = px.histogram(tips, x="total_bill", color="sex")
fig.show()

## Aggregating with other functions than count

In [18]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [19]:
# With both x and y given, histfunc chooses how the y values in each x bin
# are aggregated; here it is the average tip per total_bill bin.
fig = px.histogram(tips, x="total_bill", y="tip", histfunc='avg')
fig.show()

In [20]:
# When y is given and histfunc is omitted, the default aggregation is sum.
# This works with categorical x data as well as binned numeric x data.
weekday_order = {"day": ["Thur", "Fri", "Sat", "Sun"]}
fig = px.histogram(tips, x="day", y="total_bill", category_orders=weekday_order)
fig.show()

In [21]:
# Total bill per sex, with the bars also coloured by the same category.
fig = px.histogram(tips, x="sex", y="total_bill", color="sex")
fig.show()

In [22]:
# Two levels of grouping: colour distinguishes "sex", and the bar fill
# pattern distinguishes "smoker" within each colour.
fig = px.histogram(
    tips,
    x="sex",
    y="total_bill",
    color="sex",
    pattern_shape="smoker",
)
fig.show()

## Visualizing the distribution

In [23]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [24]:
# Add a marginal plot next to the histogram; marginal may be
# `rug`, `box` or `violin`. Every column is included in the hover data.
fig = px.histogram(
    tips,
    x="total_bill",
    color="sex",
    marginal="rug",
    hover_data=tips.columns,
)
fig.show()

In [25]:
# Same figure with a box plot as the marginal
# (marginal may be `rug`, `box` or `violin`).
fig = px.histogram(
    tips,
    x="total_bill",
    color="sex",
    marginal="box",
    hover_data=tips.columns,
)
fig.show()

In [26]:
# Same figure with a violin plot as the marginal
# (marginal may be `rug`, `box` or `violin`).
fig = px.histogram(
    tips,
    x="total_bill",
    color="sex",
    marginal="violin",
    hover_data=tips.columns,
)
fig.show()

## Adding text labels

In [27]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [28]:
# Average tip per total_bill bin (nbins=8), with text_auto=True adding
# a text label to each bar.
fig = px.histogram(
    tips,
    x="total_bill",
    y="tip",
    histfunc="avg",
    nbins=8,
    text_auto=True,
)
fig.show()

# go.Histogram

## Basic

In [49]:
# 500 samples from the standard normal distribution, shown as a single
# graph_objects histogram trace.
x = np.random.randn(500)

fig = go.Figure()
fig.add_trace(go.Histogram(x=x))
fig.show()

## Normalized Histogram

In [48]:
# histnorm='probability' normalizes the bar heights instead of showing raw counts.
# Uses the sample `x` created in the Basic cell.
fig = go.Figure()
fig.add_trace(go.Histogram(x=x, histnorm='probability'))

fig.show()

## Horizontal Histogram

In [31]:
# Passing the sample as y instead of x makes the bars horizontal.
fig = go.Figure()
fig.add_trace(go.Histogram(y=x))
fig.show()

## Histogram Bar Text

In [50]:
numbers = ["5", "10", "3", "10", "5", "8", "5", "5"]

# texttemplate controls the text drawn on each bar; textfont sets its size.
labelled_trace = go.Histogram(
    x=numbers,
    name="count",
    texttemplate="%{x}",
    textfont=dict(size=20),
)

fig = go.Figure(data=[labelled_trace])

fig.show()

## Overlaid Histogram: multiple traces (多组数据), with `barmode` set to one of 'stack', 'group', 'overlay', 'relative'

In [32]:
# Two normal samples; the second is shifted by +1 so its mean differs.
x0 = np.random.randn(500)
x1 = np.random.randn(500) + 1

fig = go.Figure(data=[go.Histogram(x=x0), go.Histogram(x=x1)])

# barmode accepts 'stack', 'group', 'overlay' or 'relative'; this cell uses 'stack'.
fig.update_layout(barmode='stack')

# Semi-transparent bars keep both histograms visible.
fig.update_traces(opacity=0.75)
fig.show()

In [33]:
# Two normal samples; the second is shifted by +1 so its mean differs.
x0 = np.random.randn(500)
x1 = np.random.randn(500) + 1

fig = go.Figure(data=[go.Histogram(x=x0), go.Histogram(x=x1)])

# barmode accepts 'stack', 'group', 'overlay' or 'relative'; this cell uses 'group'.
fig.update_layout(barmode='group')

# Semi-transparent bars keep both histograms visible.
fig.update_traces(opacity=0.75)
fig.show()

In [34]:
# Two normal samples; the second is shifted by +1 so its mean differs.
x0 = np.random.randn(500)
x1 = np.random.randn(500) + 1

fig = go.Figure(data=[go.Histogram(x=x0), go.Histogram(x=x1)])

# barmode accepts 'stack', 'group', 'overlay' or 'relative'; this cell uses 'overlay'.
fig.update_layout(barmode='overlay')

# Semi-transparent bars keep both histograms visible.
fig.update_traces(opacity=0.75)
fig.show()

In [35]:
# Two normal samples; the second is shifted by +1 so its mean differs.
x0 = np.random.randn(500)
x1 = np.random.randn(500) + 1

fig = go.Figure(data=[go.Histogram(x=x0), go.Histogram(x=x1)])

# barmode accepts 'stack', 'group', 'overlay' or 'relative'; this cell uses 'relative'.
fig.update_layout(barmode='relative')

# Semi-transparent bars keep both histograms visible.
fig.update_traces(opacity=0.75)
fig.show()

## Styled Histogram

In [36]:
# Two normal samples; the second is shifted by +1 so the groups differ.
x0 = np.random.randn(500)
x1 = np.random.randn(500) + 1

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',  # bar heights are percentages, not raw counts
    name='control',  # name used in legend and hover labels
    xbins=dict(  # explicit bins used for this histogram
        start=-4.0,
        end=3.0,
        size=0.5
    ),
    marker_color='#EB89B5',
    opacity=0.75
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='experimental',
    xbins=dict(
        start=-3.0,
        end=4,
        size=0.5
    ),
    marker_color='#330C73',
    opacity=0.75
))

fig.update_layout(
    title_text='Sampled Results',  # title of plot
    xaxis_title_text='Value',  # x-axis label
    # Both traces use histnorm='percent', so the y axis shows percentages;
    # the label must say so rather than 'Count'.
    yaxis_title_text='Percent',  # y-axis label
    bargap=0.2,  # gap between bars of adjacent location coordinates
    bargroupgap=0.1  # gap between bars of the same location coordinates
)

fig.show()

## Cumulative Histogram

In [38]:
x = np.random.randn(500)

# Enabling the `cumulative` option makes the trace a cumulative histogram.
cumulative_trace = go.Histogram(x=x, cumulative=dict(enabled=True))
fig = go.Figure(data=[cumulative_trace])

fig.show()

## Specify Aggregation Function

In [39]:
x = ["Apples", "Apples", "Apples", "Oranges", "Bananas"]
y = ["5", "10", "3", "10", "5"]

# One trace per aggregation function; each trace is named after its histfunc.
fig = go.Figure()
for func in ("count", "sum"):
    fig.add_trace(go.Histogram(histfunc=func, y=y, x=x, name=func))

fig.show()

## Custom Binning

In [40]:
x = ['1970-01-01', '1970-01-01', '1970-02-01', '1970-04-01', '1970-01-02',
     '1972-01-31', '1970-02-13', '1971-04-19']

# 3x2 grid: the first three panels vary nbinsx, the last three use
# explicit date bins of different sizes.
fig = make_subplots(rows=3, cols=2)

trace0 = go.Histogram(x=x, nbinsx=4)
trace1 = go.Histogram(x=x, nbinsx=8)
trace2 = go.Histogram(x=x, nbinsx=10)
trace3 = go.Histogram(x=x,
                      xbins=dict(
                          start='1969-11-15',
                          end='1972-03-31',
                          size='M18'),  # M18 stands for 18 months
                      autobinx=False
                      )
trace4 = go.Histogram(x=x,
                      xbins=dict(
                          start='1969-11-15',
                          end='1972-03-31',
                          size='M4'),  # 4 months bin size
                      autobinx=False
                      )
trace5 = go.Histogram(x=x,
                      xbins=dict(
                          start='1969-11-15',
                          end='1972-03-31',
                          size='M2'),  # 2 months
                      autobinx=False
                      )

# Use add_trace with row/col: append_trace is deprecated and emits a
# DeprecationWarning pointing at add_trace.
fig.add_trace(trace0, row=1, col=1)
fig.add_trace(trace1, row=1, col=2)
fig.add_trace(trace2, row=2, col=1)
fig.add_trace(trace3, row=2, col=2)
fig.add_trace(trace4, row=3, col=1)
fig.add_trace(trace5, row=3, col=2)

fig.show()

## Share bins between histograms

In [41]:
# Two integer samples of different sizes (values drawn from 0..6).
large_sample = np.random.randint(7, size=100)
small_sample = np.random.randint(7, size=20)

# Both traces carry the same bingroup value so they share bins.
fig = go.Figure()
for sample in (large_sample, small_sample):
    fig.add_trace(go.Histogram(x=sample, bingroup=1))

# barmode accepts 'stack', 'group', 'overlay' or 'relative'.
fig.update_layout(barmode="overlay", bargap=0.1)

fig.show()

## Sort Histogram by Category Order

In [42]:
# Load the tips sample dataset from px.data and preview its first rows.
tips = px.data.tips()
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [43]:
# Order the "day" categories on the x axis by their bar totals, smallest first.
fig = px.histogram(tips, x="day")
fig.update_xaxes(categoryorder='total ascending')
fig.show()

In [44]:
# Split by smoker status, then order the "day" categories by their totals,
# largest first.
fig = px.histogram(tips, x="day", color="smoker")
fig.update_xaxes(categoryorder='total descending')
fig.show()