'''
【课程5.6】  柱状图/堆叠图/直方图

① 单系列柱状图
② 多系列柱状图
③ 堆叠图
④ 直方图

'''


In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from bokeh.io import output_notebook
output_notebook()

from bokeh.plotting import figure,show
from bokeh.models import ColumnDataSource
# Import the chart-building and chart-display helpers (figure, show)
# Import the ColumnDataSource data container

In [2]:
# 1. Single-series bar chart
# vbar: vertical bars

# The outline ("line_*") and the interior ("fill_*") are styled separately
# here.  Passing a single color=/alpha= pair instead (for example
# color="firebrick", or a list with one colour per bar) would style both
# at once.
bar_style = dict(
    line_width=1, line_alpha=0.8, line_color='black', line_dash=[5, 2],
    fill_color='red', fill_alpha=0.6,
)

p = figure(plot_width=600, plot_height=400)
p.vbar(
    x=[1, 2, 3],           # x: position of each bar on the horizontal axis
    width=0.5,             # width: bar width
    bottom=0,              # bottom: lower edge, shared by all bars here
    top=[1.2, 2.5, 3.7],   # top: upper edge of each bar
    **bar_style
)
show(p)

In [3]:
# 1. Single-series bar chart
# hbar: horizontal bars

# One entry per bar: vertical position, left edge, right edge and colour.
bar_y = [1, 2, 2.5]
bar_left = [0.5, 0.4, 0.3]
bar_right = [1.2, 2.4, 0.6]
bar_colors = ['blue', 'yellow', 'green']

p = figure(x_range=[0, 3], y_range=[0, 3])
p.hbar(y=bar_y, left=bar_left, right=bar_right, height=0.5, color=bar_colors)

show(p)

In [11]:
# 1. Single-series bar chart with categorical (text) labels
# The data is wrapped in a ColumnDataSource and the category names are
# handed to the figure as its x_range.
from bokeh.palettes import Spectral6
from bokeh.transform import factor_cmap

fruits = ['apples', 'pears', 'grapes', 'banana', 'pinenapple', 'nuts']
counts = [2, 4, 5, 7, 3, 1]
# Note: this list is not used by the plot below, which takes its colours
# from the Spectral6 palette.
colors = ['pink', 'purple', 'orange', 'olive', 'skyblue', 'gold']

# ColumnDataSource holding the labels and the bar heights as named columns.
source = ColumnDataSource(data={'fruits': fruits, 'counts': counts})

# factor_cmap(field_name, palette, factors, start=0, end=None, nan_color='gray')
# builds a colour-mapping transform:
#   field_name - column holding the category of each bar
#   palette    - colours to pick from
#   factors    - categories that are matched up with the palette entries
# Reference: the bokeh.transform section of the Bokeh reference docs.
bar_color = factor_cmap('fruits', palette=Spectral6, factors=fruits)

p = figure(x_range=fruits, y_range=(0, 8), plot_height=350,
           title='水果数量', tools='')

# 'fruits' and 'counts' name columns of `source`; the x axis shows the
# category labels directly.
p.vbar(x='fruits', top='counts', width=0.9, source=source,
       alpha=0.8, color=bar_color, legend='水果')

# Cosmetics: no vertical grid lines, horizontal legend in the top-right corner.
p.xgrid.grid_line_color = None
p.legend.location = 'top_right'
p.legend.orientation = 'horizontal'
show(p)

In [9]:
# Bar chart built straight from a DataFrame: the index supplies the x
# positions, the columns supply the bar heights and the bar colours.
df = pd.DataFrame({
    'counts': [2, 4, 5, 7, 3, 1],
    'colors': ['pink', 'purple', 'orange', 'olive', 'skyblue', 'gold'],
})

bar_tops = df['counts']
bar_colors = df['colors']

p = figure(plot_width=600, plot_height=400)
p.vbar(x=df.index, top=bar_tops, bottom=0, width=0.5, color=bar_colors)
show(p)
print(df)

    colors  counts
0     pink       2
1   purple       4
2   orange       5
3    olive       7
4  skyblue       3
5     gold       1


In [16]:
# 2. Multi-series (grouped) bar chart
# vbar
# A ColumnDataSource is usually built from lists, but the data can just as
# well come from a DataFrame.
from bokeh.transform import dodge
from bokeh.core.properties import value

df = pd.DataFrame(
    {'2015': [2, 3, 4, 5, 10, 8],
     '2016': [2, 4, 5, 6, 8, 9],
     '2017': [9, 3, 2, 7, 5, 3]},
    index=['appels', 'pears', 'bananas', 'nuts', 'pines', 'pineappels'])

fruits = df.index.tolist()
years = df.columns.tolist()

# Plain dict: the category labels under 'index' plus one list per year.
data = dict(index=fruits)
data.update((col, df[col].tolist()) for col in years)
print(data)

# Wrap the dict in a ColumnDataSource.
source = ColumnDataSource(data=data)

p = figure(x_range=fruits, y_range=(0, 13), plot_height=400, title='年水果量')

# Draw one set of bars per year.
# dodge(field_name, value, range=None) offsets each bar from its category
# position by `value`, so the three series sit side by side; together with
# width=0.2 the offsets determine the gap between neighbouring bars.
# value(val, transform=None) wraps the year so that the legend shows it as
# literal text.
for year, offset, bar_color in [('2015', -0.25, 'red'),
                                ('2016', 0, 'yellow'),
                                ('2017', 0.25, 'blue')]:
    p.vbar(x=dodge('index', offset, range=p.x_range), top=year, width=0.2,
           source=source, color=bar_color, legend=value(year))

# Remaining cosmetics.
p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_left"

show(p)

{'2016': [2, 4, 5, 6, 8, 9], '2017': [9, 3, 2, 7, 5, 3], '2015': [2, 3, 4, 5, 10, 8], 'index': ['appels', 'pears', 'bananas', 'nuts', 'pines', 'pineappels']}


In [19]:
# 3. Stacked bar chart

from bokeh.core.properties import value

fruits = ['appels', 'pears', 'nuts', 'bananas', 'grapes', 'oranges']
years = ['2015', '2016', '2017']
colors = ['red', 'yellow', 'blue']

# One list of per-fruit values for each year, in the same order as `years`.
yearly_values = [
    [2, 1, 4, 5, 6, 6],   # 2015
    [7, 3, 5, 6, 4, 2],   # 2016
    [2, 3, 5, 8, 1, 2],   # 2017
]
data = {'fruits': fruits}
data.update(zip(years, yearly_values))

source = ColumnDataSource(data=data)
p = figure(x_range=fruits, title='年水果量')

# Draw the stack.  The first argument must be the list of stack layers:
# `source` has one column per year, and `years` names those columns.
layer_legends = list(map(value, years))
renders = p.vbar_stack(
    years,
    x='fruits',            # column providing the x coordinate of each stack
    width=0.9,
    color=colors,
    source=source,
    legend=layer_legends,
    name=years)

# Remaining cosmetics.
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None

show(p)


In [26]:
# 3. Stacked bar chart - exports and imports on opposite sides of zero

from bokeh.palettes import GnBu3, OrRd3

fruits = ['appels', 'pears', 'nuts', 'bananas', 'grapes', 'oranges']
years = ['2015', '2016', '2017']

# Exports are positive, so their bars stack to the right of zero.
exports = {'fruits': fruits,
           '2015': [2, 1, 4, 5, 6, 6],
           '2016': [7, 3, 5, 6, 4, 2],
           '2017': [2, 3, 5, 8, 1, 2]}

# Imports are entered as negative numbers so that their bars stack to the
# left of zero.  Every entry has to be negative: a positive value would be
# stacked rightwards, on top of the export bars.
imports = {'fruits': fruits,
           '2015': [-1, -1, -7, -5, -5, -6],
           '2016': [-5, -3, -2, -6, -2, -2],
           '2017': [-1, -4, -5, -4, -1, -2]}

p = figure(y_range=fruits, plot_height=350, x_range=(-16, 16),
           title="Fruit import/export, by year")

# Export stack (positive values).
p.hbar_stack(years, y='fruits', height=0.9, color=GnBu3,
             source=ColumnDataSource(exports),
             legend=["%s exports" % x for x in years])

# Import stack (negative values).
p.hbar_stack(years, y='fruits', height=0.9, color=OrRd3,
             source=ColumnDataSource(imports),
             legend=["%s imports" % x for x in years])

# Remaining cosmetics.
p.y_range.range_padding = 0.2     # padding at the ends of the categorical axis
p.ygrid.grid_line_color = None
p.legend.location = "top_left"
p.axis.minor_tick_line_color = None
p.outline_line_color = None

show(p)

In [30]:
# 4. Histogram
# np.histogram + figure.quad()
# No ColumnDataSource is needed here.

df = pd.DataFrame({'value': np.random.randn(1000) * 100})
df.index.name = 'index'

# Bin the data into histogram form.
# np.histogram(a, bins=10, range=None, weights=None, density=None)
#   a       - input data
#   bins    - number of bins
#   range   - (min, max) span to bin over; defaults to (a.min(), a.max())
#   weights - optional weight for each sample
#   density - True: return the probability density (normalised so that the
#             integral over the range is 1); False: return raw counts
# Returns
#   hist  - the value of each bin (used as the bar tops below)
#   edges - the bin boundaries; n bins give n + 1 edges
hist, edges = np.histogram(df['value'], bins=20)
print(hist[:5])
print(edges)

p = figure(title="HIST", tools="save", background_fill_color="#E8DDCB")

# figure.quad draws the histogram: one rectangle per bin, with consecutive
# edges as its left and right sides and the bin value as its top.
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
       fill_color="#036564", line_color="#033649")

show(p)

[ 1  0  2  3 15]
[-363.05676747 -329.82780072 -296.59883397 -263.36986722 -230.14090047
 -196.91193372 -163.68296696 -130.45400021  -97.22503346  -63.99606671
  -30.76709996    2.46186679   35.69083355   68.9198003   102.14876705
  135.3777338   168.60670055  201.83566731  235.06463406  268.29360081
  301.52256756]
