# 数据可视化实战

## 精美制图: 掌握Plotly交互式绘图，绘制出更加精美的图形

### 1. plotly介绍和基础


* plotly语法介绍
* 一般使用离线绘图库

In [None]:
import plotly as py 
import plotly.graph_objs as go
from plotly.graph_objs import Scatter #导入graph_objs中的子模块

In [None]:
# # py.offline.plot 是离线绘图的主要函数
# py.offline.plot(data,filename ='fth.html')
# data 代表要绘制的数据
# filename 代表输出文件的路径
# filename 是生成的 html 文件的保存路径

In [None]:
# Offline plotting: the rendered figure is written out as an HTML file.
trace0 = Scatter(
    x=[1, 2, 3, 4],
    y=[10, 15, 13, 17],
)
trace1 = Scatter(
    x=[1, 2, 3, 4],
    y=[6, 5, 11, 9],
)
# Both traces are combined into one data list and drawn in the same figure.
data = [trace0, trace1]
py.offline.plot(data, filename='fth.html')


代码解释
* 1. 首先定义一个trace0的变量，用于保存绘图数据
* 2. 每个绘图对象都由 plotly 的 graph_objs(图形对象)模块中的类来定义,例如 Scatter
* 3. 定义变量trace0和trace1的x和y坐标
* 4. 定义一个data,把trace0和trace1组合成一组数据
* 5. 最后通过 py.offline.plot 函数绘制图形

In [None]:
# Output the figure inside the Jupyter notebook.
import plotly as py
from plotly.graph_objs import Scatter

py.offline.init_notebook_mode()  # initialisation step

trace0 = Scatter(
    x=[1, 2, 3, 4],
    y=[10, 15, 13, 17],
)
trace1 = Scatter(
    x=[1, 2, 3, 4],
    y=[6, 5, 11, 9],
)
data = [trace0, trace1]
py.offline.iplot(data, filename='12')

### 2. 常见图形绘制


* 散点图

In [None]:
import pandas as pd
import os
# Switch the working directory so that the bare file names passed to the
# pandas readers in this notebook resolve against the data folder.
# The path is a raw string: it contains backslashes followed by characters
# that are not valid escapes (for example "\d"), which a normal literal only
# tolerates with a warning on recent Python versions. The resulting string
# value is unchanged.
os.chdir(r'E:\云开明培训机构\云开见明培训课件\data summary\第四章')

In [None]:
# Load the income table and build the list of years 1994..2013 (inclusive).
income_data = pd.read_csv('income_data.csv')
year = [*range(1994, 2014)]

In [None]:
# Put the year list in front as the first column.
# DataFrame.insert raises ValueError when the column already exists, so the
# call is guarded to keep this cell safe to run more than once.
if 'year' not in income_data.columns:
    income_data.insert(0, 'year', year)

In [None]:
# Bare expression: the notebook shows the table as this cell's output.
income_data

In [None]:

# `mode` defines how a trace is drawn: markers (scatter), lines, or both.
pyplot = py.offline.iplot  # short alias used by the plotting cells below

trace1 = Scatter(
    x=income_data['year'],
    y=income_data['x2'],
    mode='markers',
    name='在职工工资总额',
)
trace2 = Scatter(
    x=income_data['year'],
    y=income_data['x3'],
    mode='lines',
    name='社会消费品零售总额',
)
trace3 = Scatter(
    x=income_data['year'],
    y=income_data['x8'],
    mode='markers+lines',
    name='地区生产总值',
)

# All three series share the year axis and are drawn in one figure.
data = [trace1, trace2, trace3]
pyplot(data, filename='fth.html')

* 线形图

In [None]:
# Line chart: columns x8 and x3 plotted against column 'y'.
# Per the trace names, 'y' is fiscal revenue, x8 is regional GDP and x3 is
# retail sales of consumer goods.
trace1 = Scatter(
    x=income_data['y'],
    y=income_data['x8'],
    name='地区生产总值和财政收入关系',
    mode='lines',
    line=dict(width=2, color='green'),
)
trace2 = Scatter(
    x=income_data['y'],
    y=income_data['x3'],
    name='社会消费品零售总额和财政收入关系',
    mode='lines',
    line=dict(width=2, color='blue'),
)

data = [trace1, trace2]

# go.Layout creates the layout object.
# The axis titles follow the data actually placed on each axis: fiscal
# revenue is on x and regional GDP is on y. The previous titles were swapped.
layout = go.Layout(
    title='财政收入变化',
    xaxis=dict(title='财政收入'),
    yaxis=dict(title='地区生产总值'),
    legend=dict(x=1, y=0.5, font=dict(size=5, color='black')),
)
fig = go.Figure(data=data, layout=layout)  # data + layout make up the figure
pyplot(fig, filename='fth.html')

* 柱状图

In [None]:
# Read the Prod_Trade workbook; the file name is resolved against the
# current working directory.
Prod_Trade  = pd.read_excel('Prod_Trade.xlsx')

In [None]:
# Count the number of orders in each region (occurrences of each value in
# the 'Region' column).
order_Region = Prod_Trade['Region'].value_counts()

In [None]:
# Bare expression: the notebook shows the per-region counts as cell output.
order_Region

In [None]:
# Basic bar chart: region labels on x, order counts on y.
trace_basic = [
    go.Bar(
        x=order_Region.index.tolist(),
        y=order_Region.values.tolist(),
        marker={'color': ["red", "blue", "green", 'darkgray', 'darkgrey', 'darkgreen']},
        opacity=0.39,  # transparency
    )
]
layout = go.Layout(
    title='柱状图',
    xaxis={'title': '地区'},
)
# data and layout together make up the figure object.
figure_basic = go.Figure(data=trace_basic, layout=layout)
pyplot(figure_basic, filename='styled_line.html')  # output

In [None]:
# Grouped (clustered) bar chart: read the Industry_GDP workbook first.
Industry_GDP = pd.read_excel('Industry_GDP.xlsx')

In [None]:
# Split the table into one sub-frame per Industry_Type value.
G1, G2, G3 = (
    Industry_GDP[Industry_GDP['Industry_Type'] == sector]
    for sector in ('第一产业', '第二产业', '第三产业')
)

# One bar trace per sector: quarter on x, GDP on y, sector name as label.
trace_1 = go.Bar(x=G1['Quarter'], y=G1['GDP'], name='第一产业')
trace_2 = go.Bar(x=G2['Quarter'], y=G2['GDP'], name='第二产业')
trace_3 = go.Bar(x=G3['Quarter'], y=G3['GDP'], name='第三产业')

trace = [trace_1, trace_2, trace_3]
layout = go.Layout(
    title='三大产业的GDP',
    xaxis={'title': '季度'},
)
# Assemble the figure from the traces and the layout, then render it.
figure = go.Figure(data=trace, layout=layout)
pyplot(figure, filename='styled_line.html')  # output

In [None]:
# Stacked bar chart: same three sector traces, with barmode='stack'.
Industry_GDP = pd.read_excel('Industry_GDP.xlsx')

# Row subsets, one per Industry_Type value.
G1 = Industry_GDP.loc[Industry_GDP['Industry_Type'] == '第一产业']
G2 = Industry_GDP.loc[Industry_GDP['Industry_Type'] == '第二产业']
G3 = Industry_GDP.loc[Industry_GDP['Industry_Type'] == '第三产业']

trace_1 = go.Bar(
    x=G1['Quarter'],
    y=G1['GDP'],
    name='第一产业',
)
trace_2 = go.Bar(
    x=G2['Quarter'],
    y=G2['GDP'],
    name='第二产业',
)
trace_3 = go.Bar(
    x=G3['Quarter'],
    y=G3['GDP'],
    name='第三产业',
)

trace = [trace_1, trace_2, trace_3]
layout = go.Layout(
    title='三大产业的GDP',
    xaxis={'title': '季度'},
    barmode='stack',
)
# Assemble the figure from the traces and the layout, then render it.
figure = go.Figure(data=trace, layout=layout)
pyplot(figure, filename='styled_line.html')  # output

* 直方图


In [None]:
# Histogram of the 'Age' column of the Titanic training data, drawn with
# histnorm='probability' and blue bars.
Titanic = pd.read_csv('titanic_train.csv')
data = [
    go.Histogram(
        x=Titanic['Age'],
        histnorm='probability',
        marker={'color': 'blue'},
    )
]
pyplot(data, filename='styled_line.html')  # output

* 饼图

In [None]:
# Bare expression: show the per-region order counts again as cell output.
order_Region

In [None]:
# Pie chart of the share of orders per region.
trace = [
    go.Pie(
        labels=order_Region.index,
        values=order_Region.values,
        hole=0.2,  # the middle (cut-out) part of the pie
        textfont={'size': 12, 'color': 'white'},
    )
]
layout = go.Layout(title='不同地区订单比例')
# data and layout together make up the figure object.
fig = go.Figure(data=trace, layout=layout)
pyplot(fig, filename='styled_line.html')  # output

### 3. 图形设置

* 多图表


In [None]:
# Compute the average number of orders across regions.
# numpy is imported here because this cell uses `np` and no earlier cell
# of the notebook brings it into scope.
import numpy as np

mean_value = np.mean(order_Region.values)
# Repeat the mean once per region, rather than a fixed 6 times, so the
# array always has the same length as order_Region.index.
y = np.tile(mean_value, len(order_Region))

In [None]:
# Bar trace with the order count of each region.
trace_basic = go.Bar(
    x=order_Region.index.tolist(),
    y=order_Region.values.tolist(),
    marker={'color': ["red", "blue", "green", 'darkgray', 'darkgrey', 'darkgreen']},
    opacity=0.39,  # transparency
)
# Line trace over the same region labels, using the precomputed `y` values.
trace_average = go.Scatter(
    x=order_Region.index.tolist(),
    y=y,
    mode='lines',
    name='平均值',
)

In [None]:
# Put the bar trace and the average-line trace into one list so both are
# passed to the same figure.
data = [trace_basic,trace_average]

In [None]:
# Layout with the chart title and the x-axis title.
layout = go.Layout(
    title='不同地区订单数量对比',
    xaxis={'title': '地区'},
)
# data and layout together make up the figure object.
figure_basic = go.Figure(data=data, layout=layout)
pyplot(figure_basic, filename='styled_line.html')  # output

* 双坐标轴
* 主要目的是为了体现数值较小的变量的变化规律

* 绘制地区生产总值，税收和财政收入之间的关系

In [None]:
# Dual-axis line chart: x8 on the primary y axis, x10 on a secondary y axis,
# both plotted against column 'y'.
trace1 = Scatter(
    x=income_data['y'],
    y=income_data['x8'],
    name='地区生产总值和财政收入关系',
    mode='lines',
    line={'width': 2, 'color': 'green'},
)
trace2 = Scatter(
    x=income_data['y'],
    y=income_data['x10'],
    name='税收和财政收入关系',
    mode='lines',
    line={'width': 2, 'color': 'blue'},
    yaxis='y2',  # attach this trace to the second y axis
)

data = [trace1, trace2]

# go.Layout creates the layout object; yaxis2 overlays 'y' on the right side.
layout = go.Layout(
    title='财政收入变化',
    xaxis={'title': '财政收入'},
    yaxis={'title': '地区生产总值'},
    yaxis2={'title': '税收收入', 'overlaying': 'y', 'side': 'right'},
    legend={'x': .1, 'y': 1, 'font': {'size': 12, 'color': 'black'}},
)
fig = go.Figure(data=data, layout=layout)  # data + layout make up the figure
pyplot(fig, filename='fth.html')

* 多子图

* 订单数量按地区和箱子类型汇总

In [None]:
# Count the number of orders for each box type (occurrences of each value
# in the 'Box_Type' column).
order_Box =  Prod_Trade['Box_Type'].value_counts()

In [None]:
# Create a figure with a 2-row, 1-column subplot grid.
# plotly.tools.make_subplots is deprecated in favour of
# plotly.subplots.make_subplots, so the newer function is used when it is
# available. Older plotly releases without plotly.subplots fall back to the
# original tools function.
from plotly import tools  # kept so the `tools` name stays available

try:
    from plotly.subplots import make_subplots
except ImportError:
    make_subplots = tools.make_subplots

fig = make_subplots(rows=2, cols=1)

In [None]:
# Bar trace with the order count of each region.
trace1 = go.Bar(
    x=order_Region.index.tolist(),
    y=order_Region.values.tolist(),
    marker={'color': ["red", "blue", "green", 'darkgray', 'darkgrey', 'darkgreen']},
    opacity=0.39,  # transparency
    name='不同地区订单',
)

In [None]:
# Bar trace with the order count of each box type.
trace2 = go.Bar(
    x=order_Box.index.tolist(),
    y=order_Box.values.tolist(),
    marker={'color': ["red", "blue", "green", 'darkgray', 'darkgrey', 'darkgreen']},
    opacity=0.99,  # transparency
    name='不同箱子类型订单',
)

In [None]:
# Add trace1 to the subplot at row 1, column 1.
fig.append_trace(trace1,1,1)

In [None]:
# Add trace2 to the subplot at row 2, column 1.
fig.append_trace(trace2,2,1)

In [None]:
# Set the figure height, width and title on the figure's layout.
fig['layout'].update(height=600,width =600, title='不同地区订单')

In [None]:

# Render the subplot figure through the pyplot alias.
pyplot(fig, filename='styled_line.html') # output
