## Chapter 8 数据可视化中的交互

### 8.4 实例案例
ECharts本身是基于JavaScript的交互式数据可视化图表库，十分适合用于生成可视化交互图形。使用pyecharts可以在Python中与ECharts对接，生成具有各种效果的动态图，用户也可以在图形上进行各种交互操作。以下几个部分将对之前学习过的各类图进行增强绘制，以达到更好的可视化效果和交互效果。

#### 8.4.1 折线图

相比于普通的折线图，这里添加了最高最低的标签点，同时增加了平均值参考线。

In [3]:
from pyecharts.charts import Line
import pyecharts.options as opts
import pandas as pd

# Load the daily city table and keep only Delhi's Date and AQI columns.
df = pd.read_csv('./data/city_day.csv')
is_delhi = df['City'] == 'Delhi'
data = df.loc[is_delhi, ['Date', 'AQI']]

# Axis values as plain lists: dates on x, AQI readings on y.
x = list(data['Date'])
y = list(data['AQI'])

# Markers on the highest and lowest points of the series.
extreme_points = opts.MarkPointOpts(
    data=[
        opts.MarkPointItem(type_='max', name='最大值'),
        opts.MarkPointItem(type_='min', name='最小值'),
    ]
)
# Reference line drawn at the average of the series.
average_line = opts.MarkLineOpts(data=[opts.MarkLineItem(type_='average')])

# Centered title, vertical legend docked on the left.
chart_title = opts.TitleOpts(title='2015-2020印度德里AQI走势图', pos_left='center')
legend_box = opts.LegendOpts(pos_left='left', orient='vertical')

line = Line()
line.add_xaxis(x)
line.add_yaxis('Delhi', y)
line.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    markpoint_opts=extreme_points,
    markline_opts=average_line,
    label_opts=opts.LabelOpts(is_show=False),
)
line.set_global_opts(title_opts=chart_title, legend_opts=legend_box)

# To write a standalone HTML file instead, call line.render('line_AQI.html').
line.render_notebook()

数据中还包含更多指标，如PM2.5，只需要将代码中的列名（标签）替换为对应指标即可。

In [4]:
from pyecharts.charts import Line
import pyecharts.options as opts
import pandas as pd

# Same chart as the AQI one; only the column label changes to PM2.5.
df = pd.read_csv('./data/city_day.csv')
delhi_rows = df['City'] == 'Delhi'
data = df.loc[delhi_rows, ['Date', 'PM2.5']]

x = list(data['Date'])
y = list(data['PM2.5'])

line = Line()
line.add_xaxis(x)
line.add_yaxis('Delhi', y)

# Series styling: translucent area fill, max/min markers, average line,
# and no per-point value labels.
max_marker = opts.MarkPointItem(type_='max', name='最大值')
min_marker = opts.MarkPointItem(type_='min', name='最小值')
series_style = dict(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    markpoint_opts=opts.MarkPointOpts(data=[max_marker, min_marker]),
    markline_opts=opts.MarkLineOpts(data=[opts.MarkLineItem(type_='average')]),
    label_opts=opts.LabelOpts(is_show=False),
)
line.set_series_opts(**series_style)

# Chart-level layout: centered title and a vertical legend on the left.
global_style = dict(
    title_opts=opts.TitleOpts(
        title='2015-2020印度德里PM2.5走势图',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(pos_left='left', orient='vertical'),
)
line.set_global_opts(**global_style)

# To write a standalone HTML file instead, call line.render('line_PM2.5.html').
line.render_notebook()


2015—2020年的逐日数据虽然覆盖较为全面，但数据点非常密集、起伏较大，整体看起来可能比较杂乱。我们可以按年份取均值对图进行简化，达到更加简洁美观的可视化效果。

In [5]:
from pyecharts.charts import Line
import pyecharts.options as opts
import pandas as pd

df = pd.read_csv('./data/city_day.csv')
data = df.loc[df['City'] == 'Delhi', ['Date', 'AQI']]

# Year of every row: the text before the first '-' of the date string.
# It is kept as a separate Series rather than written back as a new column,
# because `data` is a selection taken from df and assigning to it can
# trigger pandas' SettingWithCopyWarning.
years = data['Date'].str.split('-').str[0]

# Mean AQI per year. groupby sorts its keys by default, so the result is
# already in ascending year order and no extra sort is needed.
# (The original author points to the theme river chart example for a
# chained-call variant of this aggregation.)
year_mean = data['AQI'].groupby(years).mean()

x = year_mean.index.tolist()
# Truncate every yearly mean to an integer for display.
y = [int(value) for value in year_mean]

line = Line()
line.add_xaxis(x)
line.add_yaxis('Delhi', y)

line.set_series_opts(
    markpoint_opts=opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_='max', name='最大值'),
            opts.MarkPointItem(type_='min', name='最小值'),
        ]
    ),
    # No average reference line here, unlike the daily chart.
    label_opts=opts.LabelOpts(is_show=False),
)

line.set_global_opts(
    title_opts=opts.TitleOpts(
        title='2015-2020印度德里AQI年均走势图',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(
        pos_left='left',
        orient='vertical',
    )
)

# To write a standalone HTML file instead, call line.render('line_AQI_year.html').
line.render_notebook()

In [6]:
from pyecharts.charts import Line
import pyecharts.options as opts
import pandas as pd

df = pd.read_csv('./data/city_day.csv')
data = df.loc[df['City'] == 'Delhi', ['Date', 'PM2.5']]

# Year of every row (text before the first '-'). Held in its own Series so
# that nothing is assigned to `data`, which is a selection from df and could
# otherwise trigger pandas' SettingWithCopyWarning.
years = data['Date'].str.split('-').str[0]

# Mean PM2.5 per year; groupby returns the years in ascending order.
year_mean = data['PM2.5'].groupby(years).mean()

x = year_mean.index.tolist()
# Truncate every yearly mean to an integer for display.
y = [int(value) for value in year_mean]

line = Line()
line.add_xaxis(x)
line.add_yaxis('Delhi', y)

line.set_series_opts(
    # Flag the highest and lowest yearly values.
    markpoint_opts=opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_='max', name='最大值'),
            opts.MarkPointItem(type_='min', name='最小值'),
        ]
    ),
    label_opts=opts.LabelOpts(is_show=False),
)

line.set_global_opts(
    title_opts=opts.TitleOpts(
        title='2015-2020印度德里PM2.5年度走势图',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(
        pos_left='left',
        orient='vertical',
    )
)

# To write a standalone HTML file instead, call line.render('line_PM2.5_year.html').
line.render_notebook()

#### 8.4.2 箱形图

将原数据按照年份分段（每两年为一组）进行分析，可以使用箱形图来进行展示。


In [7]:
from pyecharts.charts import Boxplot
import pyecharts.options as opts
import pandas as pd

df = pd.read_csv('./data/city_day.csv')
data = df.loc[df['City'] == 'Delhi', ['Date', 'AQI']]

# Drop days without a reading before computing statistics: the CSV may
# contain missing values, and NaN does not order consistently against
# numbers, so leaving it in would distort the quartiles.
valid = data.dropna(subset=['AQI'])
years = valid['Date'].str.split('-').str[0]

# Split the readings into three two-year periods (x-axis label -> years).
periods = {
    '2015-2016年': ['2015', '2016'],
    '2017-2018年': ['2017', '2018'],
    '2019-2020年': ['2019', '2020'],
}

x = list(periods)
# One list of native Python floats per period, in the original row order.
y = [valid.loc[years.isin(span), 'AQI'].tolist() for span in periods.values()]

boxplot = Boxplot()

boxplot.add_xaxis(x)
# prepare_data computes, for each list, the box-plot summary: lower edge,
# first quartile (1/4), median, third quartile (3/4) and upper edge.
boxplot.add_yaxis('Delhi', boxplot.prepare_data(y))

boxplot.set_global_opts(
    title_opts=opts.TitleOpts(
        title='2015-2020印度德里AQI箱型图',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(
        pos_left='left',
        orient='vertical',
    )
)

# To write a standalone HTML file instead, call boxplot.render('boxplot_AQI.html').
boxplot.render_notebook()

In [8]:
from pyecharts.charts import Boxplot
import pyecharts.options as opts
import pandas as pd

df = pd.read_csv('./data/city_day.csv')
data = df.loc[df['City'] == 'Delhi', ['Date', 'PM2.5']]

# Remove days without a PM2.5 reading: the CSV may contain missing values,
# and NaN does not order consistently against numbers, so it would distort
# the quartiles computed below.
valid = data.dropna(subset=['PM2.5'])
years = valid['Date'].str.split('-').str[0]

# Three two-year periods (x-axis label -> years it covers).
periods = {
    '2015-2016年': ['2015', '2016'],
    '2017-2018年': ['2017', '2018'],
    '2019-2020年': ['2019', '2020'],
}

x = list(periods)
# One list of native Python floats per period, in the original row order.
y = [valid.loc[years.isin(span), 'PM2.5'].tolist() for span in periods.values()]

boxplot = Boxplot()

boxplot.add_xaxis(x)
# prepare_data turns each list of readings into its box-plot summary values.
boxplot.add_yaxis('Delhi', boxplot.prepare_data(y))

boxplot.set_global_opts(
    title_opts=opts.TitleOpts(
        title='2015-2020印度德里PM2.5箱型图',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(
        pos_left='left',
        orient='vertical',
    )
)

# To write a standalone HTML file instead, call boxplot.render('boxplot_PM2.5.html').
boxplot.render_notebook()

#### 8.4.3 饼图

用户可以通过单击图例，选择是否在饼图中显示该等级的占比情况。

In [9]:
from pyecharts.charts import Pie
import pyecharts.options as opts
import pandas as pd

df = pd.read_csv('./data/city_day.csv')
data = df.loc[df['City'] == 'Delhi', ['Date', 'AQI_Bucket']]

# Count the days in each AQI bucket and order the buckets by descending count.
rank_message = data.groupby(['AQI_Bucket'])
rank_com = rank_message['AQI_Bucket'].agg(['count'])
rank_com.reset_index(inplace=True)
rank_com_last = rank_com.sort_values('count', ascending=False)

# tolist() converts the values to native Python str/int. Iterating the
# Series directly would hand numpy.int64 counts to pyecharts, whose
# documentation asks for native Python types because the chart options are
# serialized to JSON.
x = rank_com_last['AQI_Bucket'].tolist()
y = rank_com_last['count'].tolist()

pie = Pie()

# [name, value] pairs; a non-zero inner radius makes the pie a ring.
input_data = [list(z) for z in zip(x, y)]
pie.add('Delhi', input_data, radius=['30%', '75%'])

pie.set_series_opts(
    # Label format: bucket name followed by its percentage share.
    label_opts=opts.LabelOpts(formatter='{b}：{d}%')
)

pie.set_global_opts(
    title_opts=opts.TitleOpts(
        title='2015-2020印度德里空气质量情况',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(
        pos_left='left',
        orient='vertical',
    )
)

# To write a standalone HTML file instead, call pie.render('pie.html').

pie.render_notebook()


#### 8.4.4 日历热力图

该图以颜色表示时间序列数值大小，可列出一年的情况，有助于我们宏观把握一段时间的情况。

In [10]:
from pyecharts.charts import Calendar
import pyecharts.options as opts
import pandas as pd
import datetime

df = pd.read_csv('./data/city_day.csv')
data = df.loc[df['City'] == 'Delhi', ['Date', 'PM2.5']]

# Build [date-string, value] pairs. Every date is rebuilt through
# datetime.date so it is emitted in the standard form ECharts requires.
list1 = []
for date, pm in zip(data['Date'], data['PM2.5']):
    fields = date.split('-')
    day = datetime.date(int(fields[0]), int(fields[1]), int(fields[2]))
    list1.append([str(day), pm])

# Calendar layout. Per the original author's note, a pyecharts calendar
# heatmap shows a single year; for several years see the multiple-data-source
# section.
calendar_layout = opts.CalendarOpts(
    pos_top="120",
    pos_left="30",
    pos_right="30",
    range_="2017",
    yearlabel_opts=opts.CalendarYearLabelOpts(is_show=False),
)
heat_title = opts.TitleOpts(pos_top="30", pos_left="center", title="2017年印度德里空气质量情况")
# Continuous colour scale: values from 0 to 300 mapped onto the colour range.
color_scale = opts.VisualMapOpts(
    max_=300, min_=0, orient="horizontal", is_piecewise=False
)

calendar = Calendar()
calendar.add('', list1, calendar_opts=calendar_layout)
calendar.set_global_opts(title_opts=heat_title, visualmap_opts=color_scale)

# To write a standalone HTML file instead, call calendar.render('heatmap.html').
calendar.render_notebook()


#### 8.4.5 多个数据源

##### 多条折线展示


In [11]:
from pyecharts.charts import Line
import pyecharts.options as opts
import pandas as pd


def get_line(city, frame=None):
    """Return the yearly mean AQI of one city as a list, in ascending year order.

    Args:
        city: Value to match against the 'City' column.
        frame: Optional DataFrame with 'City', 'Date' and 'AQI' columns.
            When omitted, the module-level ``df`` is used, as before.

    Returns:
        A list of native Python floats, one per year that has rows for the
        city. Years are taken from the text before the first '-' of 'Date'.

    Note:
        Callers that pair the result with a fixed list of years must make
        sure the city has rows for every one of those years.
    """
    source = df if frame is None else frame
    rows = source.loc[source['City'] == city, ['Date', 'AQI']]

    # The year is kept in its own Series instead of being added as a column:
    # `rows` is a selection from the source frame and writing to it can
    # trigger pandas' SettingWithCopyWarning.
    years = rows['Date'].str.split('-').str[0]

    # groupby sorts its keys by default, so the means come out ordered by year.
    return rows['AQI'].groupby(years).mean().tolist()


df = pd.read_csv('./data/city_day.csv')
cities = ['Ahmedabad', 'Chennai', 'Delhi', 'Lucknow']

# Yearly mean AQI of every city, keyed by city name (insertion order kept).
yearly_means = {city: get_line(city) for city in cities}

line = Line()
line.add_xaxis(['2015', '2016', '2017', '2018', '2019', '2020'])
# One series per city.
for city, y in yearly_means.items():
    line.add_yaxis(city, y)

# Hide the per-point value labels.
line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))

# Centered title, vertical legend docked on the left.
chart_title = opts.TitleOpts(
    title='2015-2020印度四大城市AQI年均走势图',
    pos_left='center',
)
legend_box = opts.LegendOpts(pos_left='left', orient='vertical')
line.set_global_opts(title_opts=chart_title, legend_opts=legend_box)

# To write a standalone HTML file instead, call line.render('line_AQI.html').
line.render_notebook()

In [12]:
from pyecharts.charts import Line
import pyecharts.options as opts
import pandas as pd


def get_line(city, frame=None):
    """Return the yearly mean PM2.5 of one city as a list, in ascending year order.

    Args:
        city: Value to match against the 'City' column.
        frame: Optional DataFrame with 'City', 'Date' and 'PM2.5' columns.
            When omitted, the module-level ``df`` is used, as before.

    Returns:
        A list of native Python floats, one per year that has rows for the
        city. Years are taken from the text before the first '-' of 'Date'.

    Note:
        Callers that pair the result with a fixed list of years must make
        sure the city has rows for every one of those years.
    """
    source = df if frame is None else frame
    rows = source.loc[source['City'] == city, ['Date', 'PM2.5']]

    # The year is kept in its own Series instead of being added as a column:
    # `rows` is a selection from the source frame and writing to it can
    # trigger pandas' SettingWithCopyWarning.
    years = rows['Date'].str.split('-').str[0]

    # groupby sorts its keys by default, so the means come out ordered by year.
    return rows['PM2.5'].groupby(years).mean().tolist()


df = pd.read_csv('./data/city_day.csv')
cities = ['Ahmedabad', 'Chennai', 'Delhi', 'Lucknow']

line = Line()
line.add_xaxis(['2015', '2016', '2017', '2018', '2019', '2020'])

# Add one series per city, holding that city's yearly mean PM2.5.
for city in cities:
    y = get_line(city)
    line.add_yaxis(city, y)

# Series level: no value labels on the points.
series_style = dict(label_opts=opts.LabelOpts(is_show=False))
line.set_series_opts(**series_style)

# Chart level: centered title and a vertical legend on the left.
global_style = dict(
    title_opts=opts.TitleOpts(
        title='2015-2020印度四大城市PM2.5年均走势图',
        pos_left='center',
    ),
    legend_opts=opts.LegendOpts(pos_left='left', orient='vertical'),
)
line.set_global_opts(**global_style)

line.render_notebook()

##### 多个饼图展示


In [13]:
from pyecharts.charts import Pie
from pyecharts.charts import Grid
import pyecharts.options as opts
import pandas as pd


def get_pie(city, center1, center2):
    """Build a ring-shaped pie chart of one city's AQI bucket counts.

    Args:
        city: Value to match against the 'City' column of the module-level
            ``df``; also used as the series name and the chart title.
        center1: Position of the pie centre inside the canvas, passed to
            ``Pie.add`` as ``center``.
        center2: Title position; ``center2[0]`` is used as ``pos_left`` and
            ``center2[1]`` as ``pos_top``.

    Returns:
        The configured ``Pie`` chart (legend hidden).
    """
    data = df.loc[df['City'] == city, ['Date', 'AQI_Bucket']]

    # Count the days per AQI bucket, most frequent bucket first.
    rank_com = data.groupby(['AQI_Bucket'])['AQI_Bucket'].agg(['count'])
    rank_com.reset_index(inplace=True)
    rank_com_last = rank_com.sort_values('count', ascending=False)

    # tolist() yields native Python str/int. Iterating the Series directly
    # would pass numpy.int64 counts to pyecharts, whose documentation asks
    # for native Python types because the options are serialized to JSON.
    names = rank_com_last['AQI_Bucket'].tolist()
    counts = rank_com_last['count'].tolist()
    input_data = [list(pair) for pair in zip(names, counts)]

    pie = Pie(init_opts=opts.InitOpts(width='400px', height='300px'))
    # `center` places this pie inside the shared canvas.
    pie.add(city, input_data, center=center1, radius=['15%', '30%'])

    pie.set_series_opts(
        # Label format: bucket name followed by its percentage share.
        label_opts=opts.LabelOpts(formatter='{b}：{d}%')
    )

    pie.set_global_opts(
        title_opts=opts.TitleOpts(
            title=city,
            pos_left=center2[0],  # horizontal position of this pie's title
            pos_top=center2[1],  # vertical position of this pie's title
        ),
        legend_opts=opts.LegendOpts(
            is_show=False
        )
    )

    return pie


df = pd.read_csv('./data/city_day.csv')
cities = ['Ahmedabad', 'Chennai', 'Delhi', 'Lucknow']
# Pie centres, one per city, placed toward the four corners of the canvas.
pos1 = [['25%', '25%'], ['25%', '75%'], ['75%', '25%'], ['75%', '75%']]
# Matching title positions as [left, top], one per city.
pos2 = [['20%', '23%'], ['22%', '74%'], ['73%', '23%'], ['71%', '74%']]

grid = Grid(init_opts=opts.InitOpts(width='1200px', height='800px'))
# Walk the three parallel lists together and add one pie per city to the grid.
for city, center, title_pos in zip(cities, pos1, pos2):
    grid.add(get_pie(city, center, title_pos), grid_opts=opts.GridOpts())
# To write a standalone HTML file instead, call grid.render('grid.html').
grid.render_notebook()