# boss_analysis + pyecharts

## 1. 数据获取

BOSS直聘上有这么一个接口，可以很方便地获取不同岗位、不同城市当前的薪资水平：

https://www.zhipin.com/wapi/zpboss/h5/marketpay/statistics.json

参数
- java: positionId=100101
- 产品经理: positionId=110101
- python: positionId=100109

In [1]:
import requests
# HTTP request headers: accept JSON and present a desktop Chrome user agent.
headers = {
    'accept': 'application/json, text/plain, */*',
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_0_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/88.0.4324.96 Safari/537.36'
    ),
}


In [2]:
# Query parameters for the statistics request: one position id plus filters.
# NOTE(review): "0" for the remaining filters presumably means "no filter" — confirm.
querystring = {
    "positionId": "110104",
    "industryId": "0",
    "cityId": "0",
    "companySize": "0",
    "financingStage": "0",
    "experienceCode": "0",
}


In [3]:
job_statics_url = 'https://www.zhipin.com/wapi/zpboss/h5/marketpay/statistics.json'

In [4]:
# Fetch the statistics for the selected position. The timeout keeps the call
# from hanging indefinitely if the server never answers, and raise_for_status()
# surfaces an HTTP error reply here instead of as a confusing failure when the
# body is parsed later.
job_statics_data = requests.get(
    job_statics_url, params=querystring, headers=headers, timeout=10
)
job_statics_data.raise_for_status()

In [5]:
job_statics_data.json()

{'code': 0,
 'message': 'Success',
 'zpData': {'positionId': 110104,
  'industryId': 0,
  'cityId': 0,
  'companySize': 0,
  'financingStage': 0,
  'experienceCode': 0,
  'sampleCount': 734348,
  'sampleInsufficient': False,
  'salaryByMonth': [{'year': '2020年',
    'showYear': True,
    'month': '3月',
    'monthAveSalary': 4961,
    'title': 'Mar',
    'grow': True,
    'percent': 0.96,
    'effectiveJobCount': 0,
    'effectiveJobGrowthRatio': 0.0,
    'effectiveJobGrowthOneMonth': 0,
    'effectiveJobGrowthTwoMonth': 0},
   {'year': '2020年',
    'showYear': False,
    'month': '4月',
    'monthAveSalary': 4836,
    'title': 'Apr',
    'grow': False,
    'percent': 2.52,
    'effectiveJobCount': 0,
    'effectiveJobGrowthRatio': 0.0,
    'effectiveJobGrowthOneMonth': 0,
    'effectiveJobGrowthTwoMonth': 0},
   {'year': '2020年',
    'showYear': False,
    'month': '5月',
    'monthAveSalary': 4735,
    'title': 'May',
    'grow': False,
    'percent': 2.09,
    'effectiveJobCount': 0,
 

## 2. 数据分析可视化
### 薪资分位值

在我们获取到的数据当中，已经包含了分位值数据，可以直接取用

In [6]:
job_statics_data_json = job_statics_data.json()
job_statics_data_json['zpData']['salaryByPoints']

[{'name': '10分位', 'title': '低端', 'salary': 39450, 'point': 10},
 {'name': '25分位', 'title': '中低端', 'salary': 48078, 'point': 25},
 {'name': '50分位', 'title': '中端', 'salary': 60436, 'point': 50},
 {'name': '75分位', 'title': '中高端', 'salary': 77767, 'point': 75},
 {'name': '90分位', 'title': '高端', 'salary': 98956, 'point': 90}]

#### 薪资分位 - 折线图

In [7]:
# Chart inputs from the percentile records: each x label is the record's name
# and title on two lines, each y value is the record's salary.
salary_points = job_statics_data_json['zpData']['salaryByPoints']
statics_x = [point['name'] + '\n' + point['title'] for point in salary_points]
statics_y = [point['salary'] for point in salary_points]

In [8]:
import pyecharts.options as opts
from pyecharts.charts import Line, Bar, Pie, Calendar, WordCloud
from pyecharts.commons.utils import JsCode
from pyecharts.globals import SymbolType
import datetime
import random

In [9]:
# Percentile salary line chart: a smoothed white line with red markers over a
# gradient-filled area, drawn on a gradient background.
x_data, y_data = statics_x, statics_y

# JavaScript expressions, wrapped in JsCode below so they are emitted as code.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
area_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#eb64fb'}, {offset: 1, color: '#3fbbff0d'}], false)"
)

# Title centred near the bottom edge of the canvas.
percentile_title_opts = opts.TitleOpts(
    title="收入分位",
    pos_bottom="5%",
    pos_left="center",
    title_textstyle_opts=opts.TextStyleOpts(color="#fff", font_size=16),
)

# Category axis: no axis line, faint ticks and split lines.
percentile_xaxis_opts = opts.AxisOpts(
    type_="category",
    boundary_gap=False,
    axislabel_opts=opts.LabelOpts(margin=30, color="#ffffff63"),
    axisline_opts=opts.AxisLineOpts(is_show=False),
    axistick_opts=opts.AxisTickOpts(
        is_show=True,
        length=25,
        linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
    ),
    splitline_opts=opts.SplitLineOpts(
        is_show=True, linestyle_opts=opts.LineStyleOpts(color="#ffffff1f")
    ),
)

# Value axis on the right-hand side with a solid white axis line.
percentile_yaxis_opts = opts.AxisOpts(
    type_="value",
    position="right",
    axislabel_opts=opts.LabelOpts(margin=20, color="#ffffff63"),
    axisline_opts=opts.AxisLineOpts(
        linestyle_opts=opts.LineStyleOpts(width=2, color="#fff")
    ),
    axistick_opts=opts.AxisTickOpts(
        is_show=True,
        length=15,
        linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
    ),
    splitline_opts=opts.SplitLineOpts(
        is_show=True, linestyle_opts=opts.LineStyleOpts(color="#ffffff1f")
    ),
)

c_line = Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
c_line.add_xaxis(xaxis_data=x_data)
c_line.add_yaxis(
    series_name="薪资",
    y_axis=y_data,
    is_smooth=True,
    is_symbol_show=True,
    symbol="circle",
    symbol_size=6,
    linestyle_opts=opts.LineStyleOpts(color="#fff"),
    label_opts=opts.LabelOpts(is_show=True, position="top", color="white"),
    itemstyle_opts=opts.ItemStyleOpts(
        color="red", border_color="#fff", border_width=3
    ),
    tooltip_opts=opts.TooltipOpts(is_show=False),
    areastyle_opts=opts.AreaStyleOpts(color=JsCode(area_color_js), opacity=1),
)
c_line.set_global_opts(
    title_opts=percentile_title_opts,
    xaxis_opts=percentile_xaxis_opts,
    yaxis_opts=percentile_yaxis_opts,
    legend_opts=opts.LegendOpts(is_show=False),
)

In [10]:
c_line.render_notebook()

#### 薪资分布 - 横向柱形图

In [11]:
# Chart inputs from the salary-distribution records: the salary range is the
# category label and the percent field is the value.
distribution_rows = job_statics_data_json['zpData']['salaryByDistributed']
statics_x = [row['salaryRange'] for row in distribution_rows]
statics_y = [row['percent'] for row in distribution_rows]


In [12]:
print(statics_x)
print(statics_y)

['<4.69', '4.69-6.18', '6.18-7.66', '7.66-9.15', '>9.15']
[22, 29, 21, 12, 16]


In [13]:
def bar_chart(x, y) -> Bar:
    """Build a horizontal bar chart whose values are shown as percentages.

    Args:
        x: Category labels. The axes are reversed, so these end up on the
            vertical axis.
        y: Numeric values, one per label; bar labels and value-axis ticks
            append "%".

    Returns:
        The configured ``Bar`` chart.
    """
    background_color_js = (
        "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
        "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
    )
    # Gradient fill for the bars. Both stops use four-component rgba(): a
    # three-component rgba() is not accepted by every color parser.
    bar_color_js = (
        "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
        "[{offset: 0, color: 'rgba(255,100,97,.5)'}, "
        "{offset: 1, color: 'rgba(221,160,221,1)'}], false)"
    )
    c = (
        Bar(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
        .add_xaxis(x)
        .reversal_axis()
        .add_yaxis(
            "",
            y,
            label_opts=opts.LabelOpts(position='inside', formatter="{c}%"),
            color='plum',
            category_gap="60%",
        )
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(rotate=-30, formatter="{value}%"),
                axisline_opts=opts.AxisLineOpts(is_show=False),
            ),
            yaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(is_show=True),
                axisline_opts=opts.AxisLineOpts(is_show=False),
                axistick_opts=opts.AxisTickOpts(
                    is_show=True,
                    length=25,
                    linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
                ),
            ),
        )
        .set_series_opts(
            # A raw dict is passed so the ECharts-level keys are set directly.
            itemstyle_opts={
                "normal": {
                    "color": JsCode(bar_color_js),
                    "barBorderRadius": [30, 30, 30, 30],
                    "shadowColor": 'rgb(0, 160, 221)',
                }
            }
        )
    )
    return c

In [14]:
bar_chart(statics_x, statics_y).render_notebook()

#### 薪资与工作经验 - 饼状玫瑰图

In [15]:
# Work-experience breakdown: labels are "<workExp>:<aveSalary>", values are the
# percent field of each record.
experience_rows = job_statics_data_json['zpData']['salaryByWorkExp']
statics_y = [row['percent'] for row in experience_rows]
statics_x = [row['workExp'] + ':' + str(row['aveSalary']) for row in experience_rows]
statics_y

[19, 77, 4, 0, 0]

In [16]:
list(zip(statics_x, statics_y))

[('1年以内:4242', 19),
 ('1-3年:4994', 77),
 ('3-5年:6615', 4),
 ('5-10年:7948', 0),
 ('10年以上:8749', 0)]

In [17]:
def pie_rosetype(data) -> Pie:
    """Build a rose-type ring chart on the gradient background.

    Args:
        data: Iterable of data items for the pie series; it is materialised
            into a list before being handed to the chart.

    Returns:
        The configured ``Pie`` chart.
    """
    gradient_bg = JsCode(
        "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
        "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
    )
    chart = Pie(init_opts=opts.InitOpts(bg_color=gradient_bg))
    chart.add(
        "",
        list(data),
        radius=["30%", "75%"],
        center=["45%", "50%"],
        rosetype="radius",
        label_opts=opts.LabelOpts(formatter="{b}: {c}"),
    )
    chart.set_global_opts(title_opts=opts.TitleOpts(title=""))
    return chart

In [18]:
pie_rosetype(zip(statics_x, statics_y)).render_notebook()

#### 薪资与工作经验 - 饼图

In [19]:
# Ring pie chart of the (label, value) pairs with boxed rich-text labels.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)

# Named rich-text styles; the formatter below refers to them as {name|text}.
label_rich_styles = {
    "a": {"color": "#999", "lineHeight": 22, "align": "center"},
    "abg": {
        "backgroundColor": "#e3e3e3",
        "width": "100%",
        "align": "right",
        "height": 22,
        "borderRadius": [4, 4, 0, 0],
    },
    "hr": {
        "borderColor": "#aaa",
        "width": "100%",
        "borderWidth": 0.5,
        "height": 0,
    },
    "b": {"fontSize": 16, "lineHeight": 33},
    "per": {
        "color": "#eee",
        "backgroundColor": "#334455",
        "padding": [2, 4],
        "borderRadius": 2,
    },
}

# Label box placed outside the ring.
pie_label_opts = opts.LabelOpts(
    position="outside",
    formatter="{a|job}{abg|}\n{hr|}\n {b|{b}: }{per|{d}%}  ",
    background_color="#eee",
    border_color="#aaa",
    border_width=1,
    border_radius=4,
    rich=label_rich_styles,
)

c = Pie(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
c.add(
    "",
    list(zip(statics_x, statics_y)),
    radius=["40%", "55%"],
    label_opts=pie_label_opts,
)
c.set_global_opts(title_opts=opts.TitleOpts(title=""))

In [20]:
c.render_notebook()

#### 人数与年龄 - 柱状图

In [21]:
# Chart inputs from the age records: the age range is the category label and
# the people count is the value.
age_rows = job_statics_data_json['zpData']['salaryByAge']
statics_x = [row['ageRange'] for row in age_rows]
statics_y = [row['people'] for row in age_rows]
print(statics_x)
print(statics_y)

['24岁以下', '25岁-29岁', '30岁-34岁', '35岁-39岁', '40岁-44岁', '45岁以上']
[512382, 203200, 15833, 2072, 494, 367]


In [22]:
def bar_chart_age(x, y) -> Bar:
    """Build a vertical bar chart showing the raw value on each bar.

    Args:
        x: Category labels for the horizontal axis.
        y: Numeric values, one per label; shown unformatted inside the bars.

    Returns:
        The configured ``Bar`` chart.
    """
    background_color_js = (
        "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
        "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
    )
    # Gradient fill for the bars. Both stops use four-component rgba(): a
    # three-component rgba() is not accepted by every color parser.
    bar_color_js = (
        "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
        "[{offset: 0, color: 'rgba(255,100,97,.5)'}, "
        "{offset: 1, color: 'rgba(221,160,221,1)'}], false)"
    )
    c = (
        Bar(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
        .add_xaxis(x)
        .add_yaxis(
            "",
            y,
            label_opts=opts.LabelOpts(position='inside', formatter="{c}"),
            color='plum',
            category_gap="60%",
        )
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(rotate=-30, formatter="{value}"),
                axisline_opts=opts.AxisLineOpts(is_show=False),
            ),
            yaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(is_show=True),
                axisline_opts=opts.AxisLineOpts(is_show=False),
                axistick_opts=opts.AxisTickOpts(
                    is_show=True,
                    length=25,
                    linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
                ),
            ),
        )
        .set_series_opts(
            # A raw dict is passed so the ECharts-level keys are set directly.
            itemstyle_opts={
                "normal": {
                    "color": JsCode(bar_color_js),
                    "barBorderRadius": [30, 30, 30, 30],
                    "shadowColor": 'rgb(0, 160, 221)',
                }
            }
        )
    )
    return c

In [23]:
bar_chart_age(statics_x, statics_y).render_notebook()

#### 月度平均薪资 - 折线图

In [24]:
# Chart inputs from the monthly records: labels are "<year>-<month>", values
# are the monthly average salary.
monthly_rows = job_statics_data_json['zpData']['salaryByMonth']
statics_x = [row['year'] + '-' + row['month'] for row in monthly_rows]
statics_y = [row['monthAveSalary'] for row in monthly_rows]
print(statics_x)
print(statics_y)

['2020年-3月', '2020年-4月', '2020年-5月', '2020年-6月', '2020年-7月', '2020年-8月', '2020年-9月', '2020年-10月', '2020年-11月', '2020年-12月', '2021年-1月', '2021年-2月']
[4961, 4836, 4735, 4761, 4808, 4891, 4977, 5017, 5015, 5132, 5151, 5077]


In [25]:
# Monthly average salary line chart: a smoothed white line with red markers
# over a gradient-filled area, drawn on a gradient background.
x_data = statics_x
y_data = statics_y

# Start the value axis just below the data instead of at a fixed number: a
# hard-coded floor that is higher than the plotted values pushes the whole
# series off the chart. The smallest value is rounded down to a multiple of
# 500; with no data the axis minimum is left to the library default.
y_axis_floor = (min(y_data) // 500) * 500 if y_data else None

# JavaScript expressions, wrapped in JsCode below so they are emitted as code.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
area_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#eb64fb'}, {offset: 1, color: '#3fbbff0d'}], false)"
)

c_line_month = (
    Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
    .add_xaxis(xaxis_data=x_data)
    .add_yaxis(
        series_name="薪资",
        y_axis=y_data,
        is_smooth=True,
        is_symbol_show=True,
        symbol="circle",
        symbol_size=6,
        linestyle_opts=opts.LineStyleOpts(color="#fff"),
        label_opts=opts.LabelOpts(is_show=True, position="top", color="white"),
        itemstyle_opts=opts.ItemStyleOpts(
            color="red", border_color="#fff", border_width=3
        ),
        tooltip_opts=opts.TooltipOpts(is_show=False),
        areastyle_opts=opts.AreaStyleOpts(color=JsCode(area_color_js), opacity=1),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="薪资变化",
            pos_bottom="5%",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(color="#fff", font_size=16),
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            boundary_gap=False,
            # Labels are rotated so the twelve "<year>-<month>" strings do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=-20, margin=30, color="#ffffff63"),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(
                is_show=True,
                length=25,
                linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
            ),
            splitline_opts=opts.SplitLineOpts(
                is_show=True, linestyle_opts=opts.LineStyleOpts(color="#ffffff1f")
            ),
        ),
        yaxis_opts=opts.AxisOpts(
            min_=y_axis_floor,
            type_="value",
            position="right",
            axislabel_opts=opts.LabelOpts(margin=20, color="#ffffff63"),
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(width=2, color="#fff")
            ),
            axistick_opts=opts.AxisTickOpts(
                is_show=True,
                length=15,
                linestyle_opts=opts.LineStyleOpts(color="#ffffff1f"),
            ),
            splitline_opts=opts.SplitLineOpts(
                is_show=True, linestyle_opts=opts.LineStyleOpts(color="#ffffff1f")
            ),
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)

In [26]:
c_line_month.render_notebook()

#### 城市平均薪资 - 地图

In [27]:
from pyecharts.charts import Map
from pyecharts.faker import Faker

In [28]:
# Build index-aligned lists for the map: statics_x holds the salary value and
# statics_y the province name of the same record.
statics_x = []
statics_y = []
for province in job_statics_data_json['zpData']['salaryByCity']:
    cities = province['cityList']
    if not cities:
        # Skip the province entirely. Appending its name without a salary
        # would shift every later name/salary pair when the lists are zipped.
        continue
    # The first listed city's average stands in for the whole province.
    # NOTE(review): confirm the first entry is the intended representative.
    statics_x.append(cities[0]['cityAveMonthSalary'])
    statics_y.append(province['provinceName'])

In [29]:
# Map of China coloured by the salary value of each province.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)

# One [name, value] row per province for the map series.
province_salary_rows = [
    [province_name, salary] for province_name, salary in zip(statics_y, statics_x)
]

c = Map(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
c.add("全国薪资", province_salary_rows, "china")
c.set_global_opts(
    title_opts=opts.TitleOpts(title=""),
    visualmap_opts=opts.VisualMapOpts(max_=15000, min_=6000),
)


In [30]:
c.render_notebook()