In [37]:
import json
import math
import os
import re

import jieba
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pyecharts import options as opts
from pyecharts.charts import Pie, Bar, Map, Geo, Page, Line, Tab, Grid, Scatter, TreeMap
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType, SymbolType, GeoType

In [3]:
# Load the job-posting spreadsheet, keeping only the columns named in usecols.
df_bs = pd.read_excel(
    'new_all.xlsx',
    usecols=[
        "coname", "jobname", "degree", "workyear",
        "cityname", "welfare", "salary", "cotype",
    ],
)

In [30]:
# Drop postings without any salary value; they cannot contribute to salary statistics.
# dropna() replaces the former "fill with '9999', then drop rows equal to '9999'" round
# trip, which would also have discarded a genuine salary equal to the sentinel string.
df_bs = df_bs.dropna(subset=['salary'])

In [38]:
# Preview the first five rows (5 is the default for head()).
df_bs.head()

Unnamed: 0,jobname,coname,workyear,degree,cityname,welfare,salary,cotype
0,C#高级开发工程师,北京赛融信科技股份有限公司,3-4年,本科,北京,"五险一金,年终奖金,餐饮补贴",10000.0,民营公司
1,C#高级开发工程师,北京恒泰实达科技股份有限公司,5-7年,本科,北京,"五险一金,定期体检,绩效奖金",15000.0,上市公司
2,C#高级开发工程师,达科信息科技（北京）有限公司,5-7年,本科,北京,,20000.0,外资（非欧美）
3,C#高级开发工程师,北京华航唯实机器人科技股份有限公司,2年,本科,北京,"五险一金,专业培训,年终奖金,工作餐",12000.0,民营公司
4,高级C#工程师,北京小月智联科技有限公司,5-7年,本科,北京,"五险一金,弹性工作,定期体检,股票期权,年终奖金,员工旅游",13000.0,民营公司


In [35]:
def dispose(mintomax):
    '''
    Split a unit-less salary string into its lower and upper bound.

    :param mintomax: salary text with the unit already removed, e.g. '1.2-5' or '3'
    :return: two-element list of floats [low, high]; '1.2-5' gives [1.2, 5.0] and a
             single value such as '3' gives [3.0, 3.0]
    '''
    # A lone value stands for both ends of the range.
    bounds = mintomax.split('-') if '-' in mintomax else [mintomax, mintomax]
    low, high = bounds[0], bounds[1]
    return [float(low), float(high)]

class Salary:
    # Class-level fallbacks; __init__ always assigns the instance attributes.
    min = None
    max = None
    average = None

    def __init__(self, min, max, ex=None):
        '''
        Store a salary range and, when both bounds are present, its midpoint.

        :param min:  lower bound of the salary
        :param max:  upper bound of the salary
        :param ex:   unprocessed salary string (optional)

        When neither bound is None or '', min, max and average are stored as floats
        rounded to one decimal place; otherwise the bounds are stored as given and
        average is None.
        '''
        self.ex = ex
        incomplete = min is None or max is None or min == '' or max == ''
        if incomplete:
            self.min, self.max, self.average = min, max, None
            return
        self.min = float('%.1f' % min)
        self.max = float('%.1f' % max)
        self.average = float('%.1f' % ((min + max) / 2))  # midpoint of the range


def toPubSalary(salary):
    '''
    Convert a raw salary description into a Salary expressed per month.

    :param salary: salary text such as '1.2-5 千/月'; None, NaN and '' mean "no salary"
    :return: a Salary object. min/max/average are None when the value is missing, when
             no known unit is found, or when the amount before the unit is not numeric;
             in the latter two cases the space-stripped raw text is kept in Salary.ex.
    '''
    # Test for missing values BEFORE converting to text: str(None) is 'None' and
    # str(float('nan')) is 'nan', so a check made afterwards can never match.
    if salary is None or (isinstance(salary, float) and salary != salary):
        return Salary(None, None)
    salary = str(salary).replace(' ', '')
    if salary == '':
        return Salary(None, None)

    # (unit suffix, conversion to a monthly amount), tried in this order.
    # Yearly figures are divided by 12; daily figures are multiplied by 30.
    converters = (
        ('万/月', lambda amount: amount * 10000),
        ('万/年', lambda amount: amount / 12 * 10000),
        ('千/月', lambda amount: amount * 1000),
        ('千/天', lambda amount: amount * 1000 * 30),
        ('元/天', lambda amount: amount * 30),
    )
    for unit, to_monthly in converters:
        index = salary.find(unit)
        if index == -1:
            continue
        try:
            low, high = dispose(salary[:index])
        except ValueError:
            # Known unit but a non-numeric amount: report it through `ex`, the same
            # channel used for unknown units, instead of aborting the whole run.
            return Salary(None, None, ex=salary)
        return Salary(to_monthly(low), to_monthly(high))

    # No recognised unit: keep the raw text so the caller can inspect it.
    return Salary(None, None, ex=salary)


In [18]:
# Parse every raw salary value into a Salary object, preserving row order.
salary_list = [toPubSalary(raw) for raw in df_bs['salary']]


In [19]:
# Upper bound of each parsed salary (None where parsing failed).
max_salary = [parsed.max for parsed in salary_list]

In [20]:
# Lower bound of each parsed salary (None where parsing failed).
min_salary = [parsed.min for parsed in salary_list]

In [21]:
# Midpoint of each parsed salary range (None where parsing failed).
average_salary = [parsed.average for parsed in salary_list]

In [22]:
#以上是处理工资的代码块 尚未处理完成

In [23]:
# Raw text of the salaries that could not be parsed (None for the ones that could).
ex_salary = [parsed.ex for parsed in salary_list]

In [24]:
# Replace the raw salary text with the parsed lower bound of each range.
# NOTE: this overwrites the column the parsing cells read from, so those cells are not
# re-runnable afterwards: toPubSalary() finds no unit in a stringified number and
# returns min=None for every row.
df_bs['salary'] = min_salary

In [22]:
# Postings located in 北京, restricted to the listed columns.
beijing = df_bs.loc[
    df_bs['cityname'] == '北京',
    ["coname", "jobname", "degree", "cityname", "welfare", "salary", "cotype"],
]

### 以下为工资部分

In [23]:
# Number of 深圳 postings per salary value.
t1 = df_bs.loc[df_bs['cityname'] == '深圳'].groupby('salary').size()

In [24]:
# Number of 上海 postings per salary value.
t2 = df_bs.loc[df_bs['cityname'] == '上海'].groupby('salary').size()

In [25]:
# Number of 北京 postings per salary value.
t3 = df_bs.loc[df_bs['cityname'] == '北京'].groupby('salary').size()

In [26]:
# Number of 广州 postings per salary value.
t4 = df_bs.loc[df_bs['cityname'] == '广州'].groupby('salary').size()

In [27]:
# Number of 昆明 postings per salary value.
t5 = df_bs.loc[df_bs['cityname'] == '昆明'].groupby('salary').size()

In [28]:
# Align the five per-city salary counts on one shared salary index. Columns 0-4 follow the
# order t1..t5; a salary that does not occur in a city counts as 0.
# sort_index() puts the salaries in ascending order so that charts built from p0 get an
# ordered x-axis; the unsorted outer join alone does not guarantee that.
p0 = pd.concat([t1, t2, t3, t4, t5], axis=1, sort=False).fillna(0).sort_index()

In [29]:
# Two-stop linear gradient shared by every city bar chart (passed to ECharts as raw JS).
_BAR_GRADIENT_JS = """new echarts.graphic.LinearGradient(0, 0, 0, 1, [{
                offset: 0,
                color: 'rgba(0, 244, 255, 1)'
            }, {
                offset: 1,
                color: 'rgba(0, 77, 167, 1)'
            }], false)"""


def _city_salary_bar(city, salaries, counts):
    '''
    Build the salary-distribution bar chart for one city.

    :param city: series name shown for the bars
    :param salaries: x-axis values (the salary levels)
    :param counts: y-axis values (postings per salary level), same length as salaries
    :return: the configured Bar chart
    '''
    bar = Bar()
    bar.add_xaxis(salaries)
    bar.add_yaxis(city, counts)
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="城市工资对比"),
        datazoom_opts=opts.DataZoomOpts(type_="inside"),
    )
    bar.set_series_opts(
        itemstyle_opts={
            "normal": {
                "color": JsCode(_BAR_GRADIENT_JS),
                "barBorderRadius": [30, 30, 30, 30],
                "shadowColor": "rgb(0, 160, 221)",
            }
        }
    )
    return bar


# One chart per city; the column numbers are the positions of t1..t5 inside p0.
# All five charts share the same salary axis (the index of p0).
salary_axis = p0.index.tolist()
szbar = _city_salary_bar('深圳', salary_axis, p0[0].tolist())
shbar = _city_salary_bar('上海', salary_axis, p0[1].tolist())
bjbar = _city_salary_bar('北京', salary_axis, p0[2].tolist())
gzbar = _city_salary_bar('广州', salary_axis, p0[3].tolist())
kmbar = _city_salary_bar('昆明', salary_axis, p0[4].tolist())

# Show the five charts as tabs of a single widget.
tab = Tab()
tab.add(szbar, '深圳')
tab.add(shbar, '上海')
tab.add(bjbar, '北京')
tab.add(gzbar, '广州')
tab.add(kmbar, '昆明')
tab.render_notebook()

#### 公司性质

In [30]:
#以下得出每个性质的公司对学历的要求

In [31]:
# Postings per required degree among 创业公司 companies.
co1 = df_bs.loc[df_bs['cotype'] == '创业公司'].groupby('degree').size()

In [32]:
# Postings per required degree among 非营利组织 companies.
co2 = df_bs.loc[df_bs['cotype'] == '非营利组织'].groupby('degree').size()

In [33]:
# Postings per required degree among 国企 companies.
co4 = df_bs.loc[df_bs['cotype'] == '国企'].groupby('degree').size()

In [34]:
# Postings per required degree among 民营公司 companies.
co5 = df_bs.loc[df_bs['cotype'] == '民营公司'].groupby('degree').size()

In [35]:
# Postings per required degree among 上市公司 companies.
co6 = df_bs.loc[df_bs['cotype'] == '上市公司'].groupby('degree').size()

In [36]:
# Postings per required degree among 事业单位 employers.
co7 = df_bs.loc[df_bs['cotype'] == '事业单位'].groupby('degree').size()

In [37]:
# Postings per required degree among 外企代表处 employers.
co8 = df_bs.loc[df_bs['cotype'] == '外企代表处'].groupby('degree').size()

In [38]:
# Postings per required degree among 政府机关 employers.
co9 = df_bs.loc[df_bs['cotype'] == '政府机关'].groupby('degree').size()

In [39]:
# Turn the (degree, count) pairs of co9 into a list of two-element lists.
cot = [[degree, count] for degree, count in co9.items()]

In [40]:
# [company type, number of postings] pairs, most frequent type first.
degco = [[cotype, count] for cotype, count in df_bs['cotype'].value_counts().items()]

In [41]:
# Share of postings per company type (cotype) over the whole data set.
# The counts come straight from df_bs rather than from `degco`, because `degco` is
# re-bound to a DataFrame further down the notebook, which breaks this cell on re-run.
cotype_counts = [list(pair) for pair in df_bs['cotype'].value_counts().items()]

cop = Pie()
# The series holds counts for every company type, so it is named accordingly; the former
# name '创业公司学历求' (start-up degree requirements) did not match the plotted data.
cop.add('公司性质', cotype_counts)
cop.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
cop.set_global_opts(
    title_opts=opts.TitleOpts(title="公司性质", subtitle="cotype", pos_left="85%"),
    legend_opts=opts.LegendOpts(pos_right="85%", orient="vertical"),
)
cop.render_notebook()

In [42]:
# [company type, number of postings] pairs, most frequent type first.
coty = [[name, count] for name, count in df_bs['cotype'].value_counts().items()]

In [166]:
# Degree requirements per company type as a two-level treemap (company type -> degree).
# The figures are a hard-coded snapshot, not derived from df_bs in this cell.
# Fix: the 政府机关 parent value was 8030, a copy of the 国企 value; it is now 122, the sum
# of its children, so the tile is no longer drawn as large as 国企.
# NOTE(review): the parent values of 创业公司 (2082 vs 2010), 民营公司 (88903 vs 88894) and
# 上市公司 (13905 vs 13904) are also slightly larger than the sum of their children, and the
# degree labels are not uniform (专科/大专, 中专/中专中技) - confirm against the data.
# TreeMap (and the json/os imports this cell used to carry) now come from the import cell
# at the top of the notebook.
data = [
    {
        "value": 2082,
        "name": "创业公司",
        "children": [
            {"value": 1308, "name": "创业公司-本科"},
            {"value": 587, "name": "专科"},
            {"value": 5, "name": "博士"},
            {"value": 90, "name": "硕士"},
            {"value": 7, "name": "高中"},
            {"value": 13, "name": "中专"},
        ],
    },
    {
        "value": 8030,
        "name": "国企",
        "children": [
            {"value": 18, "name": "博士"},
            {"value": 681, "name": "硕士"},
            {"value": 6627, "name": "国企-本科"},
            {"value": 698, "name": "大专"},
            {"value": 6, "name": "中专中技"},
        ],
    },
    {
        "value": 88903,
        "name": "民营公司",
        "children": [
            {"value": 102, "name": "博士"},
            {"value": 2694, "name": "硕士"},
            {"value": 54501, "name": "民营公司-本科"},
            {"value": 30921, "name": "大专"},
            {"value": 446, "name": "中专中技"},
            {"value": 225, "name": "高中"},
            {"value": 5, "name": "初中及以下"},
        ],
    },
    {
        "value": 13905,
        "name": "上市公司",
        "children": [
            {"value": 21, "name": "博士"},
            {"value": 548, "name": "硕士"},
            {"value": 10705, "name": "上市公司-本科"},
            {"value": 2616, "name": "大专"},
            {"value": 10, "name": "中专中技"},
            {"value": 4, "name": "高中"},
        ],
    },
    {
        "value": 984,
        "name": "事业单位",
        "children": [
            {"value": 26, "name": "博士"},
            {"value": 227, "name": "硕士"},
            {"value": 660, "name": "事业单位-本科"},
            {"value": 68, "name": "大专"},
            {"value": 3, "name": "中专中技"},
        ],
    },
    {
        "value": 94,
        "name": "外企代表处",
        "children": [
            {"value": 1, "name": "博士"},
            {"value": 1, "name": "硕士"},
            {"value": 64, "name": "外企代表处-本科"},
            {"value": 24, "name": "大专"},
            {"value": 4, "name": "高中"},
        ],
    },
    {
        "value": 122,
        "name": "政府机关",
        "children": [
            {"value": 15, "name": "硕士"},
            {"value": 90, "name": "政府机关-本科"},
            {"value": 16, "name": "大专"},
            {"value": 1, "name": "中专"},
        ],
    },
]
treemap = (
    TreeMap()
    .add("学历要求", data)
    .set_global_opts(title_opts=opts.TitleOpts(title="对学历的要求", subtitle="不同的颜色 对应不同性质的公司信息"))
)

#treemap.render_notebook()

#### 以下为每个城市的学历要求

In [44]:
# Postings per required degree in 北京.
d1 = df_bs.loc[df_bs['cityname'] == '北京'].groupby('degree').size()

In [45]:
# Postings per required degree in 上海.
d2 = df_bs.loc[df_bs['cityname'] == '上海'].groupby('degree').size()

In [46]:
# Postings per required degree in 广州.
d3 = df_bs.loc[df_bs['cityname'] == '广州'].groupby('degree').size()

In [47]:
# Postings per required degree in 深圳.
d4 = df_bs.loc[df_bs['cityname'] == '深圳'].groupby('degree').size()

In [48]:
# Postings per required degree in 昆明.
d5 = df_bs.loc[df_bs['cityname'] == '昆明'].groupby('degree').size()

In [49]:
# Side-by-side degree counts for the five cities (columns 0-4 follow d1..d5);
# a degree that does not occur in a city counts as 0.
per_city_degree_counts = [d1, d2, d3, d4, d5]
d0 = pd.concat(per_city_degree_counts, axis=1, sort=False).fillna(0)

In [82]:
# One line per city over the shared degree axis; the column position in d0 matches the
# order in which the city series were concatenated.
line = Line()
line.add_xaxis(d0.index.tolist())
for column, city in enumerate(['北京', '上海', '广州', '深圳', '昆明']):
    line.add_yaxis(city, d0[column].tolist())
line.set_global_opts(title_opts=opts.TitleOpts(title="五个城市对学历的要求"))
line.render_notebook()

### 以下为学历部分

In [51]:
# (degree, number of postings) tuples, most frequent degree first.
degrees = list(df_bs['degree'].value_counts().items())

In [52]:
# Rich-text label placed outside each slice; the "rich" entries style the named
# fragments (a, abg, hr, b, per) referenced by the formatter string.
degree_label_opts = opts.LabelOpts(
    position="outside",
    formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c}  {per|{d}%}  ",
    background_color="#eee",
    border_color="#aaa",
    border_width=1,
    border_radius=4,
    rich={
        "a": {"color": "#999", "lineHeight": 22, "align": "center"},
        "abg": {
            "backgroundColor": "#e3e3e3",
            "width": "100%",
            "align": "right",
            "height": 22,
            "borderRadius": [4, 4, 0, 0],
        },
        "hr": {
            "borderColor": "#aaa",
            "width": "100%",
            "borderWidth": 0.5,
            "height": 0,
        },
        "b": {"fontSize": 16, "lineHeight": 33},
        "per": {
            "color": "#eee",
            "backgroundColor": "#334455",
            "padding": [2, 4],
            "borderRadius": 2,
        },
    },
)

# Ring chart of postings per required degree.
pie = Pie()
pie.add(
    '学历要求占比',
    [[degree, count] for degree, count in degrees],
    center=["50%", "60%"],
    radius=["40%", "55%"],
    label_opts=degree_label_opts,
)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="学历限制"),
    legend_opts=opts.LegendOpts(orient="vertical", pos_left="80%"),
)

pie.render_notebook()

### 城市分布部分

In [40]:
# (city, number of postings) tuples, used for the city-distribution map.
citynames = list(df_bs['cityname'].value_counts().items())

In [42]:
# Distribution of postings over cities on a map of China.
# The city counts are added twice: once with the default series type and once as
# an effect-scatter series with a fixed symbol size.
city_pairs = [[city, count] for city, count in citynames]
geo = (
    Geo()
    .add_schema(maptype="china", itemstyle_opts=opts.ItemStyleOpts(color="#323c48", border_color="#111"))
    .add("", city_pairs)
    .add('', citynames, type_=GeoType.EFFECT_SCATTER, symbol_size=20)
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
geo.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(max_=30000, is_piecewise=True),
    title_opts=opts.TitleOpts(title='招聘岗位城市分布'),
)
#geo.render_notebook()

<pyecharts.charts.basic_charts.geo.Geo at 0x21bfc7adb80>

In [56]:
#以上完成招聘岗位城市分布图

In [47]:
# Postings that require a 大专 degree (job title, degree, salary only).
# Note: this re-binds `degco`, which held a list of company-type counts earlier on.
degco = df_bs.loc[
    df_bs['degree'] == '大专',
    ['jobname', 'degree', 'salary'],
]

In [48]:
# 大专 postings whose title contains "Java" (this also matches e.g. "JavaScript").
# na=False treats a missing job title as "no match"; without it the mask contains NaN
# and the boolean indexing raises.
dfjava = degco[degco['jobname'].str.contains("Java", na=False)]

In [61]:
# 大专 postings whose title contains a capital "C".
# NOTE(review): this matches any title with the letter C in it, not only C / C++ / C#
# jobs - confirm that this is the intended category.
# na=False treats a missing job title as "no match"; without it the mask contains NaN
# and the boolean indexing raises.
dfC = degco[degco['jobname'].str.contains("C", na=False)]

In [62]:
# 大专 postings whose title contains "Python".
# na=False treats a missing job title as "no match"; without it the mask contains NaN
# and the boolean indexing raises.
dfPython = degco[degco['jobname'].str.contains("Python", na=False)]

In [63]:
# 大专 postings whose title contains "前端".
# na=False treats a missing job title as "no match"; without it the mask contains NaN
# and the boolean indexing raises.
dfweb = degco[degco['jobname'].str.contains("前端", na=False)]

In [64]:
# 大专 postings whose title contains "网络安全".
# na=False treats a missing job title as "no match"; without it the mask contains NaN
# and the boolean indexing raises.
dfwang = degco[degco['jobname'].str.contains("网络安全", na=False)]

In [65]:
# 大专 postings whose title contains "Android".
# na=False treats a missing job title as "no match"; without it the mask contains NaN
# and the boolean indexing raises.
dfand = degco[degco['jobname'].str.contains("Android", na=False)]

In [49]:
# Show the Java subset as the cell output.
dfjava

Unnamed: 0,jobname,degree,salary
214,Java开发工程师,大专,15000.0
234,爬虫开发工程师（Java或Python）,大专,6000.0
1201,Java/C# 中级开发工程师(MES/MCS)（韩国语优先）,大专,13000.0
1569,Java高级开发工程师,大专,7000.0
1624,Java开发工程师,大专,15000.0
...,...,...,...
140826,Java游戏开发实习生,大专,6000.0
140830,Java游戏服务器,大专,20000.0
140963,游戏Java服务端,大专,15000.0
141058,Java实习游戏开发工程师,大专,3000.0


In [73]:
# Slice labels and posting counts for the 大专 pie chart built from zk_x / zk_y.
# NOTE(review): zk_y is hard-coded; presumably these are the row counts of dfjava, dfC,
# dfPython, dfweb, dfwang and dfand from an earlier run - confirm, and consider deriving
# them with len(...) so they follow the data.
zk_x = ['Java','C','Python','前端','网络安全','Android']
zk_y = [5875,1895,330,6279,320,1581]

In [164]:
# Ring chart of 大专 postings per technology keyword (labels hidden, details in the tooltip).
zkpie = (
    Pie(init_opts=opts.InitOpts(width="800px", height="600px"))
    .add(
        series_name="大专学历招聘",
        data_pair=[list(z) for z in zip(zk_x, zk_y)],
        radius=["50%", "70%"],
        label_opts=opts.LabelOpts(is_show=False, position="center"),
    )
    # Fix: pos_left was misspelled "legft", which is not a valid position keyword.
    .set_global_opts(legend_opts=opts.LegendOpts(pos_left="left", orient="vertical"))
    .set_series_opts(
        tooltip_opts=opts.TooltipOpts(
            trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"
        ),
        # label_opts=opts.LabelOpts(formatter="{b}: {c}")
    )
)
zkpie.render_notebook()

In [83]:
# Collect every chart on one draggable page and write it to an HTML file.
dashboard_charts = [
    geo, cop, treemap, pie,
    kmbar, szbar, bjbar, gzbar, shbar,
    line, zkpie,
]
page = Page(layout=Page.DraggablePageLayout)
page.add(*dashboard_charts)
page.render("数据分析.html")

'C:\\Users\\mac\\BS_数据分析\\数据分析.html'

In [85]:
#page.save_resize_html("数据分析.html", cfg_file="chart_config.json",dest="bs_sjfx.html")

In [110]:
# Postings from 外企代表处 employers, restricted to the listed columns.
cd = df_bs.loc[
    df_bs['cotype'] == '外企代表处',
    ['cotype', 'jobname', 'degree', 'salary'],
]

In [111]:
# Show the 外企代表处 subset as the cell output.
cd

Unnamed: 0,cotype,jobname,degree,salary
443,外企代表处,ERP工程师,本科,10000.0
1052,外企代表处,C#(ACP.NET)网站开发工程师,大专,6000.0
2163,外企代表处,应用工程师-上海,本科,6000.0
3422,外企代表处,技术支持工程师-上海,本科,5500.0
3423,外企代表处,高级技术工程师-上海,本科,6000.0
...,...,...,...,...
142170,外企代表处,游戏开发/动漫设计实习生,大专,6000.0
142172,外企代表处,转行/游戏开发/设计学徒,大专,6000.0
142198,外企代表处,转行/游戏开发工程师实习生,大专,8000.0
142227,外企代表处,VR虚拟开发+应届实习,大专,6000.0


In [98]:
# [salary, number of postings] pairs for the 外企代表处 subset, most frequent salary first.
ds = [[salary_value, count] for salary_value, count in cd['salary'].value_counts().items()]

In [141]:
# Combine c1..c8 column-wise (axis=1) and fill the gaps with 0.
# NOTE(review): c1..c8 are not defined anywhere in the visible notebook; presumably they
# are the eight degree-count Series co1, co2, co4..co9 from the company-type section -
# confirm, otherwise this cell raises NameError on a fresh kernel.
c0 = pd.concat([c1,c2,c3,c4,c5,c6,c7,c8], axis=1, sort= False).fillna(0)