# 导入库与数据

In [115]:
import pandas as pd 
import numpy as np
from gm11 import gm11
from pyecharts import options as opts
from pyecharts.charts import *

In [108]:
# Load the four source tables used by the rest of the notebook, in this order:
# provincial admission lines, per-university admission lines, the 2020
# university ranking, and the university information table.
csv_paths = (
    'data/14-19年各省高考分数线.csv',
    'data/14-19年高校分数线.csv',
    'data/2020年中国大学排名.csv',
    'data/学信网院校信息.csv',
)
province_score, school_score, school_sort, school_info = [
    pd.read_csv(csv_path) for csv_path in csv_paths
]

# Preview the first three rows of every table.
for preview in (province_score, school_score, school_sort, school_info):
    display(preview.head(3))

Unnamed: 0,地区,年份,考生类别,批次,分数线
0,北京,2014,理科,本科一批,543
1,北京,2014,理科,本科二批,495
2,北京,2014,理科,本科三批,466


Unnamed: 0,学校名称,平均分,年份,录取批次,招生地址,招生类型,最低分,省份,省控线,考生类别
0,广州航海学院,480,2014,本科提前批,黑龙江,普通类,-,广东,-,理科
1,广州航海学院,507,2014,本科二批,江西,普通类,-,广东,-,理科
2,广州航海学院,533,2014,本科二批,河北,普通类,-,广东,-,理科


Unnamed: 0,排名,学校名称,省市,学校类型,总分,办学层次得分,学科水平得分,办学资源得分,师资规模与结构得分,人才培养得分,科学研究得分,社会服务得分,高端人才得分,重大项目与成果得分,国际竞争力得分
0,1,清华大学,北京,综合,852.5,38.2,72.4,39.6,48.4,256.8,69.1,40.6,76.5,131.0,79.9
1,2,北京大学,北京,综合,746.7,36.1,73.1,24.6,49.2,237.6,71.0,16.2,71.9,105.8,61.2
2,3,浙江大学,浙江,综合,649.2,33.9,65.3,20.1,48.3,215.3,68.6,23.9,49.1,81.7,43.0


Unnamed: 0.1,Unnamed: 0,院校名称,院校所在地,教育行政主管部门,院校类型,学历层次,一流大学建设高校,一流学科建设高校,研究生院,综合满意度,院校环境满意度,生活满意度
0,0,北京大学,北京,教育部,综合,本科,1,0,1,4.7,4.6,4.4
1,1,中国人民大学,北京,教育部,综合,本科,1,0,1,4.8,4.5,4.3
2,2,清华大学,北京,教育部,综合,本科,1,0,1,4.8,4.9,4.8


# 各个省每年文理一本二本分数线

## 一本文理科

In [169]:
# Build one polar bar chart per candidate category (science / liberal arts)
# showing each region's first-tier undergraduate admission line for 2014-2019,
# and place each chart on its own tab.
tab = Tab()

for category in ('理科', '文科'):
    # Polar chart with the dark theme.
    polar = Polar(init_opts=opts.InitOpts(theme='dark'))
    polar.add_schema(
        radiusaxis_opts=opts.RadiusAxisOpts(
            # Values shown along the radius axis: one ring per year.
            data=[2014, 2015, 2016, 2017, 2018, 2019],
            # Treat the radius axis as discrete categories.
            type_="category",
            # Hide the background split (grid) lines.
            splitline_opts=opts.SplitLineOpts(is_show=False),
            # Hide the axis line itself.
            axisline_opts=opts.AxisLineOpts(is_show=False),
            # Hide the axis ticks.
            axistick_opts=opts.AxisTickOpts(is_show=False)),
        angleaxis_opts=opts.AngleAxisOpts(
            # Direction of the values: True is clockwise, False is counter-clockwise.
            is_clockwise=True,
            # Upper bound of the angle axis (the score scale).
            max_=750,
            # Per the original author's note, these options have to be set
            # together with the radius-axis ones or an error is raised.
            splitline_opts=opts.SplitLineOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            # Hide the angle-axis labels.
            axislabel_opts=opts.LabelOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False)),
    )

    # One series per region; the legend is used to switch between regions.
    for area in province_score['地区'].unique():
        # Rows for this region, first-tier batch, current category.
        t_data = province_score[(province_score['地区'] == area) &
                                (province_score['批次'] == '本科一批') &
                                (province_score['考生类别'] == category)]
        # Sort by year, ascending, so values line up with the radius axis.
        t_data = t_data.sort_values(by='年份', ascending=True)
        # Add the region's admission lines as a bar series.
        polar.add(area, t_data['分数线'].tolist(), type_="bar")

    # Global and series options do not depend on the region, so they are
    # applied once after every series has been added instead of once per
    # loop iteration.
    polar.set_global_opts(
        legend_opts=opts.LegendOpts(
            # Show the legend (the per-region switch buttons).
            is_show=True,
            # Only one region can be selected at a time.
            selected_mode='single',
            # Legend position.
            pos_top='5%', pos_right='5%',
            # Lay the legend out vertically.
            orient='vertical',
            # Legend marker shape.
            legend_icon='circle'),
        title_opts=opts.TitleOpts(
            # Chart title, e.g. "【理科】2014-2019年一本分数线".
            title=f"【{category}】2014-2019年一本分数线",
            # Centre the title horizontally.
            pos_left="center",
            # Title font size.
            title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
        visualmap_opts=opts.VisualMapOpts(
            # Keep the visual-map component itself hidden.
            is_show=False,
            # Value range of the visual map.
            min_=0, max_=5,
            # Continuous rather than piecewise mapping.
            is_piecewise=False,
            # Data dimension the mapping is applied to.
            dimension=0,
            # Colour gradient used by the mapping.
            range_color=['#203fb6', '#008afb', '#00FF00', '#ffec4a', '#ff6611', '#f62336']))
    polar.set_series_opts(
        label_opts=opts.LabelOpts(
            # Show a label on every bar.
            is_show=True,
            # Label template: the data value.
            formatter='{c}'),
        # Raw ECharts item style: bar shadow (blur, colour, vertical offset)
        # and opacity.
        itemstyle_opts={"normal": {
            'shadowBlur': 20,
            'shadowColor': 'rgba(0,191,255,0.5)',
            'shadowOffsetY': 2,
            'opacity': 0.8
        }})

    # Add the finished chart as a tab named after the category.
    tab.add(polar, category)

tab.render('2014-2019年一本分数线.html')
tab.render_notebook()

## 二本文理科

In [170]:
# Build one polar bar chart per candidate category (science / liberal arts)
# showing each region's second-tier undergraduate admission line for
# 2014-2019, and place each chart on its own tab.
tab = Tab()

for category in ('理科', '文科'):
    polar = Polar(init_opts=opts.InitOpts(theme='dark'))
    polar.add_schema(
        # Categorical radius axis with one ring per year; grid lines, axis
        # line and ticks are hidden.
        radiusaxis_opts=opts.RadiusAxisOpts(
            data=[2014, 2015, 2016, 2017, 2018, 2019],
            type_="category",
            splitline_opts=opts.SplitLineOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False)),
        # Clockwise angle axis capped at 750, with all decorations hidden.
        angleaxis_opts=opts.AngleAxisOpts(
            is_clockwise=True,
            max_=750,
            splitline_opts=opts.SplitLineOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False)),
    )

    # One bar series per region; the legend switches between regions.
    for area in province_score['地区'].unique():
        t_data = province_score[(province_score['地区'] == area) &
                                (province_score['批次'] == '本科二批') &
                                (province_score['考生类别'] == category)]
        # Sort by year, ascending, so values line up with the radius axis.
        t_data = t_data.sort_values(by='年份', ascending=True)
        polar.add(area, t_data['分数线'].tolist(), type_="bar")

    # Global and series options do not depend on the region, so they are
    # applied once after every series has been added instead of once per
    # loop iteration.
    polar.set_global_opts(
        # Single-select vertical legend in the top-right corner.
        legend_opts=opts.LegendOpts(is_show=True, selected_mode='single',
                                    pos_top='5%', pos_right='5%',
                                    orient='vertical', legend_icon='circle'),
        # Centred title, e.g. "【理科】2014-2019年二本分数线".
        title_opts=opts.TitleOpts(title=f"【{category}】2014-2019年二本分数线",
                                  pos_left="center",
                                  title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
        # Hidden continuous visual map on dimension 0 that supplies the
        # colour gradient.
        visualmap_opts=opts.VisualMapOpts(is_show=False,
                                          min_=0,
                                          max_=5,
                                          is_piecewise=False,
                                          dimension=0,
                                          range_color=['#203fb6', '#008afb', '#00FF00', '#ffec4a', '#ff6611', '#f62336']))
    polar.set_series_opts(
        # Label every bar with its value.
        label_opts=opts.LabelOpts(is_show=True, formatter='{c}'),
        # Raw ECharts item style: bar shadow (blur, colour, vertical offset)
        # and opacity.
        itemstyle_opts={"normal": {
            'shadowBlur': 20,
            'shadowColor': 'rgba(0,191,255,0.5)',
            'shadowOffsetY': 2,
            'opacity': 0.8
        }})

    # Add the finished chart as a tab named after the category.
    tab.add(polar, category)

tab.render('2014-2019年二本分数线.html')
tab.render_notebook()

# 20年各个省份一本二本文科理科分数线预测

In [92]:
# Forecast the next (2020) admission line for every combination of region,
# candidate category and admission batch with the GM(1,1) grey model, then
# save the forecasts to CSV.
pici = ['本科一批','本科二批']
kemu = ['理科','文科']
prov = list(province_score['地区'].unique())

result = []
for p in prov:
    for k in kemu:
        for pi in pici:
            # Historical lines for this region / category / batch.
            history = province_score[(province_score['地区'] == p) &
                                     (province_score['考生类别'] == k) &
                                     (province_score['批次'] == pi)]
            # The model is fitted on a time series, so order the observations
            # chronologically (as the plotting cells do) rather than relying
            # on the row order of the CSV.
            history = history.sort_values(by='年份', ascending=True)

            # The first element returned by gm11 is the restored-value
            # (prediction) function.
            f = gm11(history.loc[:, '分数线'].values)[0]
            # Assumes the prediction function is 1-based (k=1 is the first
            # observation), as its formula in the warning output of this cell
            # suggests; then k=len(history) is only the fitted value for the
            # last observed year and the next-year forecast is one step
            # further.
            # NOTE(review): this also assumes one row per year with no gaps,
            # and enough rows for gm11 to fit - confirm against the data.
            d = f(len(history) + 1)
            d = d.round(0)

            # Collect one output row per combination.
            result.append([p, k, pi, d])

# Convert to a DataFrame and save.
result = pd.DataFrame(result,columns=['地区','考生类别','批次','分数线'])
result.to_csv(path_or_buf='20年各省一本二本分数线预测.csv',index=False)

  f = lambda k: (x0[0]-b/a)*np.exp(-a*(k-1))-(x0[0]-b/a)*np.exp(-a*(k-2))    # 还原值
  P = 1.0*(np.abs(delta - delta.mean()) < 0.6745*x0.std()).sum()/len(x0)


# sad