In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

In [2]:

# Input directory (read by dirToString) and output directory for saved figures.
DATA_DIR = '/Users/Rambo/Downloads/highschool_data/'
DATA_OUTPUT_DIR = "/Users/Rambo/Documents/GZ_output/"

# Subject codes; each one is also a key of SUBJECT_MAP and RANK_MAP below.
SUBJECT = [
    'yuwen',
    'shuxue',
    'yingyu',
    'tongyongjishu',
    'wuli',
    'huaxue',
    'shengwu',
    'zhengzhi',
    'lishi',
    'dili',
]

# Set of column labels (a set, so it carries no ordering).
# NOTE(review): presumably the header of a per-item statistics table -- confirm where it is used.
COLUMNS = {
    "试题编号",
    "参考学生数",
    "得分人数",
    "得分率",
    "平均分",
    "难度",
    "标准差",
    "斯皮尔曼区分度",
    "皮尔顺区分度",
    "A等平均分",
    "B等平均分",
    "C等平均分",
    "D等平均分",
}

# Subject code -> short identifier.
# NOTE(review): presumably column names in the rank ("等级数据.txt") file -- confirm.
# "tiyu" appears here and in SUBJECT_MAP but is not listed in SUBJECT.
RANK_MAP = {
    "dili": "Dlfd",
    "yuwen": "Ywfd",
    "huaxue": "Hxfd",
    "lishi": "Lsfd",
    "shengwu": "Swfd",
    "wuli": "Wlfd",
    "zhengzhi": "Zzfd",
    "tiyu": "Tyfd",
    "shuxue": "Sxfd",
    "yingyu": "Yyfd",
    "tongyongjishu": "xxjsfd",
}

# Subject code -> Chinese display name (used when naming saved figures).
SUBJECT_MAP = {
    "dili": "地理",
    "yuwen": "语文",
    "huaxue": "化学",
    "lishi": "历史",
    "shengwu": "生物",
    "wuli": "物理",
    "zhengzhi": "政治",
    "tiyu": "体育",
    "shuxue": "数学",
    "yingyu": "英语",
    "tongyongjishu": "信息技术",
}

# City id (1..9) -> city name, used as the title of each per-city chart.
CITY_MAP = dict(enumerate(
    ["贵阳", "遵义", "安顺", "毕节", "铜仁", "六盘水", "黔南州", "黔东南", "黔西南"],
    start=1,
))

def dirToString(subject,type):
    """Return the path of one of the data files under DATA_DIR.

    Parameters
    ----------
    subject : str
        Subject code used as the file-name prefix (e.g. "shuxue").
    type : str
        Which file to locate: "item" (``<subject>ItemInfor.csv``),
        "score" (``<subject>Score.txt``), "rank" (``等级数据.txt``, shared by
        all subjects) or "option" (``<subject>Options.txt``).

    Returns
    -------
    str or None
        The joined path, or None when ``type`` is not one of the four kinds
        above (kept for backward compatibility with existing callers).
    """
    # Function-scope import so this block does not depend on the import cell.
    import os

    fileNames = {
        "item": subject + 'ItemInfor.csv',
        "score": subject + 'Score.txt',
        "rank": "等级数据.txt",
        "option": subject + "Options.txt",
    }
    fileName = fileNames.get(type)
    if fileName is None:
        return None
    # os.path.join inserts a separator only when DATA_DIR lacks a trailing one,
    # so the result is correct whether or not DATA_DIR ends with "/".
    return os.path.join(DATA_DIR, fileName)


In [3]:
subject = "shuxue"
# Flat script: builds a per-city table of total-score bands for `subject`,
# then saves one bar chart per city under DATA_OUTPUT_DIR.

# Per-student score table (tab-separated).
dfscore = pd.read_csv(dirToString(subject, "score"), delimiter='\t')
# Full score: the "full" value of the item row whose "shitihao" is "totalScore".
dfitem = pd.read_csv(dirToString(subject, "item"))
fullscore = dfitem.loc[dfitem["shitihao"] == "totalScore"]["full"].iloc[0]
# Keep only rows whose "absent" flag is "N".
dfscore_noabsent = dfscore.loc[dfscore["absent"] == "N"]
# Band edges: half-open intervals [0,10), [10,20), ..., [140,150).
begin = np.linspace(0, 140, 15).tolist()
end = np.linspace(10, 150, 15).tolist()
# The last band [140,150) excludes 150, so rows whose total equals the full
# score are counted in a separate "满分" column.
# NOTE(review): if fullscore is below 150 those rows are also counted in
# their 10-point band -- confirm this is acceptable for other subjects.
series_city_fullscore = dfscore_noabsent.loc[(dfscore_noabsent["totalScore"] == fullscore)].groupby("cityID")[
    "absent"].count().rename("满分")
# One Series per band: number of rows per cityID whose total lies in [i, j).
# Builtin int() replaces np.int, which was removed from NumPy in 1.24.
series_list = [
    dfscore_noabsent.loc[(dfscore_noabsent["totalScore"] >= i) & (dfscore_noabsent["totalScore"] < j)].groupby(
        "cityID")["absent"].count().rename("[" + str(int(i)) + "," + str(int(j)) + ")") for i, j in
    zip(begin, end)]
series_list = series_list + [series_city_fullscore]
# Rows = cityID, columns = bands; a (city, band) pair with no rows becomes 0.
df = pd.concat(series_list,axis=1,join="outer").fillna(0)

# Draw the score-band bar chart for each city.
# Use a font with Chinese glyphs and keep the minus sign rendering correctly.
mpl.rcParams['font.sans-serif'] = ['FangSong']
mpl.rcParams['font.serif'] = ['FangSong']  # was misspelled 'FandSong'
mpl.rcParams['axes.unicode_minus'] = False

for i in range(len(df)):
    # Figure size in inches; the figure is numbered i so it can be closed by number.
    fig = plt.figure(num=i,figsize=(6,4))
    # Tick labels on the x axis: the band names.
    xlabels = df.iloc[i].index.tolist()
    # Position of each bar.
    left = np.arange(len(xlabels)).tolist()
    # Bar heights: the counts for this city.
    height = df.iloc[i].values.tolist()
    # Bar chart; edge colour is lowercase "k" because Matplotlib >= 3.3
    # no longer accepts uppercase single-letter colours.
    plt.bar(left,height,align="center",color='#B5B5B5',linewidth=1,edgecolor="k",alpha=0.8)
    # Put the band names on the x axis.
    plt.xticks(left,xlabels,size="small",rotation=35)

    ax = plt.gca()
    # Title: the city name looked up from the row's cityID.
    name = CITY_MAP[df.iloc[i].name]
    ax.set_title(name)
    # Hide the right and top spines.
    ax.spines["right"].set_color('none')
    ax.spines["top"].set_color('none')
    # No file extension is given, so Matplotlib appends its default save format.
    plt.savefig(DATA_OUTPUT_DIR+SUBJECT_MAP[subject]+"总分分数段分布图("+str(name)+")",dpi=400)
    # Close the figure so figures do not pile up in memory across iterations.
    plt.close(i)
    print(height)


    

[3, 10, 70, 221, 335, 575, 899, 1098, 1406, 2169, 3198, 4614, 5969, 6260, 2818, 100]
[23, 40, 175, 770, 1420, 2329, 3578, 4086, 4914, 6227, 7454, 8446, 8766, 6783, 2123, 41]
[2, 21, 131, 573, 791, 972, 1310, 1170, 1370, 1609, 1939, 2148, 1911, 1384, 439, 3]
[4, 73, 374, 1819, 2813, 3684, 4813, 5042, 5971, 7297, 8611, 9492, 8356, 5305, 1094, 8]
[45, 35, 285, 874, 1099, 1543, 1991, 2478, 3224, 4479, 5569, 6159, 5085, 3463, 787, 8]
[3, 25, 123, 582, 952, 1394, 1797, 1874, 2172, 2903, 3530, 4236, 3764, 2398, 548, 9]
[7, 28, 142, 670, 1018, 1369, 1758, 1816, 2005, 2552, 3076, 3714, 3481, 2574, 714, 7]
[5, 16, 75, 409, 608, 1041, 1566, 1810, 2308, 3180, 4291, 5561, 5405, 4111, 1066, 17]
[10, 24, 176, 782, 1055, 1406, 1572, 1634, 1966, 2496, 2898, 3534, 3405, 2315, 665, 18]
