In [86]:
### 加载数据
import pickle

import numpy
import pandas
from pyecharts import options as opts
from pyecharts.charts import Bar, Radar
from pyecharts.charts import Pie
from pyecharts.commons.utils import JsCode

# Switch read by the export cell: when True the cleaned data is written to disk.
SAVE_DATA = False

# Read the raw survey responses and report how many rows were loaded.
dataset = pandas.read_csv('dataset.csv')
print('Total data: {}'.format(len(dataset)))

FileNotFoundError: [Errno 2] No such file or directory: 'dataset.csv'

In [3]:
### Clean the data
# Drop the columns that are not needed for the analysis
data = dataset.drop(
    columns=[
        'Unnamed: 0',
        '提交答卷时间',
        '所用时间',
        '来源',
        '来源详情',
        '来自IP',
        '1、请问您的学校所在地区和类别：'
    ]
)

# Remember the original column names; from here on columns are addressed by
# their position. The labels are stored as strings ('0', '1', ...) because that
# is how every later cell looks them up (data[str(idx)], data['22'], ...) and
# how they come back when the exported CSV is re-read with read_csv.
COLS_NAME = data.columns.values.tolist()
data.columns = [str(idx) for idx in range(len(COLS_NAME))]

# The region column holds two anomalous -3 values; drop those rows.
# .copy() gives an independent frame so the in-place assignment below does not
# write into a view of the previous frame.
data = data[data['91'] != -3].copy()

# Question 14 is coded as 1 or 2, so shift the whole range by -1 to treat it
# like the other columns
data.iloc[:, 53:58] = data.iloc[:, 53:58] - 1

# Online study time has invalid and empty values; keep only 0~15
data = data[data['22'].isin(list(range(16)))].astype(int)

# Print the final number of rows
print(f'Data cleaning completed！Total:\n{data.shape[0]}')

Data cleaning completed！Total:
750841


In [4]:
# Export the cleaned data as CSV together with the pickled original column
# names; skipped unless SAVE_DATA is switched on.
if SAVE_DATA:
    data.to_csv('./cooked.csv')
    with open('./COLS_NAME.dat', 'wb') as names_file:
        pickle.dump(COLS_NAME, names_file)

In [18]:
# Rich-text label options shared by the pie charts
PIE_SETTINGS = opts.LabelOpts(
    position="outside",
    formatter="{b|{b}: }{per|{d}%}  ",
    background_color="#eee",
    border_color="#aaa",
    border_width=1,
    border_radius=4,
    # Named rich-text styles; the formatter above references "b" and "per".
    rich=dict(
        a=dict(color="#999", lineHeight=22, align="center"),
        abg=dict(
            backgroundColor="#e3e3e3",
            width="100%",
            align="right",
            height=22,
            borderRadius=[4, 4, 0, 0],
        ),
        hr=dict(
            borderColor="#aaa",
            width="100%",
            borderWidth=0.5,
            height=0,
        ),
        b=dict(fontSize=16, lineHeight=33),
        per=dict(
            color="#eee",
            backgroundColor="#334455",
            padding=[2, 4],
            borderRadius=2,
        ),
    ),
)

In [24]:
# Grade distribution of the surveyed students (bar chart).
# Column '1' holds the grade code; codes 1..12 map onto the twelve labels below.
grade_labels = ["一年级", "二年级", "三年级", "四年级", "五年级", "六年级", "初一", "初二", "初三", "高一", "高二", "高三"]

# Count every grade in one vectorised pass. reindex() guarantees exactly one
# entry per grade (0 when a grade is absent) and ignores codes outside 1..12,
# which the previous res[grade-1] indexing would have wrapped around (grade 0)
# or crashed on (grade > 12). tolist() yields plain Python ints for the chart.
res = (
    data[str(1)]
    .value_counts()
    .reindex(range(1, len(grade_labels) + 1), fill_value=0)
    .tolist()
)

bar = (
    Bar()
    .add_xaxis(grade_labels)
    .add_yaxis("问卷人数", res)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="学生的年级分布"),
        legend_opts=opts.LegendOpts(is_show=True)
    )
)
bar.render_notebook()

In [49]:
# Device usage: for each device column (2..7) count the rows whose value is 1,
# then express each count as a share of the sum of all six counts.
data = pandas.read_csv("cooked.csv")
keys = ['电视', '台式电脑', '平板', '手机', '音频', '纸质学习资料']

device_counts = numpy.array(
    [data[str(col)].value_counts()[1] for col in range(2, 8)]
)
device_shares = device_counts / device_counts.sum()
res = [[label, share] for label, share in zip(keys, device_shares)]

pie = Pie()
pie.add(
    "",
    res,
    radius=["40%", "55%"],
    label_opts=PIE_SETTINGS
)
pie.set_global_opts(title_opts=opts.TitleOpts(title="学生上课设备使用情况统计"))
pie.render_notebook()

In [50]:
# Platform feature usage: number of rows with value 1 in each feature column.
keys = ['回看课程视频', '作业提交', '随堂测试', '视频会议', '作业批改反馈', '课堂发言', '班级通知', '班级圈', '优秀作业查看', '学科竞赛游戏', '屏幕共享', '弹幕', '讨论']

# The feature columns start at column 8, one column per label. Deriving the
# end of the range from len(keys) keeps labels and values the same length; the
# previous range(8, 20) produced only 12 values for the 13 labels, so the last
# category had no bar.
# NOTE(review): this assumes column 20 is the last feature ('讨论') — it sits
# between the device columns (2-7) and the lesson-length column (21); confirm
# against COLS_NAME.
first_feature_col = 8
res = [
    int(data[str(idx)].value_counts()[1])  # int(): plain Python int for the chart
    for idx in range(first_feature_col, first_feature_col + len(keys))
]

bar = (
    Bar()
    .add_xaxis(keys)
    .add_yaxis('使用人数', res)
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
        title_opts=opts.TitleOpts(title="平台功能使用情况"),
        legend_opts=opts.LegendOpts(is_show=True)
    )
)

bar.render_notebook()

In [170]:

# Lesson length: share of each answer code (1..4) in column 21, relative to
# the sum of those four counts.
data = pandas.read_csv("cooked.csv")
keys = ['20分钟', '20～30分钟', '30～45分钟', '45分钟以上']

length_counts = data[str(21)].value_counts()
picked = numpy.array([length_counts[code] for code in range(1, 5)])
picked_shares = picked / picked.sum()
res = [[label, share] for label, share in zip(keys, picked_shares)]

pie = Pie()
pie.add(
    "",
    res,
    radius=["40%", "55%"],
    label_opts=PIE_SETTINGS
)
pie.set_global_opts(title_opts=opts.TitleOpts(title="学生上课时长情况统计"))
pie.render_notebook()

In [None]:
# Daily online study time: share of respondents for each value 1..15 of
# column 22. The denominator is the count of all values in the column.
data = pandas.read_csv("cooked.csv")
values = data['22'].value_counts()

respondent_total = values.sum()
time_values = list(range(1, 16))
res = [values[t] / respondent_total for t in time_values]

bar = Bar()
bar.add_xaxis(time_values)
bar.add_yaxis('使用人数占比', list(res))
bar.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
    title_opts=opts.TitleOpts(title="统计学生每天在线学习时间"),
    legend_opts=opts.LegendOpts(is_show=True)
)

bar.render_notebook()
    

In [168]:
# Concentration during online lessons: share of each answer code (1..5) in
# column 23.
keys = ["能专心学习","在监督陪伴下，能专心学习","有时能专心学习，有时不能专心学习","基本不能专心学习","不适应线上学习"]

answer_counts = data['23'].value_counts()
values = [answer_counts[code] for code in range(1, 6)]

# Sum of the five answer counts; the cells below reuse it as the denominator
# for their shares.
COLUMN_COUNT = sum(values)
values = [count / COLUMN_COUNT for count in values]
res = [[label, share] for label, share in zip(keys, values)]

pie = Pie()
pie.add(
    "",
    res,
    label_opts=PIE_SETTINGS
)
pie.render_notebook()





In [167]:
# Need for family company while studying: share of each answer code (1..3) in
# column 24, relative to COLUMN_COUNT.
keys = ["完全不需要","有时需要","完全需要"]

company_counts = data['24'].value_counts()
values = [company_counts[code] / COLUMN_COUNT for code in range(1, 4)]
res = [[label, share] for label, share in zip(keys, values)]

pie = Pie()
pie.add(
    "",
    res,
    label_opts=PIE_SETTINGS
)
pie.set_global_opts(title_opts=opts.TitleOpts(title="统计学生学习时候是否需要家人陪伴"))
pie.render_notebook()

In [166]:
# Preferred class formats: for each column 25..33, the number of rows with
# value 1 divided by COLUMN_COUNT.
keys = ["直播","录播","资源包","电视课堂","直播+录播","直播+资源包","录播+资源包"
        ,"直播+录播+资源包","录播+资源包+线上辅导答疑"]
values = [data[str(idx)].value_counts()[1]/COLUMN_COUNT for idx in range(25,34)]
bar = (
    Bar()
    .add_xaxis(keys)
    .add_yaxis("喜欢人数占总人数比例",values)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="统计学生喜欢的课堂组织形式"),
        # The tooltip needs a JS callback to turn the fraction into a
        # percentage. The previous format('percent: {:.2%}') was the builtin
        # format() applied to a string, which returns it unchanged, so the
        # tooltip displayed the literal text "percent: {:.2%}".
        tooltip_opts=opts.TooltipOpts(
            formatter=JsCode(
                "function(p){return 'percent: ' + Number(p.value*100).toFixed(2) + '%';}"
            )
        ),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
            position="top",
            # Bar labels show the share as a whole-number percentage.
            formatter=JsCode(
                "function(x){return Number(x.data*100).toFixed() + '%';}"
            ),
        )
    )
)
bar.render_notebook()

In [163]:
# Preferred online course content: for each column 34..37, the number of rows
# with value 1 divided by COLUMN_COUNT.
keys = ["学科课程新课","学科课程复习","音美体劳教育","专题教育"]
values = [
    data[str(col)].value_counts()[1] / COLUMN_COUNT
    for col in range(34, 38)
]

# Bar labels show the share as a whole-number percentage.
percent_label = opts.LabelOpts(
    position="top",
    formatter=JsCode(
        "function(x){return Number(x.data*100).toFixed() + '%';}"
    ),
)

bar = Bar()
bar.add_xaxis(keys)
bar.add_yaxis("表示喜欢的人数占总人数比例", values)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="统计学生对线上课程内容的喜爱情况"),
)
bar.set_series_opts(label_opts=percent_label)
bar.render_notebook()


In [157]:
# Ways of resolving knowledge points not yet mastered: for each column 38..43,
# the number of rows with value 1 divided by COLUMN_COUNT.
keys = ["查阅线上资源自主解决","通过学习平台的视频回放","教师集中时间线上答疑","线上随时利用微信、钉钉等社交平台咨询教师",
        "同学互相交流","暂时放下，待以后解决"]
values = [
    data[str(col)].value_counts()[1] / COLUMN_COUNT
    for col in range(38, 44)
]

# Bar labels show the share as a whole-number percentage.
percent_label = opts.LabelOpts(
    position="top",
    formatter=JsCode(
        "function(x){return Number(x.data*100).toFixed() + '%';}"
    ),
)

bar = Bar()
bar.add_xaxis(keys)
bar.add_yaxis("", values)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="统计学生通过哪些方法解决未掌握知识点"),
)
bar.set_series_opts(label_opts=percent_label)
bar.render_notebook()


In [155]:
# Interaction frequency in online lessons: for each column 44..48, the number
# of rows with value 1 divided by COLUMN_COUNT.
keys = ["不回答问题","偶尔参与回答问题","大多数情况下不能参与回答问题",
        "积极发言,回答问题","课堂没有问答环节，没有机会回答"]
values = [
    data[str(col)].value_counts()[1] / COLUMN_COUNT
    for col in range(44, 49)
]
res = [[label, share] for label, share in zip(keys, values)]

pie = Pie()
pie.add(
    "",
    res,
    label_opts=PIE_SETTINGS
)
pie.set_global_opts(title_opts=opts.TitleOpts(title="统计学生线上学习互动频率"))
pie.render_notebook()

In [154]:
# Difficulties met during online learning: for each column 49..56, the number
# of rows with value 1 divided by COLUMN_COUNT.
# NOTE(review): the first and third labels are identical
# ("与教师沟通不顺畅，问题无法及时解答"); one of them is presumably a copy-paste
# slip — check the correct wording for that column in COLS_NAME.
keys = ["与教师沟通不顺畅，问题无法及时解答","线上学习平台体验不好或功能受限","与教师沟通不顺畅，问题无法及时解答",
        "课后作业设置不合理","课程资源质量欠佳","长时间观看屏幕，眼睛疲劳","要求安装的软件平台过多，容易混淆","环境干扰因素多，难以集中学习"]
values = [data[str(idx)].value_counts()[1]/COLUMN_COUNT for idx in range(49,57)]
bar = (
    Bar()
    .add_xaxis(keys)
    .add_yaxis("",values)
    .set_global_opts(
        # The title used to repeat the previous chart's title
        # ("统计学生通过哪些方法解决未掌握知识点"), which does not describe
        # the categories plotted here.
        title_opts=opts.TitleOpts(title="统计学生线上学习遇到的困难"),
        # The labels are long; rotate them so they do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
            position="top",
            # Bar labels show the share as a whole-number percentage.
            formatter=JsCode(
                "function(x){return Number(x.data*100).toFixed() + '%';}"
            ),
        )
    )
)
bar.render_notebook()



In [142]:
from pyecharts.charts import Bar, Radar
# Abilities developed through online learning: for each column 57..62, the
# number of rows with value 1 divided by COLUMN_COUNT. The radar series expects
# a list of rows, hence the nested list.
values = [[data[str(col)].value_counts()[1] / COLUMN_COUNT for col in range(57, 63)]]
print(values)

# One radar axis per ability, each scaled to a maximum of 1.
ability_names = ["自主学习能力", "自控能力", "数字化资源的利用能力", "表达沟通", "生活实践", "其他"]
ability_axes = [opts.RadarIndicatorItem(name=label, max_=1) for label in ability_names]

redar = Radar(init_opts=opts.InitOpts(width="1280px", height="720px", bg_color="#CCCCCC"))
redar.add_schema(
    schema=ability_axes,
    splitarea_opt=opts.SplitAreaOpts(
        is_show=True, areastyle_opts=opts.AreaStyleOpts(opacity=1)
    ),
    textstyle_opts=opts.TextStyleOpts(color="#fff"),
)
redar.add(
    series_name="线上学习培养能力",
    data=values,
    linestyle_opts=opts.LineStyleOpts(color="#CD0000"),
)
redar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
redar.set_global_opts(
    title_opts=opts.TitleOpts(title="统计学生线上学习培养能力"), legend_opts=opts.LegendOpts()
)
redar.render_notebook()



[[0.32863625, 0.76399125, 0.57577, 0.3379525, 0.24630375, 0.26198375]]


In [152]:
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType

# NOTE(review): list2, list3 and level5 are not referenced by the chart built
# in this cell; they look like leftover sample / hard-coded data and are kept
# only in case another cell still reads them.
list2 = [
    {"value": 12, "percent": 12 / (12 + 3)},
    {"value": 23, "percent": 23 / (23 + 21)},
    {"value": 33, "percent": 33 / (33 + 5)},
    {"value": 3, "percent": 3 / (3 + 52)},
    {"value": 33, "percent": 33 / (33 + 43)},
]

list3 = [
    {"value": 3, "percent": 3 / (12 + 3)},
    {"value": 21, "percent": 21 / (23 + 21)},
    {"value": 5, "percent": 5 / (33 + 5)},
    {"value": 52, "percent": 52 / (3 + 52)},
    {"value": 43, "percent": 43 / (33 + 43)},
    {"value": 43, "percent": 43 / (33 + 43)},
    {"value": 43, "percent": 43 / (33 + 43)}
]
level5 = [
    {'value': 178416, 'percent': 0.22302},
    {'value': 175953, 'percent': 0.21994125},
    {'value': 275664, 'percent': 0.34458},
    {'value': 254933, 'percent': 0.31866625},
    {'value': 193372, 'percent': 0.241715},
    {'value': 179425, 'percent': 0.22428125},
    {'value': 182981, 'percent': 0.22872625}
]

# Satisfaction survey: one x-axis category per column 64..70.
x_axis = ["直播方式","录播方式","教师教学态度","教师教学水平，效果",
          "资源内容","线上学习平台","总体满意度"]

# Count the answers of every satisfaction column once and reuse the result for
# all four levels, instead of recomputing value_counts() for each level and
# for both dict fields.
satisfaction_counts = [data[str(idx)].value_counts() for idx in range(64, 71)]

# For each answer code 1..4, one {"value", "percent"} entry per column. Both
# fields carry the share relative to COLUMN_COUNT. .get(level, 0) keeps the
# cell working when no respondent chose a level for some column (plain
# indexing would raise KeyError).
shares_by_level = {
    level: [
        {"value": share, "percent": share}
        for share in (
            counts.get(level, 0) / COLUMN_COUNT for counts in satisfaction_counts
        )
    ]
    for level in range(1, 5)
}
level1 = shares_by_level[1]
level2 = shares_by_level[2]
level3 = shares_by_level[3]
level4 = shares_by_level[4]

print(level1)
print(len(x_axis))
bar = (
    Bar()
    .add_xaxis(x_axis)
    .add_yaxis("非常满意", level1, stack="stack1", category_gap="25%")
    .add_yaxis("满意",level2, stack="stack1", category_gap="25%")
    .add_yaxis("一般",level3, stack="stack1", category_gap="25%")
    .add_yaxis("不满意",level4, stack="stack1", category_gap="25%")
    .set_series_opts(
        label_opts=opts.LabelOpts(
            position="right",
            # Each segment is labelled with its share as a whole-number percentage.
            formatter=JsCode(
                "function(x){return Number(x.data.percent * 100).toFixed() + '%';}"
            ),
        )
    )
)
bar.render_notebook()


[{'value': 0.22302, 'percent': 0.22302}, {'value': 0.21994125, 'percent': 0.21994125}, {'value': 0.34458, 'percent': 0.34458}, {'value': 0.31866625, 'percent': 0.31866625}, {'value': 0.241715, 'percent': 0.241715}, {'value': 0.22428125, 'percent': 0.22428125}, {'value': 0.22872625, 'percent': 0.22872625}]
7
