In [7]:
import pandas as pd
import statistics

# Build a one-row-per-class summary from 'final_result.csv' and export it to
# 'clazz_result.csv'. A class is included only when it has at least one
# teacher row (role == 1) and at least one student row (role == 3).
df = pd.read_csv('final_result.csv')

# Group the rows by class so that each clazzid is summarised on its own.
clazz_data = df.groupby('clazzid')

# Column order of the exported summary. Passing it to the DataFrame
# constructor keeps the header in the CSV even when no class qualifies;
# without it an empty result would be written as a header-less file that
# pd.read_csv cannot load back.
RESULT_COLUMNS = [
    "clazzid",
    "teacher_personid",
    "courseid",
    "avg_exam_score",
    "avg_activity_participation",
    "avg_exam_participation",
    "variance_score",
    "total_activity_count",
    "total_exam_count",
]

result = []
for clazz_id, records in clazz_data:
    # Teacher rows are the ones with role == 1; skip classes without any.
    teacher_records = records[records['role'] == 1]
    if teacher_records.empty:
        continue

    # Student rows are the ones with role == 3; skip classes without any.
    students_records = records[records['role'] == 3]
    if students_records.empty:
        continue

    # Only the first teacher row is used to identify the teacher and course.
    first_teacher = teacher_records.iloc[0]

    result.append({
        "clazzid": clazz_id,
        "teacher_personid": first_teacher['personid'],
        "courseid": first_teacher['courseid'],
        # Student-level metrics, averaged over the student rows of the class.
        # pandas' mean() skips NaN and yields NaN when every value is missing.
        "avg_exam_score": students_records['mean_score'].mean(),
        "avg_activity_participation": students_records['count_activity_percentage'].mean(),
        "avg_exam_participation": students_records['count_percentage'].mean(),
        # NOTE: despite the key name, this is the mean of the students'
        # std_score values, not a variance. The key is kept unchanged so the
        # exported schema stays the same.
        "variance_score": students_records['std_score'].mean(),
        # Class-level counts, taken as the mean over ALL rows of the class
        # (teacher rows included). Presumably the value is repeated on every
        # row, so the mean avoids counting it more than once - TODO confirm.
        "total_activity_count": records['activity_count_b'].mean(),
        "total_exam_count": records['clazz_unique_exam_count'].mean(),
    })

# Convert the collected rows into a DataFrame with a fixed column layout.
result_df = pd.DataFrame(result, columns=RESULT_COLUMNS)

# Show the first 10 rows of the summary.
print(result_df.head(10))

# Export the summary to CSV without the index column.
result_df.to_csv('clazz_result.csv', index=False)


   clazzid  teacher_personid  courseid  avg_exam_score  \
0  4198432          28407465   1763844             NaN   
1  4198433          28409990   1763846             NaN   
2  4198434          28408316   1763843             NaN   
3  4198435          28406854   1763845             NaN   
4  4198436          28410829   1763847             NaN   
5  4198437          28406415   1763848             NaN   
6  4198438          28406415   1763848             NaN   
7  4198439          28406415   1763848             NaN   
8  4198440          28409805   1763849             NaN   
9  4198441          28408664   1763850             NaN   

   avg_activity_participation  avg_exam_participation  variance_score  \
0                         NaN                     NaN             NaN   
1                         NaN                     NaN             NaN   
2                         NaN                     NaN             NaN   
3                         NaN                     NaN             NaN