In [13]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import pandas as pd
from Util.path_config import *

def get_grades_data(years,bins=[0, 0.3, 0.7, 1],labels=[0,1,2]):
    """Load each year's score sheet and bucket the final-exam column by quantile.

    For every entry of ``years`` the workbook ``./data/scores/<year>_scores.xlsx``
    is read with a two-row header and its first column as the index; only the
    columns under the top-level header '成绩' are kept. The '期末' column is then
    passed to ``pd.qcut`` with ``bins`` as the quantile edges and ``labels`` as
    the category labels.

    Args:
        years: iterable of values substituted into the file name.
        bins: quantile edges handed to ``pd.qcut``.
        labels: category labels handed to ``pd.qcut``.

    Returns:
        list: one categorical Series named 'target' per year, in input order.
    """
    def load_target(year):
        # Build the label Series directly and give it the name the column
        # assignment would have produced.
        path = "./data/scores/{}_scores.xlsx".format(year)
        score_sheet = pd.read_excel(path, header=[0, 1], index_col=0)['成绩']
        return pd.qcut(score_sheet['期末'], bins, labels=labels).rename('target')

    return [load_target(year) for year in years]
class AllMethodResults():
    """Collects one row of per-year results for each method and tabulates them."""

    def __init__(self, target_years):
        self.target_years = target_years
        self.results = {}
        # Non-zero: add() only prints and stores nothing; 0: add() stores the row.
        self.debug = 1

    def add(self, method_name, results):
        """Record the results of one method, or just print them in debug mode."""
        if self.debug != 0:
            print(f"Debug: {method_name}, {results}")
            return
        self.results[method_name] = results

    def to_dataframe(self):
        """Return a DataFrame with one row per method and one column per target year.

        The method names end up in a leading column called 'method'.
        """
        table = pd.DataFrame.from_dict(self.results, orient='index', columns=self.target_years)
        return table.reset_index().rename(columns={'index': 'method'})


# Label value later passed to LAA.run(focus_student=...); there the labels are
# compared against it with `==`, turning the task into a one-vs-rest problem.
focus_student = 0
# NOTE(review): despite its name this is a list; it is not referenced in the
# cells visible here.
methods_results_dict=[]
# Target years, used as the column labels of the results table.
target_years=range(2017,2024)


results_obj = AllMethodResults(target_years)
# AllMethodResults starts with debug=1 (print only); 0 makes add() store rows.
results_obj.debug=0



In [16]:
# One answers workbook per year 2016..2023, read with a two-row header and the
# first column as index. LAA.run reads this list as a global.
data_list = [pd.read_excel("./data/answers/{}_answers.xlsx".format(i), header=[0, 1], index_col=0) for i in range(2016,2024)]
# Matching per-year label Series (quantile classes of the final-exam score),
# in the same year order as data_list.
labels_list = get_grades_data(range(2016,2024))
# Quick look at the labels of the first year (2016).
print(labels_list[0])


1     0
2     0
3     2
4     2
5     2
     ..
72    0
73    1
74    0
75    2
76    1
Name: target, Length: 76, dtype: category
Categories (3, int64): [0 < 1 < 2]


# LASA

## Random init

In [17]:
import scipy
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


class MCMM():
    """Mixture of independent categorical distributions, fitted with EM.

    Every sample is an indicator (one-hot) array of shape
    ``(n_features, n_values)``; a data set ``X`` therefore has shape
    ``(n_samples, n_features, n_values)``. Component ``k`` has a mixing weight
    ``weights[k]`` and, for each feature, a categorical distribution
    ``probs[k, feature, :]``.

    All likelihood computations are carried out in log-space so that products
    over many features do not underflow to zero.
    """

    def __init__(self, n_components, n_features, n_values,weights=None,probs=None,tol=1e-4,max_iter=100):
        """Set up the model parameters.

        Args:
            n_components: number of mixture components.
            n_features: number of categorical features per sample.
            n_values: number of categories each feature can take.
            weights: optional initial mixing weights, shape ``(n_components,)``;
                defaults to uniform weights.
            probs: optional initial category probabilities, shape
                ``(n_components, n_features, n_values)``; defaults to
                Dirichlet(1, ..., 1) draws from NumPy's global RNG.
            tol: EM stops once the log-likelihood changes by less than this.
            max_iter: maximum number of EM iterations.
        """
        self.n_components = n_components
        self.n_features = n_features
        self.n_values = n_values
        self.tol = tol
        self.max_iter = max_iter
        if weights is None:
            self.weights = np.ones(n_components) / n_components
        else:
            self.weights = weights
        if probs is None:
            # Uses the global np.random state so np.random.seed() controls it.
            self.probs = np.random.dirichlet(np.ones(n_values), n_features * n_components).reshape(
                (n_components, n_features, n_values))
        else:
            self.probs = probs

    def _log_joint(self, X, weights, probs):
        """Return ``log(weights[k] * p(x_n | component k))`` as an ``(n_samples, n_components)`` array."""
        X = np.asarray(X, dtype=float)
        log_joint = np.empty((X.shape[0], self.n_components))
        # log(0) = -inf is meaningful here (impossible event), so silence the warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            log_w = np.log(np.asarray(weights, dtype=float))
            log_p = np.log(np.asarray(probs, dtype=float))
            for k in range(self.n_components):
                # p ** 0 == 1 even for p == 0, so entries with X == 0 contribute
                # nothing; masking them avoids 0 * -inf = NaN.
                terms = np.where(X != 0, X * log_p[k], 0.0)
                log_joint[:, k] = log_w[k] + terms.sum(axis=(1, 2))
        return log_joint

    def log_likelihood(self,X,weights,probs):
        """Return the total log-likelihood of ``X`` under the given parameters."""
        log_joint = self._log_joint(X, weights, probs)
        with np.errstate(invalid='ignore'):
            return np.logaddexp.reduce(log_joint, axis=1).sum()

    def e_step(self,X,weights,probs):
        """Return the posterior component responsibilities, shape ``(n_samples, n_components)``.

        A sample that has zero likelihood under every component yields a row of
        NaN, because no posterior is defined for it.
        """
        log_joint = self._log_joint(X, weights, probs)
        with np.errstate(invalid='ignore'):
            log_norm = np.logaddexp.reduce(log_joint, axis=1, keepdims=True)
            return np.exp(log_joint - log_norm)

    def m_step(self,X,responsibilities,probs):
        """Re-estimate the parameters from the responsibilities.

        ``probs`` is not modified; a new array is returned. A component whose
        effective count is not positive keeps its previous probabilities
        instead of becoming NaN through a division by zero.

        Returns:
            tuple: ``(weights, probs)`` with the updated parameters.
        """
        X = np.asarray(X, dtype=float)
        effective_counts = responsibilities.sum(axis=0)
        weights = effective_counts / X.shape[0]

        new_probs = np.array(probs, dtype=float, copy=True)
        for k in range(self.n_components):
            if effective_counts[k] > 0:
                # Responsibility-weighted category counts, normalised per component.
                weighted_counts = np.tensordot(responsibilities[:, k], X, axes=(0, 0))
                new_probs[k] = weighted_counts / effective_counts[k]
        return weights, new_probs

    def fit(self,X):
        """Run EM until the log-likelihood converges or ``max_iter`` is reached.

        Returns:
            tuple: ``(weights, probs, log_likelihood)`` of the final iteration;
            the log-likelihood is ``-inf`` if no iteration was run.
        """
        prev_ll = -np.inf
        ll = prev_ll  # defined even when max_iter == 0
        for _ in range(self.max_iter):
            responsibilities = self.e_step(X, self.weights, self.probs)
            self.weights, self.probs = self.m_step(X, responsibilities, self.probs)
            ll = self.log_likelihood(X, self.weights, self.probs)
            if np.abs(ll - prev_ll) < self.tol:
                break
            prev_ll = ll

        return self.weights, self.probs, ll

    def predict(self,X):
        """Return the index of the most responsible component for every sample."""
        responsibilities = self.e_step(X, self.weights, self.probs)
        return responsibilities.argmax(axis=1)
# Learning Ability Modeling
class LAM():
    """Learning Ability Modeling: fits an MCMM and returns per-row category profiles."""

    def __init__(self):
        pass

    def fit_transform(self,X,n_components=3,weight=None,probs=None):
        """Fit a categorical mixture to ``X`` and return its ordered parameters.

        ``X.values`` is transposed before fitting, so the columns of ``X`` are
        the mixture's samples and the rows of ``X`` are its features. Entries
        are shifted by +1 and used as indices into three categories, so they
        must be integers in ``{-1, 0, 1}``.

        Components are ordered by ascending mean probability of the highest
        category (index 2, i.e. an original entry of 1). The mixing weights are
        reordered with the same permutation so that ``pi[k]`` still belongs to
        ``theta[:, k, :]``.

        Args:
            X: DataFrame of integer answer codes in ``{-1, 0, 1}``.
            n_components: number of mixture components.
            weight: optional initial mixing weights for the MCMM.
            probs: optional initial probabilities for the MCMM, shape
                ``(n_components, n_rows_of_X, 3)``.

        Returns:
            tuple: ``(pi, theta)`` where ``pi`` has shape ``(n_components,)``
            and ``theta`` has shape ``(n_rows_of_X, n_components, 3)``.
        """
        import copy

        X_t = X.values.transpose()
        n_rows, n_cols = X_t.shape
        n_values = 3
        # One-hot encode: entry v in {-1, 0, 1} sets position v + 1.
        one_hot_matrix = np.zeros((n_rows, n_cols, n_values))
        one_hot_matrix[np.arange(n_rows)[:, None], np.arange(n_cols), X_t+1] = 1
        n_features = n_cols

        # Copy the initial parameters so fitting never touches the caller's objects.
        weight,probs = copy.deepcopy(weight),copy.deepcopy(probs)
        mcmm = MCMM(n_components=n_components,n_features=n_features,n_values=n_values,weights=weight,probs=probs)
        pi,theta,ll = mcmm.fit(one_hot_matrix)

        sort_value = [theta[i,:,2].mean() for i in range(n_components)]
        sort_index = np.argsort(sort_value)
        # Apply the same permutation to weights and probabilities; reordering
        # only theta would pair each weight with the wrong component.
        pi = np.asarray(pi)[sort_index]
        theta = theta[sort_index]
        theta = theta.transpose(1,0,2)
        return pi,theta
    
# Learning Ability Adaptation
class LAA():
    """Learning Ability Adaptation: align earlier years to a target year and classify."""

    def run(self,n_iters=1,focus_student = None,params=None):
        """Run the model / align / classify pipeline ``n_iters`` times.

        Reads the notebook globals ``data_list`` (one answers frame per year,
        starting at 2016) and ``labels_list`` (the matching label Series).

        Per iteration:
          1. fit a 3-component LAM on every year and flatten the result to one
             feature row per student;
          2. standardise each year's features;
          3. for every target year, align each earlier year to it;
          4. for target years 2017..2023, train a linear SVC on all aligned
             earlier years and score its accuracy on the target year.

        Args:
            n_iters: number of repetitions of the whole pipeline.
            focus_student: if not None, labels are binarised as
                ``label == focus_student``; otherwise the labels are used as is.
            params: optional per-year initial parameters; ``params[i][0]`` is
                passed as the weights and ``params[i][1]`` (after
                ``transpose(1, 0, 2).reshape(3, -1, 3)``) as the probabilities.

        Returns:
            list: one list of per-target-year accuracies for each iteration.
        """
        def distribution_alignment(Xs, Xt):
            # CORAL-style alignment: whiten the source features with their own
            # (identity-regularised) covariance, then re-colour them with the
            # target covariance.
            cov_src = np.cov(Xs.T) + np.eye(Xs.shape[1])
            cov_tar = np.cov(Xt.T) + np.eye(Xt.shape[1])
            A_coral = np.dot(scipy.linalg.fractional_matrix_power(cov_src, -0.5),
                             scipy.linalg.fractional_matrix_power(cov_tar, 0.5))
            # Fractional matrix powers may come back complex; keep the real part.
            return np.real(np.dot(Xs, A_coral))

        results_list=[]

        for _ in range(n_iters):
            # learning ability modeling module
            lam = LAM()

            learning_ability_data_list = []
            for i,data in enumerate(data_list):
                if params is None:
                    _pi,learning_ability = lam.fit_transform(data,n_components=3)
                else:
                    _pi,learning_ability = lam.fit_transform(
                        data,n_components=3,weight=params[i][0],
                        probs=params[i][1].transpose(1,0,2).reshape(3,-1,3))

                # Flatten (component, value) into a single feature axis per row.
                learning_ability = learning_ability.reshape(learning_ability.shape[0],-1)
                learning_ability_data_list.append(learning_ability)

            standardized_learning_ability_data_list = [
                StandardScaler().fit_transform(learning_ability)
                for learning_ability in learning_ability_data_list
            ]

            # Entry t holds every earlier year aligned to year t, followed by
            # year t itself (unaligned) as the last element.
            adapted_learning_ability_data_list_source = []
            for t,target_block in enumerate(standardized_learning_ability_data_list):
                temp = [
                    distribution_alignment(standardized_learning_ability_data_list[s],target_block)
                    for s in range(t)
                ]
                temp.append(target_block)
                adapted_learning_ability_data_list_source.append(temp)

            results=[]
            for year_target in range(2017,2024):
                idx = year_target-2016
                source_X = np.vstack(adapted_learning_ability_data_list_source[idx][:idx])
                target_X = adapted_learning_ability_data_list_source[idx][idx]
                if focus_student is not None:
                    source_Y = np.hstack(labels_list[:idx])==focus_student
                    target_Y = labels_list[idx]==focus_student
                else:
                    source_Y = pd.concat(labels_list[:idx],axis=0)
                    target_Y = labels_list[idx]

                SVC_model = SVC(**{'C': 1, 'kernel': 'linear'})
                SVC_model.fit(source_X,source_Y)

                target_pred = SVC_model.predict(target_X)
                results.append(accuracy_score(target_Y,target_pred))
            print(results)
            results_list.append(results)
        return results_list


# Seed NumPy's global RNG first: with params=None, MCMM draws its initial
# probabilities from np.random.dirichlet, so this fixes the random inits.
np.random.seed(56)

laa = LAA()
# 15 repetitions with random initialisation; each prints its per-year accuracies.
results_list=laa.run(n_iters=15,focus_student=focus_student,params=None)

# NOTE(review): `results` is not used below; the mean is taken over results_list.
results = np.array(results_list)
# Average over repetitions (axis 0) to get one accuracy per target year ...
mean_result = np.mean(results_list, axis=0)
# ... then average over the target years for a single summary number.
print( "mean_mean_result:", np.mean(mean_result))

[0.8051948051948052, 0.7938931297709924, 0.8256880733944955, 0.8225806451612904, 0.6593406593406593, 0.7865853658536586, 0.810126582278481]
[0.7402597402597403, 0.816793893129771, 0.8073394495412844, 0.8467741935483871, 0.7362637362637363, 0.8109756097560976, 0.7974683544303798]
[0.7402597402597403, 0.7938931297709924, 0.8256880733944955, 0.8225806451612904, 0.6373626373626373, 0.8048780487804879, 0.7721518987341772]
[0.7532467532467533, 0.8320610687022901, 0.8256880733944955, 0.8225806451612904, 0.6153846153846154, 0.7987804878048781, 0.7848101265822784]
[0.7792207792207793, 0.8015267175572519, 0.8440366972477065, 0.8306451612903226, 0.6703296703296703, 0.7926829268292683, 0.7848101265822784]
[0.7402597402597403, 0.8091603053435115, 0.7981651376146789, 0.7903225806451613, 0.6593406593406593, 0.7987804878048781, 0.7974683544303798]
[0.7922077922077922, 0.8320610687022901, 0.8073394495412844, 0.8306451612903226, 0.6373626373626373, 0.7865853658536586, 0.7721518987341772]
[0.766233766233

## Hierarchical init

In [18]:
laa = LAA()
import pickle
# CAUTION: pickle.load can execute arbitrary code embedded in the file; only
# load 'initial_params.pkl' if it comes from a trusted source.
with open('initial_params.pkl','rb') as f:
    params = pickle.load(f)
# Single run: with params supplied, MCMM starts from the given weights and
# probabilities instead of drawing random ones.
results_list=laa.run(n_iters=1,focus_student=focus_student,params=params)

# NOTE(review): `results` is not used below; the mean is taken over results_list.
results = np.array(results_list)
# Per-target-year mean over runs, then the mean over years.
mean_result = np.mean(results_list, axis=0)
print( "mean_mean_result:", np.mean(mean_result))

[0.8051948051948052, 0.7709923664122137, 0.8348623853211009, 0.8387096774193549, 0.7802197802197802, 0.823170731707317, 0.810126582278481]
mean_mean_result: 0.809039475507579
