In [45]:
import pandas as pd
import json, os

In [46]:
# Load the DKT results from their per-fold / per-sample JSON files.
#   dkt_wy_dict[year]                  -> list with one within-year entry per fold
#   dkt_cy_dict[train_year][test_year] -> list with one cross-year entry per sample
years = ['19-20','20-21','21-22','22-23','23-24']
fold_nums = range(5)
sample_nums = range(1,11)
dkt_wy_dict = {}
dkt_cy_dict = {}
for y in years: # DKT
    dkt_wy_dict[y] = []
    for f in fold_nums:
        # JSON is UTF-8 by specification, so do not rely on the platform's
        # default text encoding when opening the file.
        with open(f'./Data/second_round_data/wy_DKT_{y}_{f}.json', encoding='utf-8') as wyj:
            obj = json.load(wyj)
        # The value is nested under the year and then the fold number (as a string key).
        dkt_wy_dict[y].append(obj[y][str(f)])

    # No cross-year files are read for '23-24'.
    if y == '23-24':
        continue

    dkt_cy_dict[y] = {}
    for s in sample_nums:
        with open(f'./Data/second_round_data/cy_DKT_{y}_{s}.json', encoding='utf-8') as cyj:
            obj = json.load(cyj)
        # NOTE(review): entries with test_year == y are NOT filtered here, whereas the
        # loop that regroups the other models' cross-year results skips them.
        # Confirm the DKT files never contain the training year itself.
        for test_year, measurement in obj.items():
            dkt_cy_dict[y].setdefault(test_year, []).append(measurement)

In [47]:
# Non-DKT models; each name is substituted into the result-file names that get loaded.
models = [
    'BKT',
    'PFA',
    'SAKT-KC',
    'SAKT-E',
]

In [48]:
# Result dictionaries keyed by model name.
#   wy_dicts[model][year] -> parsed within-year results file for that year
#   cy_dicts[model][year] -> parsed cross-year results file for that training year
# The DKT entries are the dictionaries assembled from the per-fold / per-sample files.
wy_dicts = {}
cy_dicts = {}
wy_dicts['DKT'] = dkt_wy_dict
cy_dicts['DKT'] = dkt_cy_dict
for model in models:
    mod_cy_dict = {}
    mod_wy_dict = {}
    # For the two SAKT variants every stored value is indexable and only its
    # element 0 is kept; the other models' values are used as-is.
    is_sakt = model in ('SAKT-KC', 'SAKT-E')
    for year in years:
        # JSON is UTF-8 by specification, so do not rely on the platform's
        # default text encoding when opening the file.
        with open(f'./Data/second_round_data/within_year_results_{model}_{year}.json', encoding='utf-8') as wyj:
            obj = json.load(wyj)
        if is_sakt:
            obj = {key: val[0] for key, val in obj.items()}
        mod_wy_dict[year] = obj

        # No cross-year file is read for '23-24'.
        if year != '23-24':
            with open(f'./Data/second_round_data/cross_year_results_{model}_{year}.json', encoding='utf-8') as cyj:
                obj = json.load(cyj)
            if is_sakt:
                # Two levels of nesting here; unwrap element 0 of each innermost value.
                obj = {
                    key: {k2: v2[0] for k2, v2 in val.items()}
                    for key, val in obj.items()
                }
            mod_cy_dict[year] = obj

    cy_dicts[model] = mod_cy_dict
    wy_dicts[model] = mod_wy_dict

In [49]:
# Regroup the cross-year results of the non-DKT models:
#   redone_cy[model][train_year][eval_year] -> list with one value per sample
redone_cy = {}
for mod in models:
    per_train_year = {}
    for train_year in years:
        # '23-24' has no cross-year entry to regroup.
        if train_year != '23-24':
            by_eval_year = {}
            per_train_year[train_year] = by_eval_year
            for s in sample_nums:
                # Samples are keyed by their number as a string.
                sample_results = cy_dicts[mod][train_year][str(s)]
                for eval_year, value in sample_results.items():
                    # Drop the entry that evaluates on the training year itself.
                    if eval_year != train_year:
                        by_eval_year.setdefault(eval_year, []).append(value)
    redone_cy[mod] = per_train_year

In [60]:
# Regroup the within-year results of the non-DKT models:
#   redone_wy[model][train_year] -> list of the values stored for that year
# NOTE(review): '23-24' is skipped here even though within-year results are loaded
# for every year and the DKT within-year dict keeps its '23-24' entry. Confirm
# that this asymmetry between DKT and the other models is intended.
redone_wy = {}
for mod in models:
    redone_wy[mod] = {
        train_year: list(wy_dicts[mod][train_year].values())
        for train_year in years
        if train_year != '23-24'
    }

In [67]:
# Register the DKT dictionaries next to the regrouped results of the other models.
redone_cy.update({'DKT': dkt_cy_dict})
redone_wy.update({'DKT': dkt_wy_dict})

In [61]:
# Spot-check: display the regrouped within-year values for BKT (one list per year).
redone_wy['BKT']

{'19-20': [0.820314625060368,
  0.8410271557317517,
  0.827390473949196,
  0.8267356404104393,
  0.8203961597158222],
 '20-21': [0.8257860500862885,
  0.8354595552311399,
  0.8203527645532642,
  0.8307305156459882,
  0.8322254092962685],
 '21-22': [0.8749735078632531,
  0.8672923017319332,
  0.8684401559042085,
  0.868923640218259,
  0.8677677230571645],
 '22-23': [0.8279145765141375,
  0.8416995225697905,
  0.8301693062340875,
  0.8274948921446882,
  0.8171236811218481]}

In [68]:
# Flatten both result dictionaries into rows of
#   [model, train_year, eval_year, years_between, auc]
res = []

# Cross-year rows: years_between is the signed distance between the positions of
# the evaluation year and the training year in `years`.
for model, m_dict in redone_cy.items():
    for train_y, train_y_dict in m_dict.items():
        for eval_y, eval_list in train_y_dict.items():
            train_pos = years.index(train_y)
            eval_pos = years.index(eval_y)
            gap = eval_pos - train_pos
            res.extend([model, train_y, eval_y, gap, auc] for auc in eval_list)

# Within-year rows: evaluation year equals training year, so the distance is 0.
for model, m_dict in redone_wy.items():
    for train_y, train_y_list in m_dict.items():
        res.extend([model, train_y, train_y, 0, auc] for auc in train_y_list)
            

In [69]:
# One column per position of the rows collected in `res`.
result_columns = [
    'model',
    'train_year',
    'eval_year',
    'years_between_train_eval',
    'auc',
]
res_df = pd.DataFrame(res, columns=result_columns)

In [71]:
# Persist the combined results table. No `index` argument is passed, so pandas'
# default applies and the row index is written as an extra leading column.
res_df.to_csv('./Data/results_pt_2.csv')