# Dynamic epistemic network based on Cogsnet structure

## Loading data

In [77]:
import os
import glob

from typing import Dict, List
from tqdm import tqdm, trange


def load_data(cogsnets_dir: str) -> Dict[int, Dict[str, List[str]]]:
    """Load every cogsnet CSV in a directory and index adjacency lists by step.

    Args:
        cogsnets_dir: Directory searched (non-recursively) for ``*.csv``
            files. Each file found is parsed with ``load_cogsnet``.

    Returns:
        A mapping ``step -> {stud_id -> adjacency list}``. It has one entry
        for every step from 1 up to the largest step found; a step without
        any file maps to an empty dict. If the directory contains no CSV
        file at all, an empty mapping is returned.

    NOTE(review): the ``List[str]`` element type is kept from the original
    annotation; the element type actually depends on what
    ``get_adjacency_list`` returns for the CSV data -- confirm.
    """
    # Sorted so the result (and which file wins when two files share a step
    # and student id) does not depend on the OS directory listing order.
    cogsnet_paths = sorted(glob.glob(os.path.join(cogsnets_dir, '*.csv')))
    if not cogsnet_paths:
        # Nothing to load; max() below would raise ValueError on no input.
        return {}

    cogsnets = [load_cogsnet(cp) for cp in cogsnet_paths]

    # Bucket the cogsnets by step in a single pass instead of rescanning the
    # whole list once per step.
    cogsnets_by_step = {}
    for c in cogsnets:
        cogsnets_by_step.setdefault(c.step, []).append(c)

    step_to_adjacency_list: Dict[int, Dict[str, List[str]]] = {}
    nr_steps = max(cogsnets_by_step)
    with trange(1, nr_steps + 1) as t:
        for s in t:
            # Steps with no file still get an (empty) entry.
            step_to_adjacency_list[s] = {
                c.stud_id: c.get_adjacency_list()
                for c in cogsnets_by_step.get(s, [])
            }
    return step_to_adjacency_list

In [12]:
# NOTE(review): `result` is not defined in any visible cell; presumably it is a
# list of cogsnet CSV paths produced by an earlier glob -- confirm.
cogsnets = result

In [16]:
# Pair each entry with the integer parsed from its third '-'-separated field.
cogsnets2 = [(path, int(path.split('-')[2])) for path in cogsnets]

In [17]:
cogsnets2[0]

('cogsnets\\cogsnet-13116-10-exponential-0.300000-0.200000-0.005631-3600.csv',
 10)

In [37]:
import pandas as pd


# Indices into a cogsnet file path split on '-', counted from the end.
# Example file name:
#   cogsnet-13116-10-exponential-0.300000-0.200000-0.005631-3600.csv
# which splits into: student id, step, forgetting model, mu, theta, lambda,
# and units (the last field still carries the '.csv' extension).
STUD_ID_POSITION = -7
STEP_POSITION = -6
FORGETTING_MODEL_POSITION = -5
MU_POSITION = -4
THETA_POSITION = -3
LAMBDA_POSITION = -2
UNITS_POSITION = -1

COL_WEIGHT = 'Weight'
COL_NEIGHBOR = 'AlterID'


class Cogsnet:
    """Container for one cogsnet table together with its descriptive fields.

    The constructor stores its arguments unchanged as attributes of the same
    name. ``df`` is expected to be a pandas DataFrame that has the columns
    named by ``COL_WEIGHT`` and ``COL_NEIGHBOR``.
    """

    def __init__(self, stud_id, step, forgetting_model, mu, theta, lmbda, units, df):
        self.stud_id, self.step = stud_id, step
        self.forgetting_model = forgetting_model
        self.mu, self.theta, self.lmbda = mu, theta, lmbda
        self.units = units
        self.df = df

    def get_adjacency_list(self):
        """Return the ``COL_NEIGHBOR`` values of rows with a positive weight."""
        has_positive_weight = self.df[COL_WEIGHT] > 0
        neighbors = self.df.loc[has_positive_weight, COL_NEIGHBOR]
        return list(neighbors)

def load_cogsnet(path: str):
    """Build a ``Cogsnet`` from a CSV file whose name encodes its parameters.

    The path is split on '-' and the trailing fields are picked by the
    ``*_POSITION`` constants: student id (kept as a string), step (int),
    forgetting model (string), mu, theta and lambda (floats), and units (int,
    taken from the last field up to its first '.'). The file itself is read
    with pandas using ';' as the separator.
    """
    fields = path.split('-')
    return Cogsnet(
        stud_id=fields[STUD_ID_POSITION],
        step=int(fields[STEP_POSITION]),
        forgetting_model=fields[FORGETTING_MODEL_POSITION],
        mu=float(fields[MU_POSITION]),
        theta=float(fields[THETA_POSITION]),
        lmbda=float(fields[LAMBDA_POSITION]),
        # Last field looks like '3600.csv'; keep only the part before the dot.
        units=int(fields[UNITS_POSITION].partition('.')[0]),
        df=pd.read_csv(path, sep=';'),
    )

In [38]:
# Try the loader on a single sample file (Windows-style relative path).
c = load_cogsnet('cogsnets\\cogsnet-13116-10-exponential-0.300000-0.200000-0.005631-3600.csv')

In [39]:
c.get_adjacency_list()

[62555, 94795, 20905, 30952, 60830, 97655]

In [40]:
list(range(1, 10))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [79]:
# Build the step -> {student id -> adjacency list} mapping from every CSV
# found in the 'cogsnets_full' directory.
data = load_data('cogsnets_full//')

100%|██████████████████████████████████████████████████████████████████████████████| 1103/1103 [04:24<00:00,  4.18it/s]


In [93]:
# Show, for steps 1 through 9, every student whose adjacency list is non-empty.
for i in range(1, 10):
    print(f'Step: {i}')
    tmp = {key: value for key, value in data[i].items() if value}
    for k, neighbors in tmp.items():
        print(f' -> {k}: {neighbors}')
    print()

Step: 1
 -> 20905: [13116, 60830, 75178]
 -> 30952: [60830, 20905]
 -> 60830: [75178, 30952, 13116, 97655, 95135]
 -> 75178: [30952, 60830]

Step: 2
 -> 13116: [62555, 94795]
 -> 60830: [75178, 30952, 13116, 97655, 95135, 22931]
 -> 62555: [13116]
 -> 69669: [20905, 97655]

Step: 3
 -> 10841: [88133]
 -> 13116: [62555, 94795]
 -> 20905: [13116, 60830, 75178, 30952, 69669]
 -> 26999: [13116, 62555]
 -> 30952: [60830, 20905, 95135, 75178, 69669]
 -> 60830: [75178, 30952, 13116, 97655, 95135, 22931]
 -> 62555: [13116, 60830]
 -> 69669: [20905, 97655, 60830]

Step: 4
 -> 13116: [62555, 94795]
 -> 14188: [31583]
 -> 17353: [10841]
 -> 20905: [13116, 30952, 69669]
 -> 22931: [22931]
 -> 26999: [13116, 62555, 94795]
 -> 30952: [60830, 20905, 95135, 75178, 69669, 22931]
 -> 39122: [22931]
 -> 51597: [92220]
 -> 53275: [86352]
 -> 60830: [75178, 30952, 97655, 22931, 69669]
 -> 62555: [13116, 60830, 26999]
 -> 69669: [20905, 97655, 60830]
 -> 77824: [18394]
 -> 78966: [55464]
 -> 85596: [52923]


In [80]:
import pickle

In [81]:
# Serialize `data` to 'step_to_adjacency_list.pickle' in binary mode, using the
# highest pickle protocol this Python version supports.
with open('step_to_adjacency_list.pickle', 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [91]:
data[1]

KeyError: 0