In [1]:
import rpy2.robjects as robjects
import argparse
import os
import json
import warnings
import pandas as pd
import numpy as np
import time

from op_main_copy import OperatorNetInit
from train import train

# Silence every warning from this point on (no category or module filter is given).
warnings.filterwarnings(action='ignore')

In [4]:
# Announce the start of the FIR-DKT run.
print("{0} {1} {0}".format("#" * 20, " FIR-DKT START "))

# Experiment-wide settings shared by the cells below.
num_total_kc = 20      # KC count used for the loop bound / matrix size in GetRankedKCGraph
num_partial_kc = 12    # size of the partial KC subset (see SelPartialKCs)
num_candidate_kc = 10  # presumably the "top 10" candidate cut-off -- TODO confirm
num_logs = 1000        # not referenced by the cells visible here
target_err = 0.05      # not referenced by the cells visible here

####################  FIR-DKT START  ####################


In [5]:
# Return the first subset (ranked related KCs per KC) out of the full KC set.
def SelPartialKCs(json_path, data_path):
    """Run the R relation analysis for ``data_path`` and collect its KC relations.

    Steps performed:
      1. Load the JSON config at ``json_path``, set
         ``data[0]['input_file_name']`` to ``str(data_path)`` and write the
         config back to the same file.
      2. Source ``./20220731_RF_LASSO.R`` through rpy2.
      3. Read ``./OUT/relation.csv`` and group its ``after`` column by the
         value of its ``before`` column.

    Args:
        json_path: Path of the JSON config file. It is overwritten in place.
        data_path: Input data path recorded in the config.

    Returns:
        tuple: ``(all_kcs, sub_kcs)``. ``all_kcs`` is the list of unique
        ``before`` values in order of first appearance. ``sub_kcs`` maps each
        of those values to the list of ``after`` values found on its rows, in
        file order (rank order according to the original author's note).
    """
    # Read the existing config. UTF-8 is used explicitly because the file is
    # written back below as UTF-8 with ensure_ascii=False.
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Point the config at the requested input file.
    data[0]['input_file_name'] = str(data_path)

    # Overwrite the config file with the updated content.
    with open(json_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent="\t", ensure_ascii=False)

    # Run the R relation analysis.
    # NOTE(review): presumably the script reads the config above and produces
    # ./OUT/relation.csv -- confirm against the R source.
    robjects.r.source('./20220731_RF_LASSO.R', encoding='utf-8')

    # Load the analysis result.
    subset_df = pd.read_csv('./OUT/relation.csv')

    all_kcs = list(subset_df['before'].unique())

    # Fixed: the row filter previously always compared against all_kcs[0], so
    # every KC was assigned the first KC's 'after' list. Each KC now gets the
    # rows whose 'before' value matches that KC.
    sub_kcs = {
        kc: list(subset_df.loc[subset_df['before'] == kc, 'after'])
        for kc in all_kcs
    }

    return all_kcs, sub_kcs

In [6]:
def DoOperatorNet(data_path, n_epochs, kc_candidates, target_KC):
    """Set up the operator net, train it, and return two of its metric histories.

    All four arguments are forwarded unchanged to ``OperatorNetInit``; the ten
    objects it returns are then passed, in the same order, to ``train``.

    Args:
        data_path: Forwarded to ``OperatorNetInit``.
        n_epochs: Forwarded to ``OperatorNetInit``; the value used for training
            is the one ``OperatorNetInit`` hands back.
        kc_candidates: Forwarded to ``OperatorNetInit``.
        target_KC: Forwarded to ``OperatorNetInit``.

    Returns:
        tuple: ``(f1_score_history, auc_history)`` as obtained from
        ``OperatorNetInit``.
        NOTE(review): presumably ``train`` fills these histories in place, and
        callers treat them as "ranked KC relation" and "current error"
        respectively -- confirm both.
    """
    # Everything the training loop needs, produced by the init helper.
    (net, n_epochs, train_dl, dev, optim, valid_dl, item_count,
     acc_hist, auc_hist, f1_hist) = OperatorNetInit(data_path, n_epochs, kc_candidates, target_KC)

    rule = "=" * 20
    print(rule + "TRAIN START" + rule)
    train(net, n_epochs, train_dl, dev, optim, valid_dl, item_count, acc_hist, auc_hist, f1_hist)
    print(rule + "TRAIN END" + rule)

    print(f1_hist, auc_hist, acc_hist)

    return f1_hist, auc_hist

In [7]:
def GetRankedKCGraph(json_path, data_path):
    """Train one operator net per target KC and collect what each run returns.

    The relation analysis is run once through ``SelPartialKCs``. Then, for each
    of the first ``num_total_kc`` KCs, the first ``num_candidate_kc`` entries of
    its related-KC list are used as candidates for ``DoOperatorNet``.

    The epoch count is read from the notebook-level ``n_epochs`` variable, which
    must be defined before this function is called.

    Args:
        json_path: Config file path forwarded to ``SelPartialKCs``.
        data_path: Data file path forwarded to ``SelPartialKCs`` and
            ``DoOperatorNet``.

    Returns:
        dict: Maps each processed target KC to the ``(ranked_kc_rel, cur_err)``
        pair returned by ``DoOperatorNet`` for it.
        TODO: turning these results into a ``num_total_kc`` x ``num_total_kc``
        graph matrix was not implemented in the original code either.
    """
    # Related-KC lists for every KC found by the relation analysis.
    all_kcs, sub_kcs = SelPartialKCs(json_path, data_path)

    results = {}

    # Fixed: the loop bound used to be len(num_total_kc), which raises
    # TypeError because num_total_kc is an int. Slicing also keeps the loop
    # in range when fewer than num_total_kc KCs are available.
    for target_kc in all_kcs[:num_total_kc]:
        # Keep only the top-ranked candidates (the cut-off is provisional,
        # per the original author's note).
        kc_candidates = sub_kcs[target_kc][:num_candidate_kc]

        # operator net (train)
        ranked_kc_rel, cur_err = DoOperatorNet(data_path, n_epochs, kc_candidates, target_kc)

        print(ranked_kc_rel, cur_err)
        # The debugging exit() that used to follow this print was removed so the
        # loop can cover every KC and the interpreter is not shut down.
        results[target_kc] = (ranked_kc_rel, cur_err)

    return results

In [8]:
# Run configuration consumed by the functions defined above.
n_epochs = 30  # read as a global by GetRankedKCGraph
data_path = "./data/my_algebra_picture3000_ordered_binary.csv"
json_path = './info_back.json'  # config file that SelPartialKCs rewrites

# NOTE(review): a training log further down in this notebook reports
# "device:  cuda", so this setting may not be taking effect -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [9]:
#GetRankedKCGraph(json_path, data_path)

In [10]:
# Load the relation result file and tidy it up.
subset_df = pd.read_csv('./OUT/relation.csv')

# Distinct 'before' KCs, in order of first appearance.
all_kcs = list(subset_df['before'].unique())

# sub_kcs is keyed by the 'before' KC; each value lists that KC's 'after' KCs
# in file order (rank order, per the original note).
sub_kcs = {
    before_kc: list(subset_df.loc[subset_df['before'] == before_kc, 'after'])
    for before_kc in all_kcs
}

In [11]:
# Display the unique 'before' KCs collected from the relation table.
all_kcs

['Copy.initial.in.diagram',
 'Draw.larger.bar....addition.subtraction',
 'Draw.larger.bar....multiplication',
 'Draw.smaller.bar....addition.subtraction',
 'Draw.smaller.bar....multiplication',
 'Enter.added.quantity.in.diagram',
 'Enter.initial.in.diagram....given',
 'Enter.larger.inital.in.diagram....calculated',
 'Enter.larger.initial.in.diagram....given',
 'Enter.quantity.from.diagram.by.calculating',
 'Enter.quantity.from.diagram.by.reading',
 'Enter.smaller.inital.in.diagram....calculated',
 'Enter.smaller.initial.in.diagram....given',
 'Enter.subtracted.quantity.in.diagram',
 'Enter.total.in.diagram...calculated...addition',
 'Enter.total.in.diagram...calculated...multiplication',
 'Enter.total.in.diagram...calculated...subtraction',
 'Identify.larger.quantity....addition',
 'Identify.larger.quantity....multiplication',
 'Identify.larger.quantity....subtraction']

In [12]:
# Display the 'after' list recorded for one specific KC.
sub_kcs['Draw.larger.bar....multiplication']

['Enter.added.quantity.in.diagram',
 'Copy.initial.in.diagram',
 'Enter.total.in.diagram...calculated...addition',
 'Draw.smaller.bar....multiplication',
 'Enter.quantity.from.diagram.by.calculating',
 'Draw.larger.bar....addition.subtraction',
 'Identify.larger.quantity....subtraction',
 'Identify.larger.quantity....multiplication',
 'Enter.subtracted.quantity.in.diagram',
 'Enter.quantity.from.diagram.by.reading',
 'Enter.smaller.inital.in.diagram....calculated']

In [41]:
# Print two sample KCs, each followed by its 'after' list.
for probe_idx in (1, 15):
    print(all_kcs[probe_idx])
    print(sub_kcs[all_kcs[probe_idx]])

Draw.larger.bar....addition.subtraction
['Enter.smaller.inital.in.diagram....calculated', 'Copy.initial.in.diagram', 'Draw.smaller.bar....multiplication', 'Draw.larger.bar....multiplication', 'Enter.total.in.diagram...calculated...addition', 'Enter.total.in.diagram...calculated...subtraction', 'Identify.larger.quantity....multiplication', 'Enter.added.quantity.in.diagram', 'Enter.initial.in.diagram....given', 'Identify.larger.quantity....subtraction', 'Enter.larger.initial.in.diagram....given', 'Draw.smaller.bar....addition.subtraction']
Enter.total.in.diagram...calculated...multiplication
['Enter.larger.inital.in.diagram....calculated', 'Enter.smaller.inital.in.diagram....calculated', 'Enter.total.in.diagram...calculated...addition', 'Identify.larger.quantity....addition', 'Copy.initial.in.diagram', 'Enter.total.in.diagram...calculated...subtraction', 'Identify.larger.quantity....subtraction', 'Enter.quantity.from.diagram.by.calculating', 'Enter.initial.in.diagram....given', 'Draw.sma

In [42]:
# Check whether two different KCs were given the same 'after' list.
first_kc, other_kc = all_kcs[0], all_kcs[3]
print(first_kc, other_kc, sep="\n")
sub_kcs[first_kc] == sub_kcs[other_kc]

Copy.initial.in.diagram
Draw.smaller.bar....addition.subtraction


False

In [43]:
# Smoke test: train the operator net for the first few KCs with a short schedule.
# Set debug_num_kcs to num_total_kc to sweep over every KC.
debug_num_kcs = 2
# Fixed: the short epoch count used to be assigned to n_epochs inside the loop,
# which overwrote the configured n_epochs that GetRankedKCGraph reads as a
# global. It now has its own name and is set once, outside the loop.
debug_n_epochs = 3

for kc_idx in range(debug_num_kcs):
    # Top 10 of each KC's related list (the cut-off of 10 was chosen
    # arbitrarily and is to be revised later).
    print("all_kcs[kc_idx] : ", all_kcs[kc_idx])
    kc_candidates = sub_kcs[all_kcs[kc_idx]][:10]
    print(len(kc_candidates))
    print(kc_candidates)

    # operator net (train)
    ranked_kc_rel, cur_err = DoOperatorNet(data_path, debug_n_epochs, kc_candidates, all_kcs[kc_idx])

    print(ranked_kc_rel, cur_err)

all_kcs[kc_idx] :  Copy.initial.in.diagram
10
['Enter.smaller.inital.in.diagram....calculated', 'Draw.larger.bar....multiplication', 'Enter.total.in.diagram...calculated...addition', 'Enter.quantity.from.diagram.by.calculating', 'Draw.larger.bar....addition.subtraction', 'Enter.subtracted.quantity.in.diagram', 'Enter.total.in.diagram...calculated...subtraction', 'Enter.initial.in.diagram....given', 'Enter.larger.initial.in.diagram....given', 'Enter.smaller.initial.in.diagram....given']
size of train_data :  1800
size of valid_data :  900
size of test_data :  300
device:  cuda
--------------------MODEL PREPARE--------------------
DKT(
  (rnn): LSTM(22, 50, num_layers=4, dropout=0.2)
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=11, bias=True)
    (1): Sigmoid()
  )
)
Epoch 1: f1_score=0.8988 auc_score=0.7231 highest_auc=0.0000
Epoch 2: f1_score=0.9207 auc_score=0.7391 highest_auc=0.7231
Epoch 3: f1_score=0.9207 auc_score=0.7458 highest_auc=0.7391
The best validati