In [7]:
%matplotlib inline

import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np

import json
import os
import glob
import datetime
import statistics

# Essay scoring

## Development set

In [56]:
# Gather all params.
results = []

for d in glob.glob("../output/*"):
    if not os.path.exists(os.path.join(d, "param.txt")):
        continue
    
    prm = dict([p.strip().split("=", 1) for p in open(os.path.join(d, "param.txt"))])
    prm["Directory"] = os.path.basename(d)
    prm["Time"] = str(datetime.datetime.fromtimestamp(os.stat(d).st_mtime))[:19]
    prm["Fold"] = [i for i in range(5) if os.path.exists(os.path.join(d, "regression_f{}.hdf5".format(i)))]
    prm["Eval"] = [i for i in range(5) if os.path.exists(os.path.join(d, "prediction_f{}.json".format(i)))]

    mse_pool = []
    
    for i in range(5):
        pjs = os.path.join(d, "prediction_f{}.json".format(i))
        
        if os.path.exists(pjs):
            pred = json.load(open(pjs))
            mse_pool += [pred["MSE"]]

    mse_pool = np.array(mse_pool)
    prm["MSEstd"] = mse_pool.std()    
    prm["MSE"] = mse_pool.mean()
    prm["MSEs"] = mse_pool
    
    if os.path.exists(os.path.join(d, "logs_f0.pickle")):
        l = pickle.load(open("{}/logs_f0.pickle".format(d), "rb"))
        df = pd.DataFrame(l["logs_e"])
        prm["R_loss"] = df["loss"].min()
        prm["R_val_loss"] = df["val_loss"].min()

    else:
        prm["R_loss"] = 0
        prm["R_val_loss"] = 0
        
    results += [prm]

cols = "Time score_type R_loss R_val_loss Fold Eval MSE MSEstd MSEs dropout clipnorm emb_fix enc_fix pseq preenc Directory".split()

pd.set_option("display.max_colwidth", 200)
df = pd.DataFrame(results)
df = df[(df.pseq == "False") & (df.score_type == "ArgumentStrength")]
df[cols].sort_values(by="Time", ascending=False)



Unnamed: 0,Time,score_type,R_loss,R_val_loss,Fold,Eval,MSE,MSEstd,MSEs,dropout,clipnorm,emb_fix,enc_fix,pseq,preenc,Directory
5,2019-01-12 17:37:42,ArgumentStrength,0.025373,0.026295,[0],[],,,[],0.75,10.0,True,False,False,,3793dec1345d88da59b252955f658b3c
54,2019-01-12 17:35:53,ArgumentStrength,0.025311,0.026186,[0],[],,,[],0.75,5.0,True,False,False,,ccdb2f9d426fff6f2cfa069b6228058b
13,2019-01-12 17:31:43,ArgumentStrength,0.017069,0.026153,[0],[],,,[],0.5,5.0,True,False,False,,6f682346fe20017541ed877929851636
35,2019-01-12 17:29:39,ArgumentStrength,0.00649,0.026637,[0],[],,,[],0.25,10.0,True,False,False,,768e5333a64c6a27631be0a830738654
18,2019-01-12 17:26:51,ArgumentStrength,0.007094,0.026043,[0],[],,,[],0.25,5.0,True,False,False,,c223f9171d2065520643cf6d654dc138
1,2019-01-12 16:52:18,ArgumentStrength,0.014867,0.026145,[0],[],,,[],0.5,10.0,True,False,False,,5cc10bbc73f490977573fb5b763ef0fc
33,2019-01-10 09:29:03,ArgumentStrength,0.027305,0.026089,"[0, 1, 2, 3, 4]","[0, 1, 2, 3, 4]",0.25502,0.014579,"[0.25664800333384447, 0.23941677197983893, 0.2723700790054423, 0.23736549721363503, 0.26929889274507046]",0.7,5.0,True,True,False,output_enc/9780456c95e7c048e2501106fd40c716,cee23dc2ed54aed0911230d84151441e
23,2019-01-10 09:25:09,ArgumentStrength,0.027335,0.025896,"[0, 1, 2, 3, 4]","[0, 1, 2, 3, 4]",0.252444,0.012027,"[0.2592230096534951, 0.23955059426323458, 0.259447455157723, 0.23680760373543905, 0.267192892992712]",0.7,5.0,False,False,False,output_enc/9780456c95e7c048e2501106fd40c716,b1809650ecdefed27f07e32a05dc3ade
53,2019-01-10 09:06:05,ArgumentStrength,0.027222,0.02637,"[0, 1, 2, 3, 4]","[0, 1, 2, 3, 4]",0.249249,0.012563,"[0.2553112997422369, 0.2392242413397571, 0.2528419103068555, 0.2315592930770336, 0.26730756532314587]",0.7,5.0,True,True,False,output_enc/c2c4d855a06224fd1096834eed11920d,fcbeada6ec3cb1984984fe3ec9cb664e
32,2019-01-10 09:05:27,ArgumentStrength,0.027229,0.025997,"[0, 1, 2, 3, 4]","[0, 1, 2, 3, 4]",0.250831,0.013217,"[0.26496048231386626, 0.23985156913924272, 0.2462020482008063, 0.23528099814833667, 0.26786079507282806]",0.7,5.0,False,False,False,output_enc/c2c4d855a06224fd1096834eed11920d,51a2e8727c1fe1fa27d190f879ce078d


## Test set

In [49]:
# Gather all results.
results = []

for d in glob.glob("../output/*"):
    if not os.path.exists(os.path.join(d, "param.txt")):
        continue
        
    prm = dict([p.strip().split("=", 1) for p in open(os.path.join(d, "param.txt"))])
    
    p = [json.load(open(os.path.join(d, "prediction_f{}.json".format(i)))) for i in range(5) if os.path.exists(os.path.join(d, "prediction_f{}.json".format(i)))]
    p = np.array([x["MSE"] for x in p])
    
    prm["Directory"] = os.path.basename(d)
    prm["MSEs"] = p
    prm["MSE"] = p.mean(), p.std()
    
    if "score_type" not in prm:
        prm["score_type"] = "Organization"
    
    results += [prm]
        
df = pd.DataFrame(results)
df = df.sort_values(by="score_type")
df[["Directory", "MSEs", "MSE", "score_type", "preenc", "enc_fix", "emb_fix", "pretrained", "pseq"]]

  from ipykernel import kernelapp as app


Unnamed: 0,Directory,MSEs,MSE,score_type,preenc,enc_fix,emb_fix,pretrained,pseq
26,51a2e8727c1fe1fa27d190f879ce078d,"[0.26496048231386626, 0.23985156913924272, 0.2462020482008063, 0.23528099814833667, 0.26786079507282806]","(0.250831178575016, 0.013216912233143372)",ArgumentStrength,output_enc/c2c4d855a06224fd1096834eed11920d,False,False,False,False
1,5cc10bbc73f490977573fb5b763ef0fc,[],"(nan, nan)",ArgumentStrength,,False,True,True,False
45,fcbeada6ec3cb1984984fe3ec9cb664e,"[0.2553112997422369, 0.2392242413397571, 0.2528419103068555, 0.2315592930770336, 0.26730756532314587]","(0.24924886195780577, 0.01256338952106033)",ArgumentStrength,output_enc/c2c4d855a06224fd1096834eed11920d,True,True,False,False
22,4d2b32819bf73eecafdc08288f39e34c,"[0.27303764034418093, 0.23817723298100674, 0.25462896249886086, 0.23306868678844408, 0.2626360273298695]","(0.2523097099884724, 0.014910417425571015)",ArgumentStrength,,False,True,True,False
19,b1809650ecdefed27f07e32a05dc3ade,"[0.2592230096534951, 0.23955059426323458, 0.259447455157723, 0.23680760373543905, 0.267192892992712]","(0.25244431116052074, 0.012027219127379462)",ArgumentStrength,output_enc/9780456c95e7c048e2501106fd40c716,False,False,False,False
15,8adb0fc48cedf9e322e09c406c747e5a,[],"(nan, nan)",ArgumentStrength,,False,True,True,True
27,cee23dc2ed54aed0911230d84151441e,"[0.25664800333384447, 0.23941677197983893, 0.2723700790054423, 0.23736549721363503, 0.26929889274507046]","(0.2550198488555663, 0.014578783265718775)",ArgumentStrength,output_enc/9780456c95e7c048e2501106fd40c716,True,True,False,False
28,cde3e253163f10bde9887a702811182a,"[0.17477832335600332, 0.2069103628534164, 0.20235256500706644, 0.21061206024685153, 0.24073340256757852]","(0.20707734280618326, 0.021027613516739027)",Organization,,False,True,True,True
29,2674fd3a66a463d7fd62804995734663,[],"(nan, nan)",Organization,output_enc/750570aed2d16633ecbe4237d2d95b71,False,False,False,True
30,6f27e738794f9eb73bd73c0c868d4cb2,"[0.1722483500131174, 0.1794667317586682, 0.16374400186657234, 0.2143731473086255, 0.21654933758099426]","(0.18927631370559553, 0.021962455607409677)",Organization,output_enc/750570aed2d16633ecbe4237d2d95b71,False,False,False,True


In [None]:
tn16pn10_pre_sentshuf_finetune = "0cb7a2429b6cc73297413c20570c824f"
tn16pn10_pre_sentshuf_nofinetune = "752aa6160e706a6ffe0f91a1e423b40a"

# Encoder pretraining

In [26]:
# Gather all params.
results = []

for d in glob.glob("../output_enc/*"):
    if not os.path.exists(os.path.join(d, "param.txt")):
        continue
    
    prm = dict([p.strip().split("=", 1) for p in open(os.path.join(d, "param.txt"))])
    prm["Directory"] = os.path.basename(d)
    prm["Time"] = str(datetime.datetime.fromtimestamp(os.stat(d).st_mtime))[:19]
    
    l = pickle.load(open("{}/logs.pickle".format(d), "rb"))
    df = pd.DataFrame(l["logs_e"])
    prm["R_acc"] = df["acc"].max()
    prm["R_val_acc"] = df["val_acc"].max()
    
    results += [prm]

cols = "Time R_acc R_val_acc dropout emb_fix enc_fix shuf Directory".split()

pd.set_option("display.max_colwidth", 50)
df = pd.DataFrame(results)
df[cols].sort_values(by="Time", ascending=False)

Unnamed: 0,Time,R_acc,R_val_acc,dropout,emb_fix,enc_fix,shuf,Directory
3,2018-12-27 14:18:45,0.993037,0.903384,0.7,False,False,di,a87b827fa7c5151192542ecb2c3af4d2
5,2018-12-27 13:25:00,0.986483,0.825873,0.5,False,False,di,6abbf82fd461ebcd2ac61867427b2a1e
1,2018-12-27 13:02:47,0.996313,0.853712,0.3,False,False,di,da6836f961365a7b348281a00e48bc34
2,2018-12-27 11:37:36,0.9929,0.936681,,,,,clipnorm=5.0_dropout=0.7_emb_dim=50_emb_fix=Fa...
4,2018-12-27 11:32:34,0.902512,0.743996,,,,,clipnorm=5.0_dropout=0.7_emb_dim=50_emb_fix=Fa...
0,2018-12-27 11:32:27,0.925587,0.742904,,,,,clipnorm=5.0_dropout=0.5_emb_dim=50_emb_fix=Fa...


In [2]:
### Score_normalized
# TN16
CUDA_VISIBLE_DEVICES=0 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-LSTMdim 300 \
    --gradientclipnorm 5 --meanovertime \
    --pre-trained --fix-embedding

fold_0: MSE: 0.3643280911204178, MAE: 0.4622013795375824
fold_1: MSE: 0.3197095480935262, MAE: 0.39644437405600474
fold_2: MSE: 0.4364101866079787, MAE: 0.438516518369836   
fold_3: MSE: 0.32989512169457946, MAE: 0.4378773703503965
fold_4: MSE: 0.3643280911204178, MAE: 0.4622013795375824
            
MSE: 0.362

SyntaxError: invalid syntax (<ipython-input-2-e702d4088bd6>, line 3)

In [None]:
fold0: MSE: 0.19625852776829697 MAE: 0.3492366951704025
fold1: 

In [5]:
# Gather all results.
results = []

for d in glob.glob("../output/*"):
    if not os.path.exists(os.path.join(d, "prediction_f1.json")):
        continue
        
    prm = dict([p.strip().split("=", 1) for p in open(os.path.join(d, "param.txt"))])
    
    p = json.load(open(os.path.join(d, "prediction_f1.json")))
    prm["MSE"] = p["MSE"]
    
    results += [prm]
        
df = pd.DataFrame(results)
df[["MSE", "preenc", "enc_fix", "emb_fix", "pretrained", "pseq"]]

Unnamed: 0,MSE,preenc,enc_fix,emb_fix,pretrained,pseq
0,0.335248,,False,True,True,False
1,0.31971,,False,True,True,False


# Command repo

In [1]:
# Command for training:

# TN16
CUDA_VISIBLE_DEVICES=0 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-grudim 300 \
    --gradientclipnorm 5 --meanovertime \
    --pre-trained --fix-embedding

# TN16+PN10
CUDA_VISIBLE_DEVICES=0 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --pre-trained --fix-embedding \
    --persing-seq --pseq-embedding-dim 16 --pseq-encoder-dim 64

# TN16+PN10+pretrain(di. shuffle, fixed)
CUDA_VISIBLE_DEVICES=0 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --persing-seq --pseq-embedding-dim 16 --pseq-encoder-dim 64 \
    --fix-encoder --fix-embedding \
    --pretrained-encoder output_enc/a87b827fa7c5151192542ecb2c3af4d2

# TN16+PN10+pretrain(di. shuffle, not fixed)
CUDA_VISIBLE_DEVICES=0 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --persing-seq --pseq-embedding-dim 16 --pseq-encoder-dim 64 \
    --pretrained-encoder output_enc/a87b827fa7c5151192542ecb2c3af4d2

# TN16+PN10+pretrain(sent. shuffle, fixed)
CUDA_VISIBLE_DEVICES=1 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --persing-seq --pseq-embedding-dim 16 --pseq-encoder-dim 64 \
    --fix-encoder --fix-embedding \
    --pretrained-encoder output_enc/clipnorm=5.0_dropout=0.7_emb_dim=50_emb_fix=False_enc_fix=False_model_type=nea_mot=True_pretrained=False_shuf=sentence

# TN16+PN10+pretrain(sent. shuffle, not fixed)
CUDA_VISIBLE_DEVICES=1 python src/train.py \
    --fold 1 \
    --model-type nea --dropout 0.5 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --persing-seq --pseq-embedding-dim 16 --pseq-encoder-dim 64 \
    --pretrained-encoder output_enc/clipnorm=5.0_dropout=0.7_emb_dim=50_emb_fix=False_enc_fix=False_model_type=nea_mot=True_pretrained=False_shuf=sentence

SyntaxError: invalid syntax (<ipython-input-1-dba85e4206fa>, line 4)

In [None]:
# Command for sentence encoder pretraining:
CUDA_VISIBLE_DEVICES=1 python src/train_enc.py \
    --model-type nea --dropout 0.3 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --shuffle-type di

CUDA_VISIBLE_DEVICES=1 python src/train_enc.py \
    --model-type nea --dropout 0.3 \
    --embedding-dim 50 --aggregation-grudim 100 \
    --gradientclipnorm 5 --meanovertime \
    --shuffle-type sentence

In [None]:
# Command for evaluation
CUDA_VISIBLE_DEVICES=1 python src/eval.py \
    --fold 1 \
    --model-dir output/cbc428f99e04b33c2dcb221e7331e07d

In [2]:
#only persing sequence
python src/train.py     --fold 1    
--model-type only_pseq --dropout 0.5  --gradientclipnorm 0 
--persing-seq --pseq-embedding-dim 16 --pseq-encoder-dim 400

SyntaxError: invalid syntax (<ipython-input-2-fb3cf3bcfae6>, line 2)

In [4]:
# CUDA_VISIBLE_DEVICES=1 python src/bulkrun.py train_allfolds ArgumentStrength 0
# `eval_allfolds_homo` can specify a specific model dir as an argument
# `eval_allfolds` can specify a number as an argument (again, the number corresponds to each model 
# defined in the script file)

In [3]:
bulkrun.py eval_allfolds_homo output/cee23dc2ed54aed09112
bulkrun.py eval_allfolds 0 

SyntaxError: invalid syntax (<ipython-input-3-1e3241d641a7>, line 1)

In [11]:
sample = [  0.15847840492204426, 0.2284701158980941, 
] 
print("Avg: % s " % (statistics.mean(sample)))
print("STDEV: % s " % (statistics.stdev(sample)))

Avg: 0.16415826940616166 
STDEV: 0.010048974180456405 


In [6]:
import pickle

logs = pickle.load(open("../output_enc/df1d63728591c861d72e2bcbe2dc6e57/logs.pickle", "rb"))
df = pd.DataFrame(logs["logs_e"])
df

Unnamed: 0,acc,loss,val_acc,val_loss
0,0.483233,0.702444,0.472527,0.693351
1,0.51017,0.699237,0.452747,0.694822
2,0.499725,0.698592,0.465934,0.694634
3,0.514568,0.696174,0.514286,0.694586
4,0.512369,0.694361,0.494505,0.694971
5,0.551402,0.691161,0.487912,0.699872


In [136]:
df.describe()

Unnamed: 0,acc,loss,val_acc,val_loss
count,7.0,7.0,7.0,7.0
mean,0.974367,0.062964,0.975104,0.067606
std,0.005107,0.012251,0.001949,0.004611
min,0.967826,0.047091,0.971963,0.061268
25%,0.970095,0.053662,0.973887,0.065061
50%,0.974426,0.063036,0.975811,0.068701
75%,0.978963,0.070945,0.976636,0.06955
max,0.980201,0.081403,0.97691,0.074054
