In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from keras.models import Model, model_from_json, load_model
from keras.layers import Input, LSTM, Dense
from keras.callbacks import ModelCheckpoint
import numpy as np
import keras
from tqdm import tqdm
import pandas as pd
from time import time

Using TensorFlow backend.


In [3]:
from model import predict_form, predict_paradigm

### model try

In [4]:
enc = load_model('retraining_models/encoder-13.h5')
dec = load_model('retraining_models/decoder-13.h5')

In [5]:
predict_form('кукурузохранилище1I',enc,dec)

'кукурузохранилищем'

In [6]:
predict_paradigm('кукурузохранилище',enc,dec)

Unnamed: 0,Singular,Plural
Nom,кукурузохранилище,кукурузохранилища
Gen,кукурузохранилища,кукурузохранилищ
Dat,кукурузохранилищу,кукурузохранилищам
Acc,кукурузохранилище,кукурузохранилища
Ins,кукурузохранилищем,кукурузохранилищами
Loc,кукурузохранилище,кукурузохранилищах


### accuracy

In [7]:
def accuracy(data):
    eval_ = data['seq2seq_eval']
    value_counts =  eval_.value_counts()
    try:
        truth = value_counts[True]
    except:
        truth = 0
    try:
        errors = value_counts[False]
    except:
        errors = 0
    acc = truth/(len(eval_))
    acc = round(acc,4)
    print('False:', errors)
    print('True:', truth)
    print('accuracy:',acc)
    print('\n')
    return acc

### data

In [16]:
path_to_rfile = 'data/test/test_full.csv' 
data_full = pd.read_csv(path_to_rfile, encoding='utf-8',sep='\t')

In [17]:
data = data_full[data_full.apply(lambda x: not x['classtag'].endswith(('-','+','0')), axis=1)]

In [18]:
len(data)

126396

In [19]:
for i in range(19,39):
    print('evaluating model_{}...'.format(i))
    wfile = 'training_loop/eval/eval_model_{}.csv'.format(i)
    data_eval = data.copy()
    enc_name = 'retraining_models/encoder-{}.h5'.format(i)
    dec_name = 'retraining_models/decoder-{}.h5'.format(i)
    enc = load_model(enc_name)
    dec = load_model(dec_name)
    seq2seq_predictions = []
    seq2seq_eval = []
    t = time()
    for row in tqdm(range(data.shape[0])):
        inp = data_eval.iloc[row,0]+str(data_eval.iloc[row,2])
        out = predict_form(inp,enc,dec).rstrip()
        seq2seq_predictions.append(out)
        if out == data_eval.iloc[row,4]:
            seq2seq_eval.append('True')
        else:
            seq2seq_eval.append('False')
    data_eval['seq2seq_predictions'] = seq2seq_predictions
    data_eval['seq2seq_eval'] = seq2seq_eval
    print('evaluating took {} min'.format(round((time() - t)/60,4)))
    print('for model_{}:'.format(i))
    acc = accuracy(data_eval)
    data_eval.to_csv(wfile, encoding='utf-8',sep='\t', index=False)
    with open('training_loop/eval/eval_result.csv','a',encoding = 'utf-8') as file:
        file.write('{}\t{}\n'.format(i,acc))

evaluating model_19...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [22:23<00:00, 94.05it/s]


evaluating took 22.3979 min
for model_19:
Total count: 126396
False: 113797
True: 12599
accuracy: 0.0997


evaluating model_20...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [22:09<00:00, 95.09it/s]


evaluating took 22.155 min
for model_20:
Total count: 126396
False: 115085
True: 11311
accuracy: 0.0895


evaluating model_21...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [21:11<00:00, 99.44it/s]


evaluating took 21.1838 min
for model_21:
Total count: 126396
False: 114944
True: 11452
accuracy: 0.0906


evaluating model_22...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [21:38<00:00, 97.37it/s]


evaluating took 21.6346 min
for model_22:
Total count: 126396
False: 114766
True: 11630
accuracy: 0.092


evaluating model_23...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [21:41<00:00, 97.11it/s]


evaluating took 21.6934 min
for model_23:
Total count: 126396
False: 114576
True: 11820
accuracy: 0.0935


evaluating model_24...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [23:13<00:00, 90.73it/s]


evaluating took 23.219 min
for model_24:
Total count: 126396
False: 114493
True: 11903
accuracy: 0.0942


evaluating model_25...


100%|██████████████████████████████████████████████████████████████████████████| 126396/126396 [22:37<00:00, 93.08it/s]


evaluating took 22.6317 min
for model_25:
Total count: 126396
False: 114342
True: 12054
accuracy: 0.0954


evaluating model_26...


 53%|███████████████████████████████████████▊                                   | 67181/126396 [12:14<10:47, 91.43it/s]

KeyboardInterrupt: 

###  по уже прогнанному тесту

In [13]:
def accuracy(data):
    a,t,f = 0,0,0
    eval_ = data['seq2seq_eval']
    values =  eval_.value_counts()
    t = values[True]
    f = values[False]
    a = t/(len(eval_))
    a = round(a,4)
    print('Total count:', len(eval_))
    print('False:', f)
    print('True:', t)
    print('accuracy:',a)
    print('\n')
    return a

In [15]:
with open('training_loop/eval/eval_true_result.csv','w',encoding = 'utf-8') as file:
    for i in range(1,40):
        print('evaluating model_{}...'.format(i))
        wfile = 'training_loop/eval/eval_model_{}.csv'.format(i)
        data_eval = pd.read_csv(wfile, encoding='utf-8',sep='\t')
        acc = accuracy(data_eval)
        #data_eval.to_csv(wfile, encoding='utf-8',sep='\t', index=False)
        file.write('{}\t{}\n'.format(i,acc))

evaluating model_1...
Total count: 126396
False: 114680
True: 11716
accuracy: 0.0927


evaluating model_2...
Total count: 126396
False: 85803
True: 40593
accuracy: 0.3212


evaluating model_3...
Total count: 126396
False: 50508
True: 75888
accuracy: 0.6004


evaluating model_4...
Total count: 126396
False: 38004
True: 88392
accuracy: 0.6993


evaluating model_5...
Total count: 126396
False: 25510
True: 100886
accuracy: 0.7982


evaluating model_6...
Total count: 126396
False: 20103
True: 106293
accuracy: 0.841


evaluating model_7...
Total count: 126396
False: 18449
True: 107947
accuracy: 0.854


evaluating model_8...
Total count: 126396
False: 16734
True: 109662
accuracy: 0.8676


evaluating model_9...
Total count: 126396
False: 16302
True: 110094
accuracy: 0.871


evaluating model_10...
Total count: 126396
False: 14920
True: 111476
accuracy: 0.882


evaluating model_11...
Total count: 126396
False: 13761
True: 112635
accuracy: 0.8911


evaluating model_12...
Total count: 126396
False

FileNotFoundError: File b'training_loop/eval/eval_model_19.csv' does not exist