In [97]:
import numpy as np
import os 
import re
import pandas as pd
from scipy.stats import hmean


def get_file_lines(model):
    dir_ = 'Messreihe/' 
    lines = np.array([])
    for path, __, files in os.walk(dir_): 
        for file in files:
            if model == file[:-4]:
                lines = np.append(lines, open(path + file, 'r').readlines())
    return lines

models = ['lstm_bs64','lstm_bs32','lstm_bs8', 'lstm_bs8_cd0', 'gru_bs64', 'gru_bs32', 'gru_bs8', 'gru_bs8_cd0']

data = {model : get_file_lines(model) for model in models}

In [98]:
def get_dec_times(lines : np.array):
    re1 = re.compile('Decodertime :')  #Zeit eines Decoderschritts (Ausführung der Decoder-Zelle)
                                       #im Script Copy_seq2seq_Bahdanu.py
    indexes_dec = np.array(np.where([bool(re.search(re1, el)) for el in lines])).flatten()
    tdec_step = np.array([float(line[14:-1]) for line in lines[indexes_dec]]) 
    return tdec_step[5:] #[5:] weil erste Werte große Abweichungen durch Doppelmessung haben

def get_dec_times_per_batch(lines : np.array): 
    re1 = re.compile('Decodertime :')  
    re2 = re.compile('Batch [1-6] Test Time =')
    
    bli = batched_line_indexes = np.array(np.where([bool(re.search(re2, el)) for el in lines])).flatten()
    out = {}
    for i in range(6):
        if i != 0:
            l_ = lines[bli[i-1]:bli[i]]
        else:
            l_ = lines[:bli[i]] 
        indexes = np.array(np.where([bool(re.search(re1, el)) for el in l_])).flatten()
        tgather_logprobs = np.array([float(line[14:-1]) for line in l_[indexes]]) 
        out.update({str(i+1) : tgather_logprobs})
    return out #{Batch : array(values)}
                                                          
def analyse_dec(model, batches=range(6)):
    l = data[model]
    data_dict = get_dec_times_per_batch(data[model])

    cols = pd.MultiIndex.from_arrays([[str(i+1) for i in batches]], names=['Testbatch'])
    rows = ['mean', 'std', 'max', 'min', 'sum', 'count']
    funcs = [np.mean, np.std, np.max, np.min, np.sum, len]
    df_dict = {str(key) : {func_key : func(data_dict[str(key)]) \
                               for (func_key, func) in zip(rows, funcs) } for key in range(1,7)}

    
    print(f'{model}')
    df = pd.DataFrame.from_dict(df_dict)
    df.columns = cols
    df = df.round(6)
    display(df)  

print('Zeiten für Funktion gather_final_log_probs: \n')
for k in data.keys():
    analyse_dec(k)

Zeiten für Funktion gather_final_log_probs: 

lstm_bs64


Testbatch,1,2,3,4,5,6
mean,0.000171,0.00017,0.00017,0.00017,0.00017,0.000158
std,5.5e-05,9e-06,9e-06,9e-06,9e-06,1.1e-05
max,0.001706,0.000246,0.000239,0.000243,0.000236,0.000272
min,0.000142,0.000163,0.000163,0.000163,0.000163,0.000149
sum,0.153743,0.084923,0.084777,0.084797,0.084935,0.054951
count,897.0,500.0,500.0,500.0,500.0,347.0


lstm_bs32


Testbatch,1,2,3,4,5,6
mean,0.000168,0.000156,0.000154,0.000154,0.000155,0.000143
std,2.5e-05,9e-06,9e-06,9e-06,1e-05,1.3e-05
max,0.000598,0.000212,0.000218,0.000212,0.000227,0.000292
min,0.000144,0.000144,0.000143,0.000143,0.000144,0.000128
sum,0.117623,0.07776,0.077178,0.077225,0.077679,0.065958
count,700.0,500.0,500.0,500.0,500.0,461.0


lstm_bs8


Testbatch,1,2,3,4,5,6
mean,0.000167,0.000155,0.000154,0.000155,0.000154,0.00014
std,3.3e-05,9e-06,1e-05,9e-06,1e-05,2.1e-05
max,0.000865,0.000206,0.00021,0.000211,0.000213,0.000249
min,0.000143,0.000144,0.000143,0.000143,0.000143,0.00013
sum,0.141664,0.07747,0.077057,0.077624,0.077106,0.008518
count,847.0,500.0,500.0,500.0,500.0,61.0


lstm_bs8_cd0


Testbatch,1,2,3,4,5,6
mean,0.000167,0.000168,0.000158,0.000161,0.000159,0.000158
std,2.5e-05,2.5e-05,1e-05,1.4e-05,1.2e-05,8e-06
max,0.000569,0.000222,0.000244,0.000248,0.000255,0.000216
min,0.000143,0.00015,0.000143,0.000147,0.000144,0.000151
sum,0.121716,0.00101,0.128927,0.011106,0.122613,0.059902
count,727.0,6.0,818.0,69.0,769.0,379.0


gru_bs64


Testbatch,1,2,3,4,5,6
mean,0.000153,0.000147,0.000145,0.000146,0.000146,0.000136
std,2.3e-05,1.1e-05,1e-05,9e-06,1e-05,1.2e-05
max,0.000437,0.000275,0.00022,0.000224,0.000204,0.00024
min,0.000133,0.000134,0.000134,0.000133,0.000134,0.000122
sum,0.107214,0.073361,0.072349,0.073038,0.072778,0.06806
count,700.0,500.0,500.0,500.0,500.0,500.0


gru_bs32


Testbatch,1,2,3,4,5,6
mean,0.00016,0.000158,0.000158,0.000158,0.000158,0.000151
std,2.3e-05,8e-06,7e-06,8e-06,8e-06,2e-05
max,0.000542,0.000215,0.000224,0.000224,0.000224,0.000243
min,0.00014,0.000153,0.000153,0.000153,0.000153,0.00014
sum,0.125623,0.051391,0.06067,0.052321,0.04898,0.004969
count,783.0,325.0,384.0,331.0,310.0,33.0


gru_bs8


Testbatch,1,2,3,4,5,6
mean,0.000151,0.000144,0.000138,0.000138,0.000145,0.000133
std,2.3e-05,9e-06,8e-06,8e-06,9e-06,1.2e-05
max,0.000477,0.000208,0.000214,0.00021,0.00021,0.000256
min,0.000134,0.000132,0.000132,0.000132,0.000132,0.000122
sum,0.105948,0.071953,0.068846,0.068865,0.07267,0.047353
count,700.0,500.0,500.0,500.0,500.0,355.0


gru_bs8_cd0


Testbatch,1,2,3,4,5,6
mean,0.000155,0.000155,0.000154,0.000141,0.00014,0.000135
std,2e-05,8e-06,8e-06,8e-06,7e-06,1.8e-05
max,0.000411,0.00022,0.000227,0.000199,0.000209,0.000207
min,0.000133,0.000149,0.000148,0.000136,0.000136,0.000122
sum,0.121404,0.050277,0.059273,0.046638,0.043316,0.004443
count,783.0,325.0,384.0,331.0,310.0,33.0


In [103]:

def get_mean_dectimes(models): #gibt durchschnittliche Decoderzeiten für lstm oder gru aus.
    mv = mean_vals = np.array([])
    for model in models:
        l = data[model]
        tdec = get_dec_times(l)
        mv = np.append(mv, np.mean(tdec))  
    return mv
 
"""def analyse(lstm, gru):
    lstm_mv = get_mean_dectimes(lstm)       
    gru_mv  = get_mean_dectimes(gru)  

    print(f'Durch. Decoderschrittzeiten für GRU: {np.mean(gru_mv)}')     
    print(f'Durch. Decoderschrittzeiten für LSTM: {np.mean(lstm_mv)}')
    print()"""
def analyse(decoder, batches=range(1,5)):
    arr_all = np.array([])
    for model in decoder:
        arr = np.array([])
        vals = get_dec_times_per_batch(np.array(data[model])) 
        for i in batches:
            arr = np.append(arr, vals[str(i+1)])
        mv = np.mean(arr)
        arr_all = arr
        print(f'Durch. Decoderschrittzeiten für {model} \t: {np.mean(mv)}')  
    #print(f'std \t\t\t\t\t\t: {np.std(arr_all)}')
    #print(f'hmean \t\t\t\t\t\t: {hmean(arr_all)}')    
    print(f'Gesamt \t\t\t\t\t\t: {np.mean(arr_all)}')   
    print('______________________________________________________________')
    

lstm = ['lstm_bs64','lstm_bs32','lstm_bs8', 'lstm_bs8_cd0']
gru =  ['gru_bs64', 'gru_bs32', 'gru_bs8', 'gru_bs8_cd0']

analyse(lstm, batches=range(6)) 
analyse(gru, batches=range(6)) 

#analyse(lstm, batches=range(1,5)) 
#analyse(gru, batches=range(1,5)) 

Durch. Decoderschrittzeiten für lstm_bs64 	: 0.00016896623277487795
Durch. Decoderschrittzeiten für lstm_bs32 	: 0.00015609771984357392
Durch. Decoderschrittzeiten für lstm_bs8 	: 0.0001579911705552302
Durch. Decoderschrittzeiten für lstm_bs8_cd0 	: 0.00016086475353020463
Gesamt 						: 0.00016086475353020463
______________________________________________________________
Durch. Decoderschrittzeiten für gru_bs64 	: 0.0001458749920129776
Durch. Decoderschrittzeiten für gru_bs32 	: 0.0001587971205636515
Durch. Decoderschrittzeiten für gru_bs8 	: 0.00014259733271871963
Durch. Decoderschrittzeiten für gru_bs8_cd0 	: 0.00015020854805535117
Gesamt 						: 0.00015020854805535117
______________________________________________________________


In [None]:
def get_mean_(models): #gibt durchschnittliche Decoderzeiten für lstm oder gru aus.
    mv = mean_vals = np.array([])
    for model in models:
        l = data[model]
        tdec = get_dec_times(l)
        mv = np.append(mv, np.mean(tdec))  
    return mv
 
"""def analyse(lstm, gru):
    lstm_mv = get_mean_dectimes(lstm)       
    gru_mv  = get_mean_dectimes(gru)  

    print(f'Durch. Decoderschrittzeiten für GRU: {np.mean(gru_mv)}')     
    print(f'Durch. Decoderschrittzeiten für LSTM: {np.mean(lstm_mv)}')
    print()"""
def analyse(decoder, batches=range(1,5)):
    arr_all = np.array([])
    for model in decoder:
        arr = np.array([])
        vals = get_dec_times_per_batch(np.array(data[model])) 
        for i in batches:
            arr = np.append(arr, vals[str(i+1)])
        mv = np.mean(arr)
        arr_all = arr
        print(f'Durch. Decoderschrittzeiten für {model} \t: {np.mean(mv)}')  
    #print(f'std \t\t\t\t\t\t: {np.std(arr_all)}')
    #print(f'hmean \t\t\t\t\t\t: {hmean(arr_all)}')    
    print(f'Gesamt \t\t\t\t\t\t: {np.mean(arr_all)}')   
    print('______________________________________________________________')
    

lstm = ['lstm_bs64','lstm_bs32','lstm_bs8', 'lstm_bs8_cd0']
gru =  ['gru_bs64', 'gru_bs32', 'gru_bs8', 'gru_bs8_cd0']

analyse(lstm, batches=range(6)) 
analyse(gru, batches=range(6)) 

#analyse(lstm, batches=range(1,5)) 
#analyse(gru, batches=range(1,5)) 