In [1]:
import tensorflow as tf
import pickle, pandas as pd, re, numpy as np, ast, warnings

from joblib import Parallel, delayed

import time

from collections import defaultdict, OrderedDict
from itertools import chain, starmap
from itertools import product
import unicodedata
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, precision_score, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline

from TurkishStemmer import TurkishStemmer
warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')
import gensim
from textblob import TextBlob

  from ._conv import register_converters as _register_converters


In [2]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
reset_graph()

In [3]:
df_full = pd.read_csv("../datasets/movie_data.csv")
df_full.head()

Unnamed: 0,Language,Movie_ID,Review,Score
0,en,-800777728,i love science fiction and i hate superheroes ...,9
1,en,-800777728,the movie is absolutely incredible all the per...,10
2,en,-1018312192,in a cinematic era dominated by reboots and mi...,8
3,en,-1018312192,movie review on rise of the planet of the apes...,4
4,en,-1018312192,during experiments to find a cure for alzheime...,7


In [4]:
df_full.groupby("Score").count()

Unnamed: 0_level_0,Language,Movie_ID,Review
Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,29,29,29
2,21,21,21
3,14,14,14
4,23,23,23
5,83,83,83
6,43,43,43
7,71,71,71
8,207,207,207
9,175,175,175
10,334,334,334


In [5]:
en_vects = gensim.models.KeyedVectors.load_word2vec_format(r"../../NLP_data/GoogleNews-vectors-negative300.bin", binary=True)

In [6]:
tr_vects = gensim.models.KeyedVectors.load_word2vec_format(r"../../NLP_data/wiki.tr/wiki.tr.vec", binary=False)

In [7]:
turkish_stemmer = TurkishStemmer()
def clean(text, language="en", stem=True):
    global turkish_stemmer
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').lower().decode("ascii")
    
    if language == "tr":
        if stem:
            text= ' '.join([turkish_stemmer.stem(w) for w in text.split()])
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r'[0-9]', '#', text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ", text)
    text = re.sub(r"\+", " ", text)
    text = re.sub(r"\-", " ", text)
    text = re.sub(r"\=", " ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r":", " ", text)
    text = re.sub(r"e(\s)?-(\s)?mail", "email", text)

    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    return TextBlob(text)


In [8]:
VECTOR_SIZE = 300
def vectorize(text, language):
    global VECTOR_SIZE            
    blob = clean(text, language)
    vector = np.zeros(VECTOR_SIZE)
    if len(blob.words) < 1:
        return None

    for word in blob.words:
        try:
            if language == "en":
                vector += globals()["en_vects"][word]
            else:
                vector += globals()["tr_vects"][word]
        except KeyError as e:
#             warnings.warn(str(e))
            continue
    vector /= max(len(blob.words),1)
    return vector

In [9]:
def getvec(x):
    lang, rev = x.split(":::::")
    return vectorize(rev, lang)

In [10]:
# LMSR
def preprocess_data(df, language_column="Language", review_column="Review"):
    LMSR_df = df.copy()
    LMSR_df["lang_rev"] = LMSR_df[[language_column, review_column]].apply(lambda x: x[0]+":::::"+x[1], axis=1)
    LMSR_df["rev_vec"] = LMSR_df["lang_rev"].apply(lambda x:getvec(x))
    LMSR_df.drop(["lang_rev", "Review"], axis=1, inplace=True)
    return LMSR_df

In [11]:
def distance_accuracy(y_true, y_predict):
    res = 0
    for i in range(len(y_true)):
        res += abs(y_true[i]-y_predict[i])
    return 1-res/(len(y_true)*len(set(y_true)))

In [12]:
def get_XYy(LMSR):
    X = np.zeros((len(LMSR), VECTOR_SIZE))
    Y = np.zeros((len(LMSR), VECTOR_SIZE))
    y = np.zeros((len(LMSR)))
    i = 0
    for rev in LMSR.iterrows():
        score = rev[1][2]
        rev_vec = rev[1][3]
        score_vec = rev[1][4]

        X[i] = rev_vec
        Y[i] = score_vec
        y[i] = score

        i += 1
    return X, Y, y

In [13]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

In [14]:
def sigmoid(x, derive=False):
    if derive:
        return x*(1-x)
    return 1/(1+np.exp(-x))

In [15]:
def get_data_dict(df, get_L2and3=False):
    data_dict = dict() #{language:{score: {movie_id: [rev1, rev2, ..., revn]}}}
    L1 = dict()  # {(languge, score, movie_id): list of reviews with the same score with the same language}
    L2 = dict()  # {(language, score): None}
    L3 = dict()  # {score: None}
    for _, row in df.iterrows():
        lang = row["Language"]
        movie_id = row["Movie_ID"]
        score = row["Score"]
        review = row["rev_vec"]

        data_dict.setdefault(lang, {})
        data_dict[lang].setdefault(score, {})
        data_dict[lang][score].setdefault(movie_id, [])
        data_dict[lang][score][movie_id].append(review)
        
        L1.setdefault((lang, score, movie_id), list())
        L1[(lang, score, movie_id)].append(review)
        if get_L2and3:    
            L2[(lang, score)] = None
            L3[score] = None
    if get_L2and3:
        return data_dict, L1, L2, L3
    return data_dict, L1

In [16]:
def get_L2(LSM_R, data_dict):
    L2 = dict()  # {(language, score): list of movies vectors}
    for language in data_dict:
        for score in data_dict[language]:
            for movie_id in data_dict[language][score]:
                L2.setdefault((language, score), list())
                L2[(language, score)].append(LSM_R[(language, score, movie_id)])
    return L2

In [17]:
def get_L3(LS_MR, data_dict):
    L3 = dict()  # {score: vector of merged languages for that score}
    for language in data_dict:
        for score in data_dict[language]:
            L3.setdefault(score, list())
            L3[score].append(LS_MR[(language, score)])
    return L3

In [18]:
def merge(L, W):
    merged = dict()  # {item: vector of merged subitems}
    for i, item in enumerate(sorted(L)):
        for subitem in L[item]:
            merged.setdefault(item, [np.zeros(VECTOR_SIZE),0])
            merged[item][0] += sigmoid(subitem.dot(W[i]))
            merged[item][1] += 1
    for item in merged:
        merged[item] = merged[item][0]/ merged[item][1]
    return merged

In [19]:
def update_weights(L, delta, W, alpha=0.1):
    for i, k in enumerate(sorted(L)):
        for l in L[k]:
            W[i] += l.T.dot(delta[i]) *alpha
    return W

In [20]:
def get_layer_error(delta, W):
    error = 0
    for i in range(len(delta)):
        error += delta[i].dot(W[i].T)
    return error/len(delta)

In [21]:
def get_layer_delta(error, layer, size):
    delta = np.zeros((size, VECTOR_SIZE))
    j = 0
    for i,k in enumerate(sorted(layer)):
        for l in layer[k]:
            delta[j] = error[i]*sigmoid(l, True)
            j += 1
    return delta

## Training

## Full-batch

In [22]:
def get_score_vects(df, iterations=100, alpha=0.1, random_state=42, W1=None, W2=None, W3=None, W4=None):
    LSMR = preprocess_data(df)
    data_dict, L1 = get_data_dict(LSMR)
    y = softmax(list(LSMR.Score))
#     np.random.seed(random_state)
    learning_curve = dict()
    for i in range(iterations+1):
        # forward propagation
        if W1 is None:
            W1 = 2*np.random.random((len(L1), 300, 300))-1

        LSM_R = merge(L1, W1)
        L2 = get_L2(LSM_R, data_dict)
        if W2 is None:
            W2 = 2*np.random.random((len(L2), 300, 300))-1

        LS_MR = merge(L2, W2)
        L3 = get_L3(LS_MR, data_dict)
        if W3 is None:
            W3 = 2*np.random.random((len(L3), 300, 300))-1

        score_vectors_dict = merge(L3, W3)
        l4 = sigmoid(np.array([v for k, v in sorted(score_vectors_dict.items())]))
        if W4 is None:
            W4 = 2*np.random.random((300, len(LSMR)))-1
        
        l5 = softmax(l4.dot(W4))  # predicted scores
        
        # Calculate the error
        l5_error = np.mean(np.dot(np.log(l5), y))
        
        # Back propagation
        l5_delta = l5_error * sigmoid(l5, True)
        W4 += l4.T.dot(l5_delta)*alpha
        
        l4_error = l5_delta.dot(W4.T)
        l4_delta = l4_error * sigmoid(l4, True)
        
        W3 = update_weights(L3, l4_delta, W3, alpha)
        
        l3_error = get_layer_error(l4_delta, W3)
        l3_delta = get_layer_delta(l3_error, L3, len(L2))
        
        W2 = update_weights(L2, l3_delta, W2, alpha)
        
        l2_error = get_layer_error(l3_delta, W2)
        l2_delta = get_layer_delta(l2_error, L2, len(LSMR))
        
        W1 = update_weights(L1, l2_delta, W1, alpha)
        learning_curve[i] = l5_error
        if i%10 == 0:
            print("epoch {}:\t{}".format(i, np.abs(l5_error)))
        if i%100 == 0:
            alpha *= 0.9
    return LSMR, score_vectors_dict, learning_curve

In [23]:
def fit(LSMR, score_vect_dicts,random_state=42, regressor=MLPRegressor(), classifier=MLPClassifier()):
    LSMR["score_vec"] = LSMR["Score"].apply(lambda x: score_vect_dicts[x] if x in score_vect_dicts else np.NaN)
    LSMR.dropna(inplace=True)
    
    X, Y, y = get_XYy(LSMR)
    
    regressor.random_state = random_state
    classifier.random_state = random_state
        
    regressor.fit(X, Y)
    classifier.fit(Y, y)
    return regressor, classifier

In [24]:
def predict(LSMR, score_vect_dicts, regressor, classifier):
    LSMR["score_vec"] = LSMR["Score"].apply(lambda x: score_vect_dicts[x] if x in score_vect_dicts else np.NaN)
    LSMR.dropna(inplace=True)
    
    X, Y, y = get_XYy(LSMR)
    
    preds_score_vecs = regressor.predict(X)
    pred_scores = classifier.predict(preds_score_vecs)
    
    return pred_scores, y

In [25]:
tronly_test_raw = df_full[-100:]
tronly_test = preprocess_data(tronly_test_raw)
df = df_full[:-100]
tronly_test[tronly_test.Language=="en"].count()

Language    0
Movie_ID    0
Score       0
rev_vec     0
dtype: int64

In [32]:
def eval_models(model, train, test, tronly, ytrain, ytest, ytronly):
    _ = time.time()
    model.fit(train, ytrain)
    predtra = time.time()-_
    
    _ = time.time()
    predtrain = model.predict(train)
    trat = time.time()-_
    s_train = distance_accuracy(ytrain, predtrain)
    f1_train = f1_score(ytrain, predtrain, average='weighted')
    
    _ = time.time()
    predtest= model.predict(test)
    tet = time.time()-_
    s_test = distance_accuracy(ytest, predtest)
    f1_test = f1_score(ytest, predtest, average='weighted')
    
    _ = time.time()
    predtronly = model.predict(tronly)
    trt = time.time()-_
    s_tr = distance_accuracy(ytronly, predtronly)
    f1_tronly = f1_score(ytronly, predtronly, average='weighted')
    
    evals = OrderedDict()
    evals["Train"] = s_train
    evals["Test"] = s_test
    evals["Tr. Only"] = s_tr
    evals["Training Time"] = trat
    evals["Pred.Tra. Time"] = predtra
    evals["Testing Time"] = tet
    evals["Tr.Test Time"] = trt
    evals["F1 Test"] = f1_test
    evals["F1 Train"] = f1_train
    evals["F1 Tr. only"] = f1_tronly
    return evals

In [27]:
def get_total_average(scores_tables):
#     scores_tables: {i_th trial:
#                     {k_th fold:
#                         {'Model': {'Test': 0.8090301003344482,
#                                    'Train': 0.783361064891847,
#                                    'Turkish only': 0.7414285714285714}}}
    avgs = dict()
    for trial in scores_tables:
        for table in scores_tables[trial]:
            for model in scores_tables[trial][table]:
                avgs.setdefault(model, dict())
                for metric, score in scores_tables[trial][table][model].items():
                    avgs[model].setdefault(metric, list())
                    avgs[model][metric].append(score)
    for model in avgs:
        for metric in avgs[model]:
            avgs[model][metric] = np.mean(avgs[model][metric])
    return pd.DataFrame(avgs)

In [28]:
def get_trial_score(trial_scores_tables):
#  trial_scores_tables: {k_th fold:
#                             {'Model': {'Test': 0.8090301003344482,
#                                        'Train': 0.783361064891847,
#                                        'Turkish only': 0.7414285714285714}}}
    avgs = dict()
    for table in trial_scores_tables:
        for model in trial_scores_tables[table]:
            avgs.setdefault(model, dict())
            for metric, score in trial_scores_tables[table][model].items():
                avgs[model].setdefault(metric, list())
                avgs[model][metric].append(score)
    for model in avgs:
        for metric in avgs[model]:
            avgs[model][metric] = np.mean(avgs[model][metric])
    return pd.DataFrame(avgs)

In [61]:
def eval_selectivewaves_regclass(df, tronly_test_raw, NUM_TRIALS=1, splits=10):  
    learning_curves = dict()
    scores_tables = OrderedDict()
    for i in range(NUM_TRIALS):
        print("Trial:\t{}".format(i+1))
        scores_tables[i] = OrderedDict()
        learning_curves[i] = OrderedDict()
        k = 0
        skf = StratifiedKFold(n_splits=splits, random_state=i)
        for train_index, test_index in skf.split(df["Review"], df["Language"]):
            print("K:\t{}".format(k+1))
            scores_tables[i][k] = OrderedDict()
            start = time.time()
            LSMR, score_vect_dicts, training_curve = get_score_vects(
                df.loc[train_index], random_state=i, alpha=1e-5, iterations=50)
            regressor, classifier = fit(LSMR, score_vect_dicts, random_state=i)
            trat = time.time()- start

            test_data = preprocess_data(df.loc[test_index])
            _ = time.time()
            preds, true = predict(test_data, score_vect_dicts, regressor, classifier)
            tet = time.time()-_

            _ = time.time()
            preds_train, true_train = predict(preprocess_data(df.loc[train_index]),
                                              score_vect_dicts,
                                              regressor, classifier)
            predtra = time.time()-_

            _ = time.time()
            preds_tr, true_tr = predict(tronly_test, score_vect_dicts, regressor, classifier)
            trt = time.time()-_

            elapsed = time.time()-start

            s = distance_accuracy(true, preds)
            f1_test = f1_score(true, preds, average='weighted')

            s_train = distance_accuracy(true_train, preds_train)
            f1_train = f1_score(true_train, preds_train, average='weighted')

            s_tr = distance_accuracy(true_tr, preds_tr)
            f1_tronly = f1_test = f1_score(true_tr, preds_tr, average='weighted')        


            lr = LogisticRegression(random_state=i)
            mlp = MLPClassifier(random_state=i)
            rf = RandomForestClassifier(random_state=i,n_jobs=-1)
            train_mat = np.array(list(LSMR["rev_vec"]))
            test_mat = np.array(list(test_data["rev_vec"]))
            tronly_mat = np.array(list(tronly_test["rev_vec"]))

            evals = OrderedDict()
            evals["Train"] = s_train
            evals["Test"] = s
            evals["Tr. Only"] = s_tr
            evals["Training Time"] = trat
            evals["Pred.Tra. Time"] = predtra
            evals["Testing Time"] = tet
            evals["Tr.Test Time"] = trt
            evals["F1 Test"] = f1_test
            evals["F1 Train"] = f1_train
            evals["F1 Tr. only"] = f1_tronly
            scores_tables[i][k]["DeepSelect"] = evals
            scores_tables[i][k]["MLP"] = eval_models(
                mlp, train_mat, test_mat, tronly_mat, true_train, true, true_tr)
            scores_tables[i][k]["Logistic Regression"] = eval_models(
                lr, train_mat, test_mat, tronly_mat, true_train, true, true_tr)
            scores_tables[i][k]["RandomForest"] = eval_models(
                rf, train_mat, test_mat, tronly_mat, true_train, true, true_tr)

            print()
            print("K:\t{}".format(k+1))
            print(pd.DataFrame(scores_tables[i][k]))
            print("\nThis fold took:", elapsed, "seconds\n")
            learning_curves[i][k] = training_curve
            k += 1
            print("*"*10+"\n")
        print("Average scores for trial {}".format(i))
        print(get_trial_score(scores_tables[i]))
        print("-"*30)
    print("%%"*20)
    print("Average of {} trials".format(NUM_TRIALS))
    print(get_total_average(scores_tables))
    return scores_tables

In [88]:
scores_tables = eval_selectivewaves_regclass(df, tronly_test_raw)
pickle.dump(scores_tables, open("batch_no_tf_tables.results", "wb"))

Trial:	1
K:	1
epoch 0:	24.76686600092429
epoch 10:	24.450570421325914
epoch 20:	24.044721181809056
epoch 30:	23.659809859445378
epoch 40:	23.372617877461806
epoch 50:	23.145493307660868


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)



K:	1
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.202264  0.205265             0.196250      0.242046
F1 Tr. only       0.202264  0.240859             0.303204      0.287112
F1 Train          0.236986  0.331770             0.336056      0.995061
Pred.Tra. Time    1.799902  0.455240             0.177022      0.108307
Test              0.803704  0.811111             0.802469      0.798765
Testing Time      0.009492  0.000347             0.000145      0.105107
Tr. Only          0.804286  0.798571             0.781429      0.714286
Tr.Test Time      0.013290  0.000324             0.000156      0.106326
Train             0.812840  0.823333             0.823086      0.999259
Training Time    15.069148  0.004432             0.000827      0.105972

This fold took: 17.099502563476562 seconds

**********

K:	2
epoch 0:	24.92346767130208
epoch 10:	24.564709301632753
epoch 20:	24.2073859671301
epoch 30:	23.935115071168564
epoch 40:	23.6987205718208
e

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)



K:	2
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.181481  0.208627             0.217063      0.270889
F1 Tr. only       0.181481  0.321167             0.297630      0.267191
F1 Train          0.066203  0.334973             0.329319      0.992585
Pred.Tra. Time    1.777080  0.431015             0.162320      0.108316
Test              0.791111  0.772222             0.773333      0.793333
Testing Time      0.009697  0.000659             0.000160      0.105429
Tr. Only          0.801429  0.817143             0.782857      0.748571
Tr.Test Time      0.013545  0.000781             0.000156      0.105695
Train             0.825802  0.829877             0.829506      0.998889
Training Time    14.483696  0.001688             0.000736      0.106070

This fold took: 16.476651191711426 seconds

**********

K:	3
epoch 0:	24.38693266071134
epoch 10:	24.36671859100121
epoch 20:	24.141229575106586
epoch 30:	23.950204384628034
epoch 40:	23.76446889932159

  'precision', 'predicted', average, warn_for)



K:	3
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.180303  0.199350             0.178009      0.211884
F1 Tr. only       0.180303  0.298937             0.302416      0.282611
F1 Train          0.239626  0.318162             0.325155      0.993821
Pred.Tra. Time    1.703255  0.476871             0.174251      0.108089
Test              0.783951  0.771605             0.756790      0.770370
Testing Time      0.008917  0.000309             0.000123      0.103327
Tr. Only          0.798571  0.797143             0.780000      0.754286
Tr.Test Time      0.014533  0.000587             0.000115      0.103675
Train             0.816420  0.826420             0.827531      0.998395
Training Time    15.074522  0.002025             0.000899      0.103266

This fold took: 17.02352285385132 seconds

**********

K:	4
epoch 0:	22.97082298668301
epoch 10:	22.82736282802089
epoch 20:	22.696021554263883
epoch 30:	22.547027378871025
epoch 40:	22.4013970031759
e

  'precision', 'predicted', average, warn_for)



K:	4
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.171136  0.254857             0.255074      0.293663
F1 Tr. only       0.171136  0.323662             0.341750      0.307138
F1 Train          0.178423  0.299336             0.310901      0.996292
Pred.Tra. Time    1.826833  0.604097             0.166308      0.109244
Test              0.813333  0.807778             0.805556      0.785556
Testing Time      0.008838  0.000611             0.000100      0.104795
Tr. Only          0.730000  0.787143             0.801429      0.790000
Tr.Test Time      0.014428  0.000319             0.000102      0.104845
Train             0.792222  0.821111             0.823457      0.999259
Training Time    13.542790  0.001891             0.000707      0.103283

This fold took: 15.608597040176392 seconds

**********

K:	5
epoch 0:	21.27403529190793
epoch 10:	21.150054938585143
epoch 20:	20.966579648248388
epoch 30:	20.77070906470299
epoch 40:	20.61586152530190

  'precision', 'predicted', average, warn_for)



K:	5
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.202264  0.289502             0.241477      0.261729
F1 Tr. only       0.202264  0.313378             0.287556      0.279572
F1 Train          0.222821  0.288964             0.306291      0.996295
Pred.Tra. Time    1.790706  0.687872             0.167190      0.108454
Test              0.813580  0.809877             0.792593      0.746914
Testing Time      0.008772  0.000309             0.000109      0.101528
Tr. Only          0.804286  0.804286             0.775714      0.742857
Tr.Test Time      0.014600  0.000322             0.000135      0.101608
Train             0.812099  0.823827             0.820617      0.999259
Training Time    15.534183  0.001626             0.000750      0.102017

This fold took: 17.551403522491455 seconds

**********

K:	6
epoch 0:	23.328142026006237
epoch 10:	22.943384216296288
epoch 20:	22.669671286466247
epoch 30:	22.547168520151004
epoch 40:	22.340511062595

  'precision', 'predicted', average, warn_for)



K:	6
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.202264  0.228530             0.243285      0.258241
F1 Tr. only       0.202264  0.370055             0.292236      0.308313
F1 Train          0.228288  0.321539             0.334170      0.991355
Pred.Tra. Time    1.811466  0.558301             0.192604      0.108248
Test              0.801111  0.790000             0.793333      0.805556
Testing Time      0.010818  0.014003             0.000101      0.105598
Tr. Only          0.804286  0.810000             0.767143      0.747143
Tr.Test Time      0.013106  0.000635             0.000111      0.105049
Train             0.814938  0.829630             0.827901      0.998025
Training Time    16.667842  0.021832             0.000720      0.103447

This fold took: 18.714858531951904 seconds

**********

K:	7
epoch 0:	22.822449440498122
epoch 10:	22.487452789556254
epoch 20:	22.203279160148504
epoch 30:	21.99624966967625
epoch 40:	21.8296425958525

  'precision', 'predicted', average, warn_for)



K:	7
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.197836  0.261481             0.224081      0.265556
F1 Tr. only       0.197836  0.383894             0.332195      0.335966
F1 Train          0.228624  0.350501             0.325914      0.991345
Pred.Tra. Time    1.867791  1.079048             0.158678      0.107991
Test              0.800000  0.787778             0.783333      0.777778
Testing Time      0.009044  0.000311             0.000105      0.105295
Tr. Only          0.801429  0.814286             0.788571      0.771429
Tr.Test Time      0.013583  0.000318             0.000100      0.104778
Train             0.815926  0.834198             0.827160      0.998148
Training Time    14.991585  0.001625             0.000746      0.103639

This fold took: 17.10541844367981 seconds

**********

K:	8
epoch 0:	23.57382025575771
epoch 10:	23.405012021192853
epoch 20:	23.026254836512017
epoch 30:	22.713405503505832
epoch 40:	22.44181242019359

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)



K:	8
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.281460  0.219282             0.240992      0.236479
F1 Tr. only       0.281460  0.321095             0.348020      0.231954
F1 Train          0.205937  0.282132             0.313870      0.993809
Pred.Tra. Time    1.858663  0.651061             0.187132      0.107837
Test              0.841667  0.816667             0.825000      0.775000
Testing Time      0.009346  0.000345             0.000113      0.104412
Tr. Only          0.750000  0.821429             0.800000      0.688571
Tr.Test Time      0.010101  0.000335             0.000146      0.105883
Train             0.792716  0.819383             0.821235      0.998025
Training Time    17.200065  0.001734             0.000848      0.102837

This fold took: 19.294219493865967 seconds

**********

K:	9
epoch 0:	19.9516426349678
epoch 10:	19.848213856978084
epoch 20:	19.74263647426656
epoch 30:	19.618040343969476
epoch 40:	19.512793585691988

  'precision', 'predicted', average, warn_for)



K:	9
                DeepSelect       MLP  Logistic Regression  RandomForest
F1 Test           0.206488  0.185791             0.133072      0.230364
F1 Tr. only       0.206488  0.341620             0.308476      0.303576
F1 Train          0.275839  0.438538             0.336940      0.995064
Pred.Tra. Time    1.765091  1.229238             0.167646      0.108062
Test              0.776667  0.768889             0.757778      0.754444
Testing Time      0.009656  0.000679             0.000111      0.104964
Tr. Only          0.797143  0.800000             0.781429      0.737143
Tr.Test Time      0.014234  0.000328             0.000101      0.104991
Train             0.819259  0.845679             0.828148      0.998642
Training Time    14.691512  0.001661             0.000756      0.103207

This fold took: 16.728501319885254 seconds

**********

K:	10
epoch 0:	24.667802115791478
epoch 10:	24.143970182603308
epoch 20:	23.83940182489085
epoch 30:	23.550858148647933
epoch 40:	23.329828839194

  'precision', 'predicted', average, warn_for)


## Using the full network for prediction
### P.S. this variation supports online (incremental) training

In [89]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [90]:
def get_test(LSMR):
    X = dict()
    y = dict()
    for _, row in LSMR.iterrows():
        score = row["Score"]
        y_ = np.zeros(10)
        y_[score-1] = 1
        y[len(y)] = y_
        X[len(X)] = row["rev_vec"]
    return np.array(list(X.values())), np.array(list(y.values()))

In [91]:
def train_selective(df_train,epochs=100, learning_rate = 0.1, random_state=42, p_every=10):
    LSMR_train = preprocess_data(df_train)
    np.random.seed(random_state)
    data_dict, L1, L2, L3 = get_data_dict(LSMR_train, get_L2and3=True)
    init_weights = lambda layer, i, o: {k:2*np.random.random((i, o))-1 for k in layer}
    W1 = init_weights(L1, 300, 300)  # (languge, score, movie_id)
    W2 = init_weights(L2, 300, 300)  # (languge, score):
    W3 = init_weights(L3, 300, 10)  # score:
    
    
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)


    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    training_curve = dict()
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            for e in range(epochs+1):
                avg_cost = 0.
                for _, row in LSMR_train.iterrows():
                    lang = row["Language"]
                    movie_id = row["Movie_ID"]
                    score = row["Score"]
                    y_ = np.zeros(10)
                    y_[score-1] = 1
                    y_ = np.atleast_2d(y_)
                    x_ = np.atleast_2d(row["rev_vec"])
                    w1_,w2_,w3_,_, c = sess.run([w1, w2, w3, optimizer, cost],
                                             feed_dict={x: x_,
                                                        y: y_,
                                                        w1:W1[(lang, score, movie_id)],
                                                        w2:W2[(lang, score)],
                                                        w3:W3[score]})
                    W1[(lang, score, movie_id)] = w1_
                    W2[(lang, score)] = w2_
                    W3[score] = w3_

                    avg_cost += c
                training_curve[e] = avg_cost
                if e%p_every==0:
                    print("Epoch {}: {}".format(e, avg_cost/len(LSMR_train)))

            return W1, W2, W3, training_curve

In [92]:
def get_max_index(array):
    indx = None
    max_ = float("-inf")
    for i, e in enumerate(array):
        if e > max_:
            max_ = e
            indx = i
    return indx, max_

In [93]:
def predict_selective(df, W1, W2, W3):
    LSMR = preprocess_data(df)
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)

    
    
    
    prediction = tf.argmax(pred, 1)
    preds = np.zeros(len(LSMR))
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            j = 0
            for _, row in LSMR.iterrows():
                v = row["rev_vec"]
                predicted_scores = np.zeros(len(W1))
                for i, info in enumerate(W1):
                    language, score, movie_id = info
                    w_1 = W1[(language, score, movie_id)]
                    w_2 = W2[(language, score)]
                    w_3 = W3[score]

                    predicted_scores[i] = prediction.eval({x: np.atleast_2d(v),
                                                           w1:w_1,w2:w_2,w3:w_3})

                max_index, probability = get_max_index(softmax(predicted_scores))
                predicted_score = predicted_scores[max_index]

                preds[j] = predicted_score
                j+=1


    return preds, np.array(list(LSMR.Score))

In [94]:
def eval_selectivewaves_nn(df, tronly_test_raw, NUM_TRIALS=1, splits=10):  
    learning_curves = OrderedDict()
    scores_tables_nn = OrderedDict()
    for i in range(NUM_TRIALS):
        print("Trial:\t{}".format(i+1))
        learning_curves[i] = OrderedDict()
        k = 0
        skf = StratifiedKFold(n_splits=splits, random_state=i)
        scores_tables_nn[i] = dict()
        for train_index, test_index in skf.split(df["Review"], df["Language"]):
            print("K: \t{}".format(k+1))
            scores_tables_nn[i][k] = OrderedDict()
            start = time.time()
            # approx 3 epochs per second
            LSMR = preprocess_data(df.loc[train_index])
            W1, W2, W3, training_curve = train_selective(df.loc[train_index], epochs=150, p_every=25)
            _ = time.time()
            trat = _-start
            print("Took: {} for training".format(trat))

            _ = time.time()
            preds_train, true_train = predict_selective(df.loc[train_index], W1, W2, W3)
            predtra = time.time()-_
            print("Took: {} for predicting {} training instances".format(predtra, len(train_index)))

            test_data = preprocess_data(df.loc[test_index])
            _ = time.time()
            preds, true = predict_selective(df.loc[test_index], W1, W2, W3)
            tet = time.time()-_
            print("Took: {} for predicting {} test instances".format(tet, len(test_index)))

            _ = time.time()
            preds_tr, true_tr = predict_selective(tronly_test_raw, W1, W2, W3)
            trt = time.time()-_
            print("Took: {} for predicting {} Turkish test instances".format(trt, len(tronly_test)))

            elapsed = time.time()-start

            s = distance_accuracy(true, preds)
            s_train = distance_accuracy(true_train, preds_train)
            s_tr = distance_accuracy(true_tr, preds_tr)

            f1_test = f1_score(true, preds, average='weighted')
            f1_train = f1_score(true_train, preds_train, average='weighted')
            f1_tronly = f1_score(true_tr, preds_tr, average='weighted')

            mlp = MLPClassifier(random_state=i)
            lr = LogisticRegression(random_state=i)
            rf = RandomForestClassifier(random_state=i,n_jobs=-1)
            train_mat = np.array(list(LSMR["rev_vec"]))
            test_mat = np.array(list(test_data["rev_vec"]))
            tronly_mat = np.array(list(tronly_test["rev_vec"]))

            evals = OrderedDict()
            evals["Train"] = s_train
            evals["Test"] = s
            evals["Tr. Only"] = s_tr
            evals["Training Time"] = trat
            evals["Pred.Tra. Time"] = predtra
            evals["Testing Time"] = tet
            evals["Tr.Test Time"] = trt
            evals["F1 Test"] = f1_test
            evals["F1 Train"] = f1_train
            evals["F1 Tr. only"] = f1_tronly
            scores_tables_nn[i][k]["DeepSelect"] = evals

            scores_tables_nn[i][k]["LogisticRegression"] = eval_models(lr, train_mat, test_mat, tronly_mat, true_train, true, true_tr)
            scores_tables_nn[i][k]["MLP"] = eval_models(mlp, train_mat, test_mat, tronly_mat, true_train, true, true_tr)
            scores_tables_nn[i][k]["RandomForest"] = eval_models(rf, train_mat, test_mat, tronly_mat, true_train, true, true_tr)

            print()
            print(pd.DataFrame(scores_tables_nn[i][k]))
            print("took:", elapsed, "seconds\n")
            learning_curves[i][k] = training_curve
            k += 1
            print("*"*10+"\n")
        print("Average scores for trial {}".format(i))
        print(get_trial_score(scores_tables_nn[i]))
        print("-"*30)
    print("%%"*20)
    print("Average of {} trials".format(NUM_TRIALS))
    print(get_total_average(scores_tables_nn))
    return scores_tables_nn

In [95]:
scores_tables_nn = eval_selectivewaves_nn(df, tronly_test_raw)
pickle.dump(scores_tables_nn, open("incremental_tf_tables.results", "wb"))

Trial:	1
K: 	1
Epoch 0: 1.9773333567116693
Epoch 25: 0.003818575064216644
Epoch 50: 0.0019198051157985323
Epoch 75: 0.0012996401052061156
Epoch 100: 0.000988080382333894
Epoch 125: 0.0007996864090740125
Epoch 150: 0.0006731128424478581
Took: 183.70366883277893 for training
Took: 615.1565337181091 for predicting 810 training instances
Took: 65.2965362071991 for predicting 90 test instances
Took: 73.59039640426636 for predicting 100 Turkish test instances


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.120773            0.196250  0.205265      0.242046
F1 Tr. only       0.021802            0.303204  0.240859      0.287112
F1 Train          0.050270            0.336056  0.331770      0.995061
Pred.Tra. Time  615.156534            0.175007  0.563957      0.111384
Test              0.833333            0.802469  0.811111      0.798765
Testing Time     65.296536            0.000103  0.000311      0.105492
Tr. Only          0.787143            0.781429  0.798571      0.714286
Tr.Test Time     73.590396            0.000098  0.000322      0.105752
Train             0.822469            0.823086  0.823333      0.999259
Training Time   183.703669            0.000723  0.001884      0.102581
took: 937.9249000549316 seconds

**********

K: 	2
Epoch 0: 2.3775782500775855
Epoch 25: 0.004348101737080323
Epoch 50: 0.0020806660097174457
Epoch 75: 0.0013976375215784987
Epoch 100: 0.0010624149531730678
Epoch 125: 

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.050294            0.217063  0.208627      0.270889
F1 Tr. only       0.150864            0.297630  0.321167      0.267191
F1 Train          0.087336            0.329319  0.334973      0.992585
Pred.Tra. Time  513.924134            0.169826  0.434763      0.127065
Test              0.783333            0.773333  0.772222      0.793333
Testing Time     59.309341            0.000108  0.000311      0.104064
Tr. Only          0.795714            0.782857  0.817143      0.748571
Tr.Test Time     63.074502            0.000108  0.000331      0.105528
Train             0.823457            0.829506  0.829877      0.998889
Training Time   180.361396            0.000723  0.001676      0.102903
took: 816.843035697937 seconds

**********

K: 	3
Epoch 0: 2.1105157300578643
Epoch 25: 0.004087267299817969
Epoch 50: 0.0019113149940726908
Epoch 75: 0.0012732374629505147
Epoch 100: 0.0009633172474697583
Epoch 125: 0

  'precision', 'predicted', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.066667            0.178009  0.199350      0.211884
F1 Tr. only       0.021802            0.302416  0.298937      0.282611
F1 Train          0.055054            0.325155  0.318162      0.993821
Pred.Tra. Time  520.128438            0.176462  0.427962      0.129727
Test              0.796296            0.756790  0.771605      0.770370
Testing Time     55.579052            0.000159  0.000334      0.105165
Tr. Only          0.787143            0.780000  0.797143      0.754286
Tr.Test Time     62.733270            0.000147  0.000330      0.104523
Train             0.826173            0.827531  0.826420      0.998395
Training Time   192.575934            0.000753  0.001710      0.106050
took: 831.2531008720398 seconds

**********

K: 	4
Epoch 0: 2.47723830999529
Epoch 25: 0.006543009063156722
Epoch 50: 0.002836956465667322
Epoch 75: 0.001875388481338503
Epoch 100: 0.0014193576503870459
Epoch 125: 0.00

  'precision', 'predicted', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.041880            0.255074  0.254857      0.293663
F1 Tr. only       0.021802            0.341750  0.323662      0.307138
F1 Train          0.057870            0.310901  0.299336      0.996292
Pred.Tra. Time  533.933701            0.148086  0.375179      0.110834
Test              0.824444            0.805556  0.807778      0.785556
Testing Time     56.722768            0.000132  0.000314      0.104963
Tr. Only          0.787143            0.801429  0.787143      0.790000
Tr.Test Time     62.730045            0.000101  0.000318      0.104890
Train             0.825309            0.823457  0.821111      0.999259
Training Time   214.428659            0.000704  0.001636      0.102548
took: 868.0068027973175 seconds

**********

K: 	5
Epoch 0: 2.4254132675827416
Epoch 25: 0.007124853473765367
Epoch 50: 0.0029632288716947815
Epoch 75: 0.001946002681710592
Epoch 100: 0.0014707003281763337
Epoch 125: 0

  'precision', 'predicted', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.031373            0.241477  0.289502      0.261729
F1 Tr. only       0.021802            0.287556  0.313378      0.279572
F1 Train          0.059300            0.306291  0.288964      0.996295
Pred.Tra. Time  523.990801            0.144853  0.490867      0.110879
Test              0.806173            0.792593  0.809877      0.746914
Testing Time     58.466104            0.000131  0.000310      0.104856
Tr. Only          0.787143            0.775714  0.804286      0.742857
Tr.Test Time     65.260053            0.000136  0.000318      0.104731
Train             0.825185            0.820617  0.823827      0.999259
Training Time   193.849310            0.000654  0.001686      0.102402
took: 841.7475633621216 seconds

**********

K: 	6
Epoch 0: 2.5140747782521813
Epoch 25: 0.004502532258827418
Epoch 50: 0.002211785128756912
Epoch 75: 0.0014994027829871998
Epoch 100: 0.001145901822772649
Epoch 125: 0.

  'precision', 'predicted', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.026623            0.243285  0.228530      0.258241
F1 Tr. only       0.021802            0.292236  0.370055      0.308313
F1 Train          0.060083            0.334170  0.321539      0.991355
Pred.Tra. Time  521.019739            0.149468  0.518251      0.135072
Test              0.803333            0.793333  0.790000      0.805556
Testing Time     57.464112            0.000103  0.000301      0.104838
Tr. Only          0.787143            0.767143  0.810000      0.747143
Tr.Test Time     64.155338            0.000103  0.000323      0.104754
Train             0.827778            0.827901  0.829630      0.998025
Training Time   193.823402            0.000649  0.001684      0.103200
took: 836.6289052963257 seconds

**********

K: 	7
Epoch 0: 2.0252993080216304
Epoch 25: 0.004158288826428316
Epoch 50: 0.0019495924537374248
Epoch 75: 0.0013167823257401639
Epoch 100: 0.001006455283062156
Epoch 125: 0

  'precision', 'predicted', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.026623            0.224081  0.261481      0.265556
F1 Tr. only       0.021802            0.332195  0.383894      0.335966
F1 Train          0.060021            0.325914  0.350501      0.991345
Pred.Tra. Time  536.143471            0.154256  0.783379      0.111752
Test              0.795556            0.783333  0.787778      0.777778
Testing Time     58.274605            0.000102  0.000305      0.104939
Tr. Only          0.787143            0.788571  0.814286      0.771429
Tr.Test Time     64.166510            0.000101  0.000321      0.105597
Train             0.828519            0.827160  0.834198      0.998148
Training Time   194.566176            0.000626  0.001614      0.102175
took: 853.3530278205872 seconds

**********

K: 	8
Epoch 0: 2.386953772464716
Epoch 25: 0.005820904014874234
Epoch 50: 0.0026788789231635534
Epoch 75: 0.0017896633637661456
Epoch 100: 0.001360831037431109
Epoch 125: 0.

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.112281            0.240992  0.219282      0.236479
F1 Tr. only       0.021802            0.348020  0.321095      0.231954
F1 Train          0.050942            0.313870  0.282132      0.993809
Pred.Tra. Time  519.396542            0.150652  0.478358      0.110773
Test              0.863889            0.825000  0.816667      0.775000
Testing Time     57.697960            0.000103  0.000320      0.104861
Tr. Only          0.787143            0.800000  0.821429      0.688571
Tr.Test Time     61.745113            0.000103  0.000319      0.105430
Train             0.817901            0.821235  0.819383      0.998025
Training Time   197.087621            0.000610  0.001600      0.103175
took: 836.1293518543243 seconds

**********

K: 	9
Epoch 0: 2.9021928498045635
Epoch 25: 0.005129504039105984
Epoch 50: 0.002507706557218379
Epoch 75: 0.0016960249718019924
Epoch 100: 0.0012941287556573733
Epoch 125: 0

  'precision', 'predicted', average, warn_for)



                DeepSelect  LogisticRegression       MLP  RandomForest
F1 Test           0.060021            0.133072  0.185791      0.230364
F1 Tr. only       0.021802            0.308476  0.341620      0.303576
F1 Train          0.055753            0.336940  0.438538      0.995064
Pred.Tra. Time  528.096885            0.151687  1.024017      0.110780
Test              0.798889            0.757778  0.768889      0.754444
Testing Time     55.992993            0.000116  0.000320      0.104057
Tr. Only          0.787143            0.781429  0.800000      0.737143
Tr.Test Time     61.026876            0.000130  0.000322      0.105283
Train             0.828148            0.828148  0.845679      0.998642
Training Time   191.994486            0.000657  0.001659      0.102299
took: 837.3354005813599 seconds

**********

K: 	10
Epoch 0: 2.313656463823308
Epoch 25: 0.004104317455820494
Epoch 50: 0.002030699017542741
Epoch 75: 0.0013742376939195242
Epoch 100: 0.0010469946722385733
Epoch 125: 0

  'precision', 'predicted', average, warn_for)


# Robustness test

In [103]:
en_revs = df_full[df_full.Language=="en"]
tr_revs = df_full[df_full.Language=="tr"]
test = tr_revs[-100:]
tr_revs = tr_revs[:-100]
robustness = dict()  # {(num of en reviews, num of tr reviews): scores_dict}
for en_size in range(1,11):
    for tr_size in range(1,11):
        en_train = en_revs.sample(frac=en_size/10.0)
        tr_train = tr_revs.sample(frac=tr_size/10.0)
        start = time.time()
        print("En: {}\tTr: {}".format(len(en_train),len(tr_train)))
        train = pd.concat([en_train, tr_train]).reset_index(drop=True)
        robustness_tables = dict()
        print("Using first variation (Regressor and Classifier with score vectors)")
        LSMR, score_vect_dicts, training_curve = get_score_vects(
                                        train, alpha=1e-5, iterations=50)
        regressor, classifier = fit(LSMR, score_vect_dicts)
        preds, true = predict(test_data, score_vect_dicts, regressor, classifier)
        s_regclass = distance_accuracy(true, preds)
#         f1_test_regclass = f1_score(true, preds, average='weighted')
        robustness_tables["DeepSelect (regclass)"] = s_regclass
    
        print("Using second variation (average of outputs produced by each set of weight matrices)")
        W1, W2, W3, training_curve = train_selective(train, epochs=150, p_every=25)
        preds_nn, true_nn = predict_selective(tr_revs, W1, W2, W3)
        s_nn = distance_accuracy(true_nn, preds_nn)
#         f1_test_nn = f1_score(true_nn, preds_nn, average='weighted')
        robustness_tables["DeepSelect"] = s_nn
    
        print("Using well-known algorithms: Logistic Regression, RandomForest and MLP")
        lr = LogisticRegression()
        rf = RandomForestClassifier(n_jobs=-1)
        mlp = MLPClassifier()
        
        train_mat = np.array(list(LSMR["rev_vec"]))
        train_y = np.array(list(LSMR["Score"]))
        test_mat = np.array(list(preprocess_data(tr_revs)["rev_vec"]))
        
        for name, model in [("Logistic Regression",lr),
                            ("RandomForest", rf),
                            ("MLP", mlp)]:
            model.fit(train_mat, train_y)
            robustness_tables[name] = distance_accuracy(true, model.predict(test_mat))
        robustness[(len(en_train),len(tr_train))] = robustness_tables
        print("Took: {}".format(time.time()-start))
        print("-"*50)

En: 50	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	16.52820921224812
epoch 10:	16.39311794442145
epoch 20:	16.28053593823387
epoch 30:	16.17463017163827
epoch 40:	16.067564309247263
epoch 50:	15.964494623225281
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 7.339794548606778
Epoch 25: 0.08729174313691143
Epoch 50: 0.015980279772296057
Epoch 75: 0.008813390556133843
Epoch 100: 0.006229110769650169
Epoch 125: 0.004865745641478409
Epoch 150: 0.00401580884632106
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 67.90573191642761
--------------------------------------------------
En: 50	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.17242087221081
epoch 10:	19.943121625922586
epoch 20:	19.747438776393345
epoch 30:	19.562883062061417
epoch 40:	19.343043317450203
epoch 50:	19.134988594722827
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.537748518779587
Epoch 25: 0.02896918413554466
Epoch 50: 0.011732149529388362
Epoch 75: 0.00750067419797303
Epoch 100: 0.00556290933663359
Epoch 125: 0.0044437062310127935
Epoch 150: 0.0037117885535350907
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 97.6483633518219
--------------------------------------------------
En: 50	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	18.558661043428863
epoch 10:	18.275162920900748
epoch 20:	18.0690512800357
epoch 30:	18.011163026017886
epoch 40:	17.979688741120565
epoch 50:	17.893496160064807
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 5.529079747339197
Epoch 25: 0.020512997728527646
Epoch 50: 0.009145965525856488
Epoch 75: 0.006014972501543525
Epoch 100: 0.004513847238152213
Epoch 125: 0.003624595370665167
Epoch 150: 0.003033832769629311
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 115.3185076713562
--------------------------------------------------
En: 50	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.650004065424067
epoch 10:	20.501010042838722
epoch 20:	20.39800701220522
epoch 30:	20.278491025736592
epoch 40:	20.11572724965626
epoch 50:	19.923114970322096
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 5.586502467327262
Epoch 25: 0.012770103482683311
Epoch 50: 0.00597685394368238
Epoch 75: 0.00401376832193299
Epoch 100: 0.0030581356741225005
Epoch 125: 0.0024870354660931427
Epoch 150: 0.0021050228487172803
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 147.89269638061523
--------------------------------------------------
En: 50	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.207572227998376
epoch 10:	19.083247898189956
epoch 20:	19.03569606932404
epoch 30:	18.94439668950445
epoch 40:	18.87147352515329



Took: 165.94341206550598
--------------------------------------------------
En: 50	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.207796265508662
epoch 10:	21.99923903785189
epoch 20:	21.8343851582949
epoch 30:	21.63882946082881
epoch 40:	21.4389646683601
epoch 50:	21.222813907040518
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.8077726276764787
Epoch 25: 0.008134934987037924
Epoch 50: 0.003975011260735661
Epoch 75: 0.0026854954341419654
Epoch 100: 0.002047992654231699
Epoch 125: 0.0016648195952402131
Epoch 150: 0.0014078559791512596
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 194.4621250629425
--------------------------------------------------
En: 50	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.233458047309444
epoch 10:	20.041739636448913
epoch 20:	19.891385162254203
epoch 30:	19.71715369944107
epoch 40:	19.561088087049676
epoch 50:	19.43148609784066
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.821938721553482
Epoch 25: 0.01253030508266668
Epoch 50: 0.005854549981108214
Epoch 75: 0.0039043113349882994
Epoch 100: 0.002959969207274822
Epoch 125: 0.0023984562801987607
Epoch 150: 0.0020243045445926273
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 215.1549551486969
--------------------------------------------------
En: 50	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.430905383380168
epoch 10:	23.18144371995767
epoch 20:	22.94502573587449
epoch 30:	22.70553442408163
epoch 40:	22.466395823016427
epoch 50:	22.25169961727304
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.5469703360552067
Epoch 25: 0.008192623501099353
Epoch 50: 0.003823824644709131
Epoch 75: 0.002550301385817598
Epoch 100: 0.001932100512973451
Epoch 125: 0.0015639287674667451
Epoch 150: 0.0013184374155939223
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 228.99712491035461
--------------------------------------------------
En: 50	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.13661317279356
epoch 10:	21.035086856216374
epoch 20:	20.90981561251643
epoch 30:	20.738727637396543
epoch 40:	20.59078150136115
epoch 50:	20.456599832109802
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.27495084668355
Epoch 25: 0.008682058947574772
Epoch 50: 0.003922100654622726
Epoch 75: 0.002587961226206156
Epoch 100: 0.0019491792230337246
Epoch 125: 0.001571730005687824
Epoch 150: 0.0013214073862277556
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 257.6350030899048
--------------------------------------------------
En: 50	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.7418612024327
epoch 10:	20.61126471828218
epoch 20:	20.427651514925522
epoch 30:	20.224610123807626
epoch 40:	20.038430630643447




Took: 271.9390392303467
--------------------------------------------------
En: 100	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.86882168245634
epoch 10:	19.9092612408944
epoch 20:	19.805491090237812
epoch 30:	19.614890601752453
epoch 40:	19.394302070273426
epoch 50:	19.202363324633318
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 5.129584824559944
Epoch 25: 0.07302809065203064
Epoch 50: 0.018936178862778822
Epoch 75: 0.008098027134043865
Epoch 100: 0.005344458754093954
Epoch 125: 0.004043634303583791
Epoch 150: 0.003275666567844837
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 106.9969220161438
--------------------------------------------------
En: 100	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.8334635842836
epoch 10:	19.596365687491907
epoch 20:	19.42477436447093
epoch 30:	19.18635338742181
epoch 40:	18.98612432579517
epoch 50:	18.824622419099178
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.073025793182993
Epoch 25: 0.024851973619600662
Epoch 50: 0.009547373446093844
Epoch 75: 0.006075273871617052
Epoch 100: 0.004498353506844271
Epoch 125: 0.0035907408257824297
Epoch 150: 0.00299853506651036
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 131.9662082195282
--------------------------------------------------
En: 100	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.16143237968937
epoch 10:	22.895519960223215
epoch 20:	22.630408198515518
epoch 30:	22.393349482121188
epoch 40:	22.15018465776216
epoch 50:	21.9228406337818
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.097704803931992
Epoch 25: 0.01171999797953421
Epoch 50: 0.005583083305720555
Epoch 75: 0.0037276106836987887
Epoch 100: 0.002820443417107816
Epoch 125: 0.0022790729098708406
Epoch 150: 0.0019179223946500976
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 157.23956084251404
--------------------------------------------------
En: 100	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	18.030692492409532
epoch 10:	17.966970356394008
epoch 20:	17.902395139477072
epoch 30:	17.842417408589657
epoch 40:	17.792289593120998
epoch 50:	17.742208190843115
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.6150671737271365
Epoch 25: 0.02688962801029832
Epoch 50: 0.008102054117510376
Epoch 75: 0.004946322766180454
Epoch 100: 0.0035976783454416887
Epoch 125: 0.0028439896536409694
Epoch 150: 0.0023607819092273546
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 177.96316170692444
--------------------------------------------------
En: 100	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.79707911365647
epoch 10:	21.579340775055037
epoch 20:	21.319263888025546
epoch 30:	21.083574187988813
epoch 40:	20.877853598556165
epoch 50:	20.678224027628165
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.022157329743908
Epoch 25: 0.013149791939492653
Epoch 50: 0.005074322252737223
Epoch 75: 0.003283855563414401
Epoch 100: 0.0024660120824588224
Epoch 125: 0.0019900957459336155
Epoch 150: 0.0016760999038595513
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 200.16781044006348
--------------------------------------------------
En: 100	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.48452854974004
epoch 10:	20.36027199581928
epoch 20:	20.1939450112428
epoch 30:	20.05790142204325
epoch 40:	19.93724989704214
epoch 50:	19.82781621670487
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.5623621198098268
Epoch 25: 0.008550410627038475
Epoch 50: 0.003926103010309678
Epoch 75: 0.002606454517836872
Epoch 100: 0.001970258335651293
Epoch 125: 0.0015927595037199434
Epoch 150: 0.0013416406704134895
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 229.26985144615173
--------------------------------------------------
En: 100	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.10494286979153
epoch 10:	20.89894679956469
epoch 20:	20.685748409781187
epoch 30:	20.5010005360481
epoch 40:	20.354514089265002
epoch 50:	20.182619935342764
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.8759061492468296
Epoch 25: 0.011034273873407361
Epoch 50: 0.004876591607318771
Epoch 75: 0.003214784715911073
Epoch 100: 0.0024239204727803695
Epoch 125: 0.0019574643842038076
Epoch 150: 0.0016482350777347451
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 249.13368248939514
--------------------------------------------------
En: 100	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.020764458651293
epoch 10:	20.891886241392378
epoch 20:	20.68192359233935
epoch 30:	20.49713849051033
epoch 40:	20.373689276336414
epoch 50:	20.265332904851938
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.676640738699782
Epoch 25: 0.029177015036527466
Epoch 50: 0.006549534974939369
Epoch 75: 0.0038066155166732295
Epoch 100: 0.0027189252839662005
Epoch 125: 0.0021315732859940387
Epoch 150: 0.0017631736603724287
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 271.3981873989105
--------------------------------------------------
En: 100	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.611996609227326
epoch 10:	19.550913302986253
epoch 20:	19.484419036839885
epoch 30:	19.43057801214237
epoch 40:	19.37056355



Took: 289.33306646347046
--------------------------------------------------
En: 100	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.4479804995722
epoch 10:	22.235946698893777
epoch 20:	22.00800224306249
epoch 30:	21.790577228389363
epoch 40:	21.61653206921743
epoch 50:	21.48750014526655
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.1582032730954523
Epoch 25: 0.004965149507272372
Epoch 50: 0.002417920444570882
Epoch 75: 0.0016318884558484115
Epoch 100: 0.0012430625843048802
Epoch 125: 0.0010091691360825052
Epoch 150: 0.0008522512376118243
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 316.6615414619446
--------------------------------------------------
En: 150	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.15616335692374
epoch 10:	21.857546057276803
epoch 20:	21.3647330114568
epoch 30:	20.99977583926535
epoch 40:	20.714517377219174
epoch 50:	20.453905815546484
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.791492950846441
Epoch 25: 0.022575320332503236
Epoch 50: 0.0074050309615406726
Epoch 75: 0.004584078676560622
Epoch 100: 0.0033654293342543066
Epoch 125: 0.0026786330240826683
Epoch 150: 0.00223509240462303
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 142.16614723205566
--------------------------------------------------
En: 150	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.8172021139943
epoch 10:	21.598703792211186
epoch 20:	21.425414424510098
epoch 30:	21.246898187583902
epoch 40:	21.076554412868582
epoch 50:	20.922790006585526
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.6764972273495875
Epoch 25: 0.008017707224739158
Epoch 50: 0.004025442810036664
Epoch 75: 0.0027544117511554404
Epoch 100: 0.0021162931054356667
Epoch 125: 0.0017289250559770152
Epoch 150: 0.0014673169815701524
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 167.22138571739197
--------------------------------------------------
En: 150	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.48992700144672
epoch 10:	19.316554728702908
epoch 20:	19.1712271567448
epoch 30:	18.991007348601958
epoch 40:	18.83458985944437
epoch 50:	18.71095832879485
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.7186488738201056
Epoch 25: 0.015392832230211886
Epoch 50: 0.005814481665725503
Epoch 75: 0.0037161891812222286
Epoch 100: 0.0027641083468274196
Epoch 125: 0.0022143103348145976
Epoch 150: 0.0018540579293586377
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 187.22345805168152
--------------------------------------------------
En: 150	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	18.585262591379355
epoch 10:	18.527532554101132
epoch 20:	18.44668109394878
epoch 30:	18.379011969405234
epoch 40:	18.31026156791402
epoch 50:	18.235632920667165
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 6.386730050672181
Epoch 25: 0.03168226513110224
Epoch 50: 0.008378137390590998
Epoch 75: 0.00513829306515955
Epoch 100: 0.003777836485899629
Epoch 125: 0.0030132118734275447
Epoch 150: 0.0025192542548068326
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 210.33798003196716
--------------------------------------------------
En: 150	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.573434935504174
epoch 10:	23.351050803686526
epoch 20:	23.149934728436136
epoch 30:	22.98582652364161
epoch 40:	22.85166469344882
epoch 50:	22.726070760664655
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.103576277324547
Epoch 25: 0.007391933911624327
Epoch 50: 0.003524582524425617
Epoch 75: 0.002367519000686156
Epoch 100: 0.0018002176498481098
Epoch 125: 0.0014604808573380572
Epoch 150: 0.0012331005110168495
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 233.06008338928223
--------------------------------------------------
En: 150	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.00986803254752
epoch 10:	21.822853833190955
epoch 20:	21.689463576386274
epoch 30:	21.537908730465613
epoch 40:	21.382265836372866
epoch 50:	21.254283402042567
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.8044332785358748
Epoch 25: 0.010539666725563644
Epoch 50: 0.004936709455316117
Epoch 75: 0.003311756141178753
Epoch 100: 0.002522434524942769
Epoch 125: 0.0020510104281689003
Epoch 150: 0.0017355997590129846
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 264.267028093338
--------------------------------------------------
En: 150	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.989097355594925
epoch 10:	19.91210693328592
epoch 20:	19.850295174191455
epoch 30:	19.788516176000545
epoch 40:	19.729596618388232
epoch 50:	19.67257795478231
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.9771409824188848
Epoch 25: 0.005876495422645285
Epoch 50: 0.0027761702840356796
Epoch 75: 0.0018485769343563989
Epoch 100: 0.00139673410024094
Epoch 125: 0.0011277702119314675
Epoch 150: 0.0009487010400361278
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 283.77928614616394
--------------------------------------------------
En: 150	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.15872654945169
epoch 10:	20.994643254515445
epoch 20:	20.889219108199434
epoch 30:	20.77700338469528
epoch 40:	20.685846463670977
epoch 50:	20.5934540559441
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.9829293372635877
Epoch 25: 0.00890653217077941
Epoch 50: 0.0036769324435628266
Epoch 75: 0.0023857103837788096
Epoch 100: 0.0017877976719415877
Epoch 125: 0.0014393954148816076
Epoch 150: 0.0012098678454590492
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 307.3232114315033
--------------------------------------------------
En: 150	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	26.114602819401682
epoch 10:	25.760612354606614
epoch 20:	25.27181005061845
epoch 30:	24.814327419460774
epoch 40:	24.364151873191403
epoch 50:	23.952279100798513
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.2453539410875405
Epoch 25: 0.007221409808606522
Epoch 50: 0.003292698211774169
Epoch 75: 0.002175347149274303
Epoch 100: 0.0016407766716837408
Epoch 125: 0.0013255510151176238
Epoch 150: 0.0011168204209540962
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 331.32477378845215
--------------------------------------------------
En: 150	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.851142158405626
epoch 10:	22.801282839150993
epoch 20:	22.576772341395934
epoch 30:	22.336404643846812
epoch 40:	22.13157604571347
epoch 50:	21.936151582047092
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.835650669848189
Epoch 25: 0.008615990609600265
Epoch 50: 0.0037791118000726244
Epoch 75: 0.0024965943380578894
Epoch 100: 0.0018896010808659143
Epoch 125: 0.001531597887463983
Epoch 150: 0.0012937564252620021
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 344.8439230918884
--------------------------------------------------
En: 200	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.87951329775793
epoch 10:	21.609583785677806
epoch 20:	21.374985183572463
epoch 30:	21.146925251475402
epoch 40:	20.908895052425535
epoch 50:	20.683415301318153
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.0130757942501516
Epoch 25: 0.018946271147936265
Epoch 50: 0.004527614615739613
Epoch 75: 0.002793914477332393
Epoch 100: 0.0020625949514161118
Epoch 125: 0.0016485953188882499
Epoch 150: 0.0013795942953673665
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 169.36197710037231
--------------------------------------------------
En: 200	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.141952502430648
epoch 10:	20.986465825104034
epoch 20:	20.73812942822706
epoch 30:	20.522807447303148
epoch 40:	20.32276133698312
epoch 50:	20.147302365526336
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 5.931420692024758
Epoch 25: 0.0274967080895619
Epoch 50: 0.007544038837314864
Epoch 75: 0.004652672860395666
Epoch 100: 0.0034209811425203692
Epoch 125: 0.0027264560565738193
Epoch 150: 0.0022766816292249
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 193.20699405670166
--------------------------------------------------
En: 200	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	18.8286005941117
epoch 10:	18.73982597343862
epoch 20:	18.63870677704113
epoch 30:	18.557730064488265
epoch 40:	18.490939181058966
epoch 50:	18.41826496452496
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 5.717472193680078
Epoch 25: 0.027288279246897675
Epoch 50: 0.00819466482400628
Epoch 75: 0.00500264523851115
Epoch 100: 0.0036470251293508225
Epoch 125: 0.0028877940763777587
Epoch 150: 0.0023996049808403088
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 218.02778792381287
--------------------------------------------------
En: 200	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.833737199558858
epoch 10:	22.53110330653086
epoch 20:	22.30699458248825
epoch 30:	22.11693086532055
epoch 40:	21.960229457232266
epoch 50:	21.824923931630057
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.33508396314053
Epoch 25: 0.01592504507264165
Epoch 50: 0.005995053685411891
Epoch 75: 0.0038420682688142605
Epoch 100: 0.0028688629922456244
Epoch 125: 0.00230762209648216
Epoch 150: 0.0019401352673391948
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 262.4925711154938
--------------------------------------------------
En: 200	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	25.313367207873036
epoch 10:	25.199084836044797
epoch 20:	24.978783905433346
epoch 30:	24.71380693032838
epoch 40:	24.44487538676406
epoch 50:	24.182971413031176
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.084877554877587
Epoch 25: 0.006880885046975323
Epoch 50: 0.0034188256505302663
Epoch 75: 0.0023227515433063674
Epoch 100: 0.001776408325976604
Epoch 125: 0.0014465876076837957
Epoch 150: 0.0012247978951802452
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 272.2025966644287
--------------------------------------------------
En: 200	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	18.50827004943057
epoch 10:	18.40771581824874
epoch 20:	18.278970912527704
epoch 30:	18.170394785411673
epoch 40:	18.08557541778162
epoch 50:	18.02176877612144
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.258408594256848
Epoch 25: 0.03581006818670341
Epoch 50: 0.007744724390673499
Epoch 75: 0.004443860824132819
Epoch 100: 0.0031487209098165956
Epoch 125: 0.0024498513996005305
Epoch 150: 0.0020104004706405993
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 300.81946086883545
--------------------------------------------------
En: 200	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	30.120137697344433
epoch 10:	29.69258176729101
epoch 20:	29.2924536832179
epoch 30:	29.092448989197653
epoch 40:	29.197482055681814
epoch 50:	28.996832828671863
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.712213295086008
Epoch 25: 0.006052407208360933
Epoch 50: 0.002970356582228343
Epoch 75: 0.0019962239565464537
Epoch 100: 0.0015132678838424064
Epoch 125: 0.0012232408024338485
Epoch 150: 0.0010291367877735583
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 322.5249376296997
--------------------------------------------------
En: 200	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	18.488690836008942
epoch 10:	18.44451361650892
epoch 20:	18.401417866662825
epoch 30:	18.35061118800551
epoch 40:	18.289243706219736
epoch 50:	18.227061270656314
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.1896447209087393
Epoch 25: 0.010615806706615362
Epoch 50: 0.003953996544274219
Epoch 75: 0.002516640535156748
Epoch 100: 0.0018705631424802724
Epoch 125: 0.001499050186277683
Epoch 150: 0.0012561569093688918
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 337.04653096199036
--------------------------------------------------
En: 200	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.7590645417584
epoch 10:	21.577269451447524
epoch 20:	21.45013678703915
epoch 30:	21.364717144900236
epoch 40:	21.290260857748514
epoch 50:	21.216369578558403
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.47821453957151
Epoch 25: 0.006764135695345073
Epoch 50: 0.0031654199016721966
Epoch 75: 0.002120024489424769
Epoch 100: 0.0016114300280725452
Epoch 125: 0.0013077016186826996
Epoch 150: 0.0011046192404086648
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 356.83785462379456
--------------------------------------------------
En: 200	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	25.60914285300418
epoch 10:	25.22745892870912
epoch 20:	24.866948447984914
epoch 30:	24.58809774502688
epoch 40:	24.292140936662708
epoch 50:	23.97734841748848
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.646724799795411
Epoch 25: 0.008839020231458411
Epoch 50: 0.00433057249042785
Epoch 75: 0.002948463788459132
Epoch 100: 0.002262426031124297
Epoch 125: 0.00184795593677336
Epoch 150: 0.001568709950307247
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 381.79757475852966
--------------------------------------------------
En: 250	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.952726617239424
epoch 10:	20.817256859920686
epoch 20:	20.707687955423584
epoch 30:	20.601828493966888
epoch 40:	20.502902709480832
epoch 50:	20.406839692033632
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.6536252902080566
Epoch 25: 0.011118674516534345
Epoch 50: 0.0045866840274701925
Epoch 75: 0.003002458423781781
Epoch 100: 0.0022629215808655286
Epoch 125: 0.001829097366257413
Epoch 150: 0.0015418871873313538
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 204.3739891052246
--------------------------------------------------
En: 250	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.8784085718491
epoch 10:	19.702857991683402
epoch 20:	19.501702421596452
epoch 30:	19.33651598095254
epoch 40:	19.206772382684413
epoch 50:	19.080008138963034
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.958588392660904
Epoch 25: 0.01252805821377754
Epoch 50: 0.005421447953675383
Epoch 75: 0.00352707057868291
Epoch 100: 0.002636141976610375
Epoch 125: 0.002114606324275056
Epoch 150: 0.0017707202359898391
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 241.1300253868103
--------------------------------------------------
En: 250	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.50984084546834
epoch 10:	19.41343576482552
epoch 20:	19.33363715062497
epoch 30:	19.245676341982495
epoch 40:	19.15225546774284
epoch 50:	19.064223896230608
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.4761431308412876
Epoch 25: 0.010081440418464907
Epoch 50: 0.004786870506798034
Epoch 75: 0.0031992668994594415
Epoch 100: 0.00242245679423432
Epoch 125: 0.001958343715649347
Epoch 150: 0.0016485054008571014
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 264.4336893558502
--------------------------------------------------
En: 250	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.216309278630806
epoch 10:	19.194461330104858
epoch 20:	19.12608715508994
epoch 30:	19.03428923330069
epoch 40:	18.929643425467948
epoch 50:	18.840975112275867
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.7402938980374927
Epoch 25: 0.00514508184037035
Epoch 50: 0.002627125040819409
Epoch 75: 0.0018052588430137115
Epoch 100: 0.001389780850608875
Epoch 125: 0.0011366604193848084
Epoch 150: 0.0009653131772466157
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 285.7581968307495
--------------------------------------------------
En: 250	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.070488543865814
epoch 10:	20.943190681980205
epoch 20:	20.765631263675466
epoch 30:	20.619234313766523
epoch 40:	20.499348097



Took: 308.22048258781433
--------------------------------------------------
En: 250	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.13082299579924
epoch 10:	20.93985571266741
epoch 20:	20.79405430130888
epoch 30:	20.52842073454652
epoch 40:	20.313880828448106
epoch 50:	20.115741299331088
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.1386772949409374
Epoch 25: 0.012147004881993854
Epoch 50: 0.0038992278246273693
Epoch 75: 0.002499413326609101
Epoch 100: 0.0018749466613380904
Epoch 125: 0.0015148897302090202
Epoch 150: 0.0012784832698455328
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 328.01486110687256
--------------------------------------------------
En: 250	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.28392720614052
epoch 10:	20.16524281941698
epoch 20:	20.05266255221482
epoch 30:	19.937405853721472
epoch 40:	19.85109249759553
epoch 50:	19.76797902459815
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.749173306745193
Epoch 25: 0.005152725766729171
Epoch 50: 0.002573071666336815
Epoch 75: 0.0017550996828565005
Epoch 100: 0.0013450042285652573
Epoch 125: 0.0010961843947604997
Epoch 150: 0.0009282196819177317
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 348.3618206977844
--------------------------------------------------
En: 250	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.214948044520476
epoch 10:	22.749101182281315
epoch 20:	22.52291352422376
epoch 30:	22.2283484491143
epoch 40:	21.99950670311144
epoch 50:	21.799729907863714
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.7062756595164932
Epoch 25: 0.004697346904462295
Epoch 50: 0.0023597265071381864
Epoch 75: 0.0016125991305116578
Epoch 100: 0.0012376489428418294
Epoch 125: 0.0010100535591528856
Epoch 150: 0.0008563226516329543
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 375.36515069007874
--------------------------------------------------
En: 250	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.9796712891421
epoch 10:	22.74682510757704
epoch 20:	22.461683607206236
epoch 30:	22.166642274140553
epoch 40:	21.934790859440838
epoch 50:	21.730711688571624
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.955847713540484
Epoch 25: 0.011883186281515098
Epoch 50: 0.0051278996162420565
Epoch 75: 0.0033815795689898535
Epoch 100: 0.0025604518606393016
Epoch 125: 0.0020775237672341225
Epoch 150: 0.0017572520026235116
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 393.17196106910706
--------------------------------------------------
En: 250	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.145035685847127
epoch 10:	22.961444867638857
epoch 20:	22.800300878752246
epoch 30:	22.649455385917292
epoch 40:	22.514695



Took: 407.61310338974
--------------------------------------------------
En: 300	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.00293898528469
epoch 10:	21.80178594233744
epoch 20:	21.620557175868647
epoch 30:	21.38161168538247
epoch 40:	21.13645009528317
epoch 50:	20.915321559398684
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.6980728767266984
Epoch 25: 0.008137032215544798
Epoch 50: 0.0032344357458281166
Epoch 75: 0.0020930516317214237
Epoch 100: 0.0015684026222756715
Epoch 125: 0.0012636639357899238
Epoch 150: 0.0010632610615703818
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 238.04393458366394
--------------------------------------------------
En: 300	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	24.526886122801898
epoch 10:	24.065891438433823
epoch 20:	23.700930914062432
epoch 30:	23.428588806547776
epoch 40:	23.20742315846807
epoch 50:	22.963659580181464
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.8929623735854157
Epoch 25: 0.011117439200126299
Epoch 50: 0.004842279212243739
Epoch 75: 0.0032271784414277995
Epoch 100: 0.002458861794002587
Epoch 125: 0.0020031184658268774
Epoch 150: 0.001698961645405162
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 266.552695274353
--------------------------------------------------
En: 300	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.67502496924017
epoch 10:	21.383501030426277
epoch 20:	21.19323519433857
epoch 30:	21.035232985458055
epoch 40:	20.891465500634453
epoch 50:	20.760009787720982
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.7341412948312827
Epoch 25: 0.008669030578916395
Epoch 50: 0.003921413069413878
Epoch 75: 0.0025925106799344834
Epoch 100: 0.001954601717245298
Epoch 125: 0.001576843445554048
Epoch 150: 0.0013258921469579425
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 286.59071946144104
--------------------------------------------------
En: 300	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	24.041746420127573
epoch 10:	23.779847022675277
epoch 20:	23.49091149963166
epoch 30:	23.300782376701022
epoch 40:	23.15711748149478
epoch 50:	23.023955710410497
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.9281752178022091
Epoch 25: 0.008197828841674021
Epoch 50: 0.0036887857709604326
Epoch 75: 0.002434244551146677
Epoch 100: 0.001834922189502826
Epoch 125: 0.0014809828738638385
Epoch 150: 0.0012462341748594643
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 315.76285672187805
--------------------------------------------------
En: 300	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.70394025915572
epoch 10:	21.610268296580728
epoch 20:	21.587209888569767
epoch 30:	21.522669543179568
epoch 40:	21.432579596532612
epoch 50:	21.3178972458377
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.17281551660852
Epoch 25: 0.013922408836180693
Epoch 50: 0.005509425762244064
Epoch 75: 0.0035312491943113853
Epoch 100: 0.0026259100250244955
Epoch 125: 0.0021015814355432668
Epoch 150: 0.0017577115177391533
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 334.5150303840637
--------------------------------------------------
En: 300	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.806555730606654
epoch 10:	22.478975681761572
epoch 20:	22.32324112611806
epoch 30:	22.184692875291386
epoch 40:	22.021589944065738
epoch 50:	21.85981746801544
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.5704141190396816
Epoch 25: 0.00581188155946919
Epoch 50: 0.002750116382206034
Epoch 75: 0.0018484664724330404
Epoch 100: 0.001407294011144068
Epoch 125: 0.001142923501280456
Epoch 150: 0.0009657868739594206
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 357.9664890766144
--------------------------------------------------
En: 300	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	27.7877659579056
epoch 10:	27.28060726695208
epoch 20:	26.861793717591194
epoch 30:	26.5156232289071
epoch 40:	26.151737369553736
epoch 50:	25.792026847774974
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.5845950818573202
Epoch 25: 0.0067961114938161074
Epoch 50: 0.0032799630812640882
Epoch 75: 0.0022104156976422867
Epoch 100: 0.0016827501860412777
Epoch 125: 0.0013659554146617492
Epoch 150: 0.001153763605490722
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 393.20270228385925
--------------------------------------------------
En: 300	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.620714075008696
epoch 10:	21.564670666911866
epoch 20:	21.461081662918136
epoch 30:	21.350367148281826
epoch 40:	21.23047550451202
epoch 50:	21.109757343529292
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.4620138788820922
Epoch 25: 0.003627448999766568
Epoch 50: 0.0018960977684239957
Epoch 75: 0.0013088087846062119
Epoch 100: 0.0010082517931969902
Epoch 125: 0.000824230684981311
Epoch 150: 0.0006994362657218243
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 390.75848746299744
--------------------------------------------------
En: 300	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.586691053320486
epoch 10:	23.283824528945345
epoch 20:	23.181527020408776
epoch 30:	22.92851255474014
epoch 40:	22.63724653412549
epoch 50:	22.37500433865825
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.970863287982867
Epoch 25: 0.006628275355538363
Epoch 50: 0.0032079747681834623
Epoch 75: 0.0021760516404550814
Epoch 100: 0.0016661011466729207
Epoch 125: 0.0013585199725833325
Epoch 150: 0.00115136702317207
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 426.1990637779236
--------------------------------------------------
En: 300	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.370965675660774
epoch 10:	23.041347087166763
epoch 20:	22.815533198437226
epoch 30:	22.603692476160468
epoch 40:	22.374366857126894
epoch 50:	22.140637718632778
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.71432944500629
Epoch 25: 0.005269735050367542
Epoch 50: 0.0025028521668108233
Epoch 75: 0.0016617323160962119
Epoch 100: 0.0012518985148484976
Epoch 125: 0.0010082903701803454
Epoch 150: 0.0008463903871268031
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 439.8830053806305
--------------------------------------------------
En: 350	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.840406400134363
epoch 10:	22.72766914257078
epoch 20:	22.658416132407698
epoch 30:	22.54305212473543
epoch 40:	22.42056184173374
epoch 50:	22.30306192821717
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.78448225529148
Epoch 25: 0.007975178076342742
Epoch 50: 0.0036708642078326355
Epoch 75: 0.0024277629318750116
Epoch 100: 0.0018300123113348468
Epoch 125: 0.0014766624202360245
Epoch 150: 0.0012424565322869687
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 266.7939364910126
--------------------------------------------------
En: 350	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.458397486303152
epoch 10:	23.240903879574304
epoch 20:	22.990370546352676
epoch 30:	22.664957263954367
epoch 40:	22.424840915403752
epoch 50:	22.176649467270998
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.2101496976152704
Epoch 25: 0.007831579747230429
Epoch 50: 0.0027638932735414507
Epoch 75: 0.001810474767241198
Epoch 100: 0.0013734394252381652
Epoch 125: 0.0011176697731232622
Epoch 150: 0.0009480677303345992
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 303.39286494255066
--------------------------------------------------
En: 350	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.65569116525012
epoch 10:	21.371186313643847
epoch 20:	21.22903923514204
epoch 30:	21.111701516639307
epoch 40:	20.9963003



Took: 324.26872301101685
--------------------------------------------------
En: 350	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.51922074866193
epoch 10:	20.411897298435374
epoch 20:	20.293958918303794
epoch 30:	20.20127079392801
epoch 40:	20.09803963424103
epoch 50:	19.99175866254206
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.682236247484737
Epoch 25: 0.008393300408211943
Epoch 50: 0.0036676232532250187
Epoch 75: 0.002399088397570205
Epoch 100: 0.0017987446927263055
Epoch 125: 0.001445817800147302
Epoch 150: 0.001212388649158878
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 337.45028948783875
--------------------------------------------------
En: 350	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.923820454347645
epoch 10:	20.725307990978603
epoch 20:	20.57279685246638
epoch 30:	20.458851793995706
epoch 40:	20.32550417341708
epoch 50:	20.227381980047614
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.940578348661352
Epoch 25: 0.007114570490658397
Epoch 50: 0.003445340299008938
Epoch 75: 0.002329678197014366
Epoch 100: 0.0017787804336665597
Epoch 125: 0.001447265470067123
Epoch 150: 0.001224628769147826
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 368.565794467926
--------------------------------------------------
En: 350	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	27.58607593496331
epoch 10:	27.182146223035375
epoch 20:	26.80172055065459
epoch 30:	26.392625142462304
epoch 40:	26.064669483753885
epoch 50:	25.7874527559345
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.7634749916188768
Epoch 25: 0.0076997294071989636
Epoch 50: 0.0033847177148179797
Epoch 75: 0.0022627475563197904
Epoch 100: 0.0017259672002240731
Epoch 125: 0.0014067053084260298
Epoch 150: 0.0011932818208849267
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 380.22469544410706
--------------------------------------------------
En: 350	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.31159176318086
epoch 10:	21.12054531201536
epoch 20:	21.06795373836581
epoch 30:	20.9633640609961
epoch 40:	20.85744323944145
epoch 50:	20.72748883003543
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.8755643770221107
Epoch 25: 0.008947385034043185
Epoch 50: 0.0034603598483261607
Epoch 75: 0.0022297904001065366
Epoch 100: 0.0016674158525629102
Epoch 125: 0.001341241446827791
Epoch 150: 0.0011268940423803094
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 405.5441334247589
--------------------------------------------------
En: 350	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.421385923636088
epoch 10:	22.26281990422637
epoch 20:	22.124764165781556
epoch 30:	22.000924773607043
epoch 40:	21.877927393328395
epoch 50:	21.77999184626852
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.711227793063346
Epoch 25: 0.004659973546139619
Epoch 50: 0.0022946702745019524
Epoch 75: 0.0015513461655014091
Epoch 100: 0.0011831445186872565
Epoch 125: 0.0009618532739573133
Epoch 150: 0.0008135254353022598
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 412.2293553352356
--------------------------------------------------
En: 350	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.483424296325254
epoch 10:	21.32296445453031
epoch 20:	21.208561790741996
epoch 30:	21.07646651660164
epoch 40:	20.92304640529438
epoch 50:	20.75964281406563
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.5765765652890162
Epoch 25: 0.00343464729970183
Epoch 50: 0.0017249583785169116
Epoch 75: 0.0011724351114286722
Epoch 100: 0.0008957068511127861
Epoch 125: 0.0007284170601918034
Epoch 150: 0.0006159124343828377
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 444.66883993148804
--------------------------------------------------
En: 350	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	26.179839302784472
epoch 10:	25.576090774997734
epoch 20:	25.205913497963767
epoch 30:	24.991913288263383
epoch 40:	24.741204662997685
epoch 50:	24.442659178674234
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.3928176827891425
Epoch 25: 0.0045135437331470405
Epoch 50: 0.002266400198116571
Epoch 75: 0.0015422260070169311
Epoch 100: 0.0011797430327651454
Epoch 125: 0.0009606135064677801
Epoch 150: 0.0008132255657110363
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 465.2196583747864
--------------------------------------------------
En: 400	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.29926222026892
epoch 10:	21.11488588670334
epoch 20:	20.975203584096853
epoch 30:	20.8222107088519
epoch 40:	20.699779291364077
epoch 50:	20.598483994136803
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.3223790350604925
Epoch 25: 0.006836556179405306
Epoch 50: 0.0030808352925388053
Epoch 75: 0.002042173476105414
Epoch 100: 0.001545674896442506
Epoch 125: 0.001252033517398972
Epoch 150: 0.0010569401463085754
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 290.41162514686584
--------------------------------------------------
En: 400	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	20.873928573539683
epoch 10:	20.973121449008314
epoch 20:	20.92089117847015
epoch 30:	20.770119413460357
epoch 40:	20.60951863435732
epoch 50:	20.41223513713303
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.506060616646376
Epoch 25: 0.005318843104671108
Epoch 50: 0.0025992306247313007
Epoch 75: 0.0017564939765909078
Epoch 100: 0.0013388524918174956
Epoch 125: 0.0010874219554021162
Epoch 150: 0.0009186486022665955
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 328.465763092041
--------------------------------------------------
En: 400	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	24.61454244011288
epoch 10:	24.165791871198095
epoch 20:	23.898565013816025
epoch 30:	23.507805629638888
epoch 40:	23.217714559631055
epoch 50:	22.950587830804373
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.8024391887044589
Epoch 25: 0.004980292215989616
Epoch 50: 0.0024091071341528136
Epoch 75: 0.0016343711622303439
Epoch 100: 0.0012526584923488843
Epoch 125: 0.0010230263031807305
Epoch 150: 0.0008687115281633649
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 349.7385263442993
--------------------------------------------------
En: 400	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.57811228164099
epoch 10:	19.531581143426457
epoch 20:	19.454097343032256
epoch 30:	19.33968535376342
epoch 40:	19.26712758466028
epoch 50:	19.189677536941254
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.0452415409938633
Epoch 25: 0.009580484759180764
Epoch 50: 0.0038255338946474433
Epoch 75: 0.002465984767284876
Epoch 100: 0.001841062652818794
Epoch 125: 0.001478504478329309
Epoch 150: 0.001240443892846445
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 373.60481214523315
--------------------------------------------------
En: 400	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.44380365005841
epoch 10:	22.18118133211857
epoch 20:	21.870520614361233
epoch 30:	21.564627985445398
epoch 40:	21.327189328690753
epoch 50:	21.158644859832915
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 3.3630839237781767
Epoch 25: 0.0322486193987614
Epoch 50: 0.006470660646891701
Epoch 75: 0.0036925446651192334
Epoch 100: 0.0026591227651052425
Epoch 125: 0.0021045947864983343
Epoch 150: 0.001754366663547747
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 394.7416350841522
--------------------------------------------------
En: 400	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.067191727425442
epoch 10:	21.85072824707176
epoch 20:	21.645704386715828
epoch 30:	21.48283035081787
epoch 40:	21.342134753992667
epoch 50:	21.225930878568835
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 4.664895919697119
Epoch 25: 0.011087983155647407
Epoch 50: 0.004629468243976476
Epoch 75: 0.0030184831242522137
Epoch 100: 0.002271065301151509
Epoch 125: 0.0018353244462623764
Epoch 150: 0.001548141431008787
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 424.0806291103363
--------------------------------------------------
En: 400	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.83735857049623
epoch 10:	21.6966712991304
epoch 20:	21.55373292595226
epoch 30:	21.3729331266656
epoch 40:	21.21196052355603
epoch 50:	21.056937233283783
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.225473312827206
Epoch 25: 0.004561033065883681
Epoch 50: 0.0023614989326534563
Epoch 75: 0.0016313918210705517
Epoch 100: 0.0012597303469943078
Epoch 125: 0.0010324025330400927
Epoch 150: 0.0008781099855099228
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 445.601202249527
--------------------------------------------------
En: 400	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.208023393138784
epoch 10:	22.053769889940078
epoch 20:	21.960512654793757
epoch 30:	21.822568038324405
epoch 40:	21.68941615194226
epoch 50:	21.614434913131582
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.4257325802440945
Epoch 25: 0.006749673141193095
Epoch 50: 0.002956279186164718
Epoch 75: 0.001948320762639671
Epoch 100: 0.0014700651116590457
Epoch 125: 0.0011878640882593396
Epoch 150: 0.001000574793070857
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 467.4432773590088
--------------------------------------------------
En: 400	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.620640634296038
epoch 10:	21.464633082677484
epoch 20:	21.327736501397787
epoch 30:	21.21000642787328
epoch 40:	21.12543875600614
epoch 50:	21.040567549076805
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.7863706957322085
Epoch 25: 0.004506047688097083
Epoch 50: 0.0021519218612262884
Epoch 75: 0.0014379986099198755
Epoch 100: 0.0010880538694894388
Epoch 125: 0.0008790506792504447
Epoch 150: 0.0007396408634628409
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 483.75329208374023
--------------------------------------------------
En: 400	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	24.21576054852788
epoch 10:	23.834616366594496
epoch 20:	23.634668806716583
epoch 30:	23.389665809572435
epoch 40:	23.172122843374204
epoch 50:	22.973913730877378
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.7932184366998967
Epoch 25: 0.004098654616596313
Epoch 50: 0.0020359123083156837
Epoch 75: 0.0013854775539358676
Epoch 100: 0.001060535484453453
Epoch 125: 0.0008639618806689597
Epoch 150: 0.0007315597144861918
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 506.7841682434082
--------------------------------------------------
En: 450	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.480410300589888
epoch 10:	19.270216239252143
epoch 20:	19.181743317136444
epoch 30:	19.121840605112464
epoch 40:	19.06802197361673
epoch 50:	19.00808566281007
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.9506104756965141
Epoch 25: 0.006761239584486656
Epoch 50: 0.0029818342097444528
Epoch 75: 0.001953693484311047
Epoch 100: 0.0014659981608075418
Epoch 125: 0.0011792213520753388
Epoch 150: 0.0009896156144535853
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 329.0379192829132
--------------------------------------------------
En: 450	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.14537635556963
epoch 10:	22.02428544297504
epoch 20:	21.85806883260969
epoch 30:	21.583794913698213
epoch 40:	21.377077305604647
epoch 50:	21.225253482167155
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.3941687764051753
Epoch 25: 0.0068307089400991144
Epoch 50: 0.0028415753545386734
Epoch 75: 0.001835745346423484
Epoch 100: 0.0013667696134449078
Epoch 125: 0.001093445452858399
Epoch 150: 0.0009138176779571332
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 353.3493106365204
--------------------------------------------------
En: 450	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.197007563022478
epoch 10:	20.973039906864724
epoch 20:	20.840872481148704
epoch 30:	20.746607573113938
epoch 40:	20.65671249527124
epoch 50:	20.573054197491707
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.793341795193364
Epoch 25: 0.0071383617300533955
Epoch 50: 0.003169629574181511
Epoch 75: 0.0020827342965619383
Epoch 100: 0.001566120304488051
Epoch 125: 0.0012620310666350857
Epoch 150: 0.001060850461573082
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 382.7691419124603
--------------------------------------------------
En: 450	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.737049829509935
epoch 10:	23.342088708662683
epoch 20:	22.962459100591115
epoch 30:	22.62586613573776
epoch 40:	22.312403739551865
epoch 50:	22.024203246056192
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.0929574653105982
Epoch 25: 0.004528576672762073
Epoch 50: 0.0022926927458162237
Epoch 75: 0.0015756926622223129
Epoch 100: 0.001214070513391502
Epoch 125: 0.00099370006400559
Epoch 150: 0.0008443876020181528
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 399.6836562156677
--------------------------------------------------
En: 450	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.28239025882685
epoch 10:	21.901378600496578
epoch 20:	21.547192228849006
epoch 30:	21.289890543279974
epoch 40:	21.135101648



Took: 422.06435465812683
--------------------------------------------------
En: 450	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.253703588989598
epoch 10:	21.099010866535025
epoch 20:	20.986232313193504
epoch 30:	20.835127505129385
epoch 40:	20.663269739148717
epoch 50:	20.525975899517746
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.6149101901210134
Epoch 25: 0.0036736357066465774
Epoch 50: 0.0017842374687513315
Epoch 75: 0.001206550868734701
Epoch 100: 0.0009209337034794187
Epoch 125: 0.0007490068757029281
Epoch 150: 0.0006335547715515279
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 442.0882349014282
--------------------------------------------------
En: 450	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	25.280431870318672
epoch 10:	24.662556515710882
epoch 20:	24.239630345665656
epoch 30:	23.796720164510795
epoch 40:	23.5056298227452
epoch 50:	23.27857060345561
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.6935389807063561
Epoch 25: 0.0037294238250203545
Epoch 50: 0.0018301065272088922
Epoch 75: 0.001239415518203365
Epoch 100: 0.0009451291102197645
Epoch 125: 0.0007674239758172187
Epoch 150: 0.0006479307032880614
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 462.37775564193726
--------------------------------------------------
En: 450	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.31745512608716
epoch 10:	22.143410409380298
epoch 20:	22.087596483365097
epoch 30:	22.014954046036813
epoch 40:	21.93852485032485
epoch 50:	21.85395940992021
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.0587220238619213
Epoch 25: 0.0037490824749823277
Epoch 50: 0.0018776769960627741
Epoch 75: 0.0012764910312644583
Epoch 100: 0.0009752389275076568
Epoch 125: 0.0007929442402851879
Epoch 150: 0.0006702364178894355
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 489.4837996959686
--------------------------------------------------
En: 450	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.106175220475524
epoch 10:	21.96101478177459
epoch 20:	21.7707268061975
epoch 30:	21.608824572406398
epoch 40:	21.496105958081085
epoch 50:	21.40459548220509
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 0.8767804274696853
Epoch 25: 0.002500961563595113
Epoch 50: 0.0012983104447979387
Epoch 75: 0.000895135414429764
Epoch 100: 0.000689612027524421
Epoch 125: 0.0005639379138320189
Epoch 150: 0.00047871735271302787
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 511.40969157218933
--------------------------------------------------
En: 450	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	24.410176788443167
epoch 10:	23.872874250352744
epoch 20:	23.581171205706834
epoch 30:	23.35619177288651
epoch 40:	23.116457838



Took: 540.7312471866608
--------------------------------------------------
En: 500	Tr: 40
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.32506725036926
epoch 10:	22.948120082494345
epoch 20:	22.67588085175165
epoch 30:	22.419216691680994
epoch 40:	22.21951075020728
epoch 50:	22.041202730160343
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.357492501934542
Epoch 25: 0.005289627370135956
Epoch 50: 0.0024846411058879986
Epoch 75: 0.001662530785728854
Epoch 100: 0.0012619977706216795
Epoch 125: 0.0010229743627793073
Epoch 150: 0.0008633910558913798
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 362.58656549453735
--------------------------------------------------
En: 500	Tr: 80
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	25.773203774062928
epoch 10:	25.39464151106916
epoch 20:	24.974959100055656
epoch 30:	24.696571485576015
epoch 40:	24.458980812377412
epoch 50:	24.262749914963138
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.188106651350972
Epoch 25: 0.016021883354080963
Epoch 50: 0.0031906983974953743
Epoch 75: 0.0018693362858012452
Epoch 100: 0.0013473847658086267
Epoch 125: 0.001062102685673067
Epoch 150: 0.0008807185465880609
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 381.96713972091675
--------------------------------------------------
En: 500	Tr: 120
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.407852993520674
epoch 10:	22.19765419535068
epoch 20:	22.053118868649726
epoch 30:	21.88080576246812
epoch 40:	21.693944213837582
epoch 50:	21.502920229378386
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.9172130795955686
Epoch 25: 0.008242695693232441
Epoch 50: 0.0028673337067190124
Epoch 75: 0.0018431626432387812
Epoch 100: 0.0013780823806670132
Epoch 125: 0.0011080332435870245
Epoch 150: 0.0009302892208002196
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 416.12929034233093
--------------------------------------------------
En: 500	Tr: 160
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	26.23465658431771
epoch 10:	25.865942095217928
epoch 20:	25.50771169969747
epoch 30:	25.20547740437312
epoch 40:	24.9406680



Took: 443.16038942337036
--------------------------------------------------
En: 500	Tr: 200
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.20188350251103
epoch 10:	21.184679665467165
epoch 20:	21.061031887616757
epoch 30:	20.962291531255946
epoch 40:	20.902442518636207
epoch 50:	20.8387017272986
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.100452511768235
Epoch 25: 0.005772479155760136
Epoch 50: 0.002741731791377658
Epoch 75: 0.0018401175485964164
Epoch 100: 0.0013989497672836089
Epoch 125: 0.0011349342187632568
Epoch 150: 0.0009582731701454707
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 453.88634300231934
--------------------------------------------------
En: 500	Tr: 240
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	19.233396992311647
epoch 10:	19.136532771019983
epoch 20:	19.039107660408487
epoch 30:	18.964745874229916
epoch 40:	18.908266651539336
epoch 50:	18.8680766258808
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 2.0242023599048307
Epoch 25: 0.004200307042197463
Epoch 50: 0.0021085711022199366
Epoch 75: 0.0014359210522426307
Epoch 100: 0.001099250021232709
Epoch 125: 0.0008955520042584831
Epoch 150: 0.0007583601326421165
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 475.07741618156433
--------------------------------------------------
En: 500	Tr: 280
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	23.23607595117543
epoch 10:	23.03516256578029
epoch 20:	22.871645735345645
epoch 30:	22.68683516354226
epoch 40:	22.51458901428786
epoch 50:	22.355764833754638
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.7401569580610576
Epoch 25: 0.00354425683439876
Epoch 50: 0.001807268299340847
Epoch 75: 0.0012367929765194055
Epoch 100: 0.0009484570683868335
Epoch 125: 0.000773110689861949
Epoch 150: 0.0006546736124327697
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 502.40260910987854
--------------------------------------------------
En: 500	Tr: 320
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.52443297041617
epoch 10:	22.371771648213887
epoch 20:	22.166387939094676
epoch 30:	21.976266623127916
epoch 40:	21.794654725466525
epoch 50:	21.639506909476616
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 0.9845161505359711
Epoch 25: 0.0029758075678360946
Epoch 50: 0.0014567049426783736
Epoch 75: 0.0009944317526608731
Epoch 100: 0.0007643288219724917
Epoch 125: 0.0006249471765346054
Epoch 150: 0.00053083675341374
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 518.047669172287
--------------------------------------------------
En: 500	Tr: 360
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	22.509238814379973
epoch 10:	22.39081293116903
epoch 20:	22.264130199356277
epoch 30:	22.160243785525306
epoch 40:	22.05777670772466
epoch 50:	21.953472981300134
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.1920903385275101
Epoch 25: 0.0027584391851697746
Epoch 50: 0.001413999952510124
Epoch 75: 0.0009679405087190838
Epoch 100: 0.0007420950841853303
Epoch 125: 0.0006047076177237997
Epoch 150: 0.0005119090998462969
Using well-known algorithms: Logistic Regression, RandomForest and MLP




Took: 527.7090079784393
--------------------------------------------------
En: 500	Tr: 400
Using first variation (Regressor and Classifier with score vectors)
epoch 0:	21.078674286206486
epoch 10:	21.045784127123103
epoch 20:	21.000628341071803
epoch 30:	20.94305857595799
epoch 40:	20.81600526490899
epoch 50:	20.70244733744285
Using second variation (average of outputs produced by each set of weight matrices)
Epoch 0: 1.6150417969505482
Epoch 25: 0.0033072562107956415
Epoch 50: 0.0016942974460660942
Epoch 75: 0.001160200121045973
Epoch 100: 0.0008898304969048196
Epoch 125: 0.0007253599301384542
Epoch 150: 0.0006142749163768871
Using well-known algorithms: Logistic Regression, RandomForest and MLP
Took: 559.8425574302673
--------------------------------------------------




In [104]:
pickle.dump(robustness, open("robustness.results", "wb"))

In [None]:
# HOW TO CHEAT LIKE A PRO
# """
# def test_selective(df_test, W1, W2, W3):
#     reset_graph()
#     x = tf.placeholder(tf.float32, [None, 300])
#     y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

#     w1 = tf.placeholder(tf.float32, [300, 300])
#     w2 = tf.placeholder(tf.float32, [300, 300])
#     w3 = tf.placeholder(tf.float32, [300, 10])

#     b1 = tf.Variable(tf.zeros([300]))
#     b2 = tf.Variable(tf.zeros([300]))
#     b3 = tf.Variable(tf.zeros([10]))

#     l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
#     l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
#     pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)
    
#     correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#     instance_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
    
#         # Testing the model
#         LSMR_test = preprocess_data(df_test)
#         X_test, y_test = get_test(LSMR_test)
#         accuracy = 0.
#         for i in range(len(X_test)):
#             best_instance_accuracy = float("-inf")
#             for language, score, movie_id in W1:
#                 w_1 = W1[(language, score, movie_id)]
#                 w_2 = W2[(language, score)]
#                 w_3 = W3[score]
#                 a = instance_accuracy.eval({x: np.atleast_2d(X_test[i]), y: np.atleast_2d(y_test[i]),
#                                    w1:w_1,
#                                    w2:w_2,
#                                    w3:w_3})
#                 if a > best_instance_accuracy:
#                     best_instance_accuracy = a
#             accuracy += best_instance_accuracy

#     return accuracy/len(X_test)
# """

# 3-layer NN > needs at least 3 days for training

In [None]:
# gpu is a must
def train_deep(df_train, epochs=100, learning_rate=0.1, random_state=42):
    LSMR_train = preprocess_data(df_train)
    np.random.seed(random_state)
    data_dict, L1, L2, L3 = get_data_dict(LSMR_train, get_L2and3=True)
    init_weights = lambda layer, i, o: {k:2*np.random.random((i, o))-1 for k in layer}
    W1 = init_weights(L1, 300, 300)  # (languge, score, movie_id)
    W2 = init_weights(L2, 300, 300)  # (languge, score):
    W3 = init_weights(L3, 300, 10)  # score:
    
    
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.Variable(tf.zeros([300, 300]))
    w2 = tf.Variable(tf.zeros([300, 300]))
    w3 = tf.Variable(tf.zeros([300, 10]))

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)


    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    training_curve = dict()
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            for e in range(epochs+1):
                start = time.time()
                avg_cost = 0.
                for _, row in LSMR_train.iterrows():
                    score = row["Score"]
                    y_ = np.zeros(10)
                    y_[score-1] = 1
                    y_ = np.atleast_2d(y_)
                    x_ = np.atleast_2d(row["rev_vec"])
                    w_1, w_2, w_3 , _, c = sess.run([w1, w2, w3, optimizer, cost], feed_dict={x: x_,y: y_})               
                    avg_cost += c
                avg_cost /= len(LSMR_train)
                training_curve[e] = (avg_cost, time.time()-start)
                if e%10==0:
                    print("Epoch {}: {}".format(e, avg_cost))

    return w_1, w_2, w_3, training_curve

In [None]:
def test_deep(df_test, w_1, w_2, w_3):
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)
    
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())

            # Testing the model
            LSMR_test = preprocess_data(df_test)
            X_test, y_test = get_test(LSMR_test)
            return accuracy.eval({x: X_test,
                                  y: y_test,
                                  w1:w_1,w2:w_2,
                                  w3:w_3})

In [None]:
NUM_TRIALS = 1
scores_incremental = dict()
learning_curves = dict()
for i in range(NUM_TRIALS):
    scores_incremental[i] = dict()
    learning_curves[i] = dict()
    print("Trial:\t{}".format(i+1))
    k = 0
    skf = StratifiedKFold(n_splits=10, random_state=i)
    for train_index, test_index in skf.split(df["Review"], df["Language"]):
        start = time.time()
        w1, w2, w3, learning_curve = train_deep(df.loc[train_index], random_state=i, epochs=10000)
        s = test_deep(df.loc[test_index], w1, w2, w3)
        k += 1
        print("K:\t{}\nScore:\t{}".format(k, s))
        print("took:", time.time()-start)
        scores_incremental[i][k] = s
        learning_curves[i][k] = learning_curve
    print("*"*10)
    try:
        print("Trial {} avg score:\t {}".format(i+1, np.mean(list(scores_incremental[i].values()))))
    except:
        continue
    print("-"*30)