In [1]:
import tensorflow as tf
import pickle, pandas as pd, re, numpy as np, ast, warnings

from joblib import Parallel, delayed

import time

from collections import defaultdict, OrderedDict
from itertools import chain, starmap
from itertools import product
import unicodedata
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, precision_score, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline

from TurkishStemmer import TurkishStemmer
warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')
import gensim
from textblob import TextBlob

  from ._conv import register_converters as _register_converters


In [2]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
reset_graph()

In [3]:
df = pd.read_csv("datasets/movie_data.csv")
df.head()

Unnamed: 0,Language,Movie_ID,Review,Score
0,en,-800777728,i love science fiction and i hate superheroes ...,9
1,en,-800777728,the movie is absolutely incredible all the per...,10
2,en,-1018312192,in a cinematic era dominated by reboots and mi...,8
3,en,-1018312192,movie review on rise of the planet of the apes...,4
4,en,-1018312192,during experiments to find a cure for alzheime...,7


In [4]:
df.groupby("Score").count()

Unnamed: 0_level_0,Language,Movie_ID,Review
Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,29,29,29
2,21,21,21
3,14,14,14
4,23,23,23
5,83,83,83
6,43,43,43
7,71,71,71
8,207,207,207
9,175,175,175
10,334,334,334


In [5]:
# en_vects = gensim.models.KeyedVectors.load_word2vec_format(r"../NLP_data/GoogleNews-vectors-negative300.bin", binary=True)

In [6]:
# tr_vects = gensim.models.KeyedVectors.load_word2vec_format(r"../NLP_data/wiki.tr/wiki.tr.vec", binary=False)

In [7]:
turkish_stemmer = TurkishStemmer()
def clean(text, language="en", stem=True):
    global turkish_stemmer
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').lower().decode("ascii")
    
    if language == "tr":
        if stem:
            text= ' '.join([turkish_stemmer.stem(w) for w in text.split()])
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r'[0-9]', '#', text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ", text)
    text = re.sub(r"\+", " ", text)
    text = re.sub(r"\-", " ", text)
    text = re.sub(r"\=", " ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r":", " ", text)
    text = re.sub(r"e(\s)?-(\s)?mail", "email", text)

    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    return TextBlob(text)


In [8]:
VECTOR_SIZE = 300
def vectorize(text, language):
    global VECTOR_SIZE            
    blob = clean(text, language)
    vector = np.zeros(VECTOR_SIZE)
    if len(blob.words) < 1:
        return None

    for word in blob.words:
        try:
            if language == "en":
                vector += globals()["en_vects"][word]
            else:
                vector += globals()["tr_vects"][word]
        except KeyError:
            continue
    vector /= len(blob.words)
    return vector

In [9]:
def getvec(x):
    lang, rev = x.split(":::::")
    return vectorize(rev, lang)

In [10]:
# LMSR
def preprocess_data(df, language_column="Language", review_column="Review"):
    LMSR_df = df.copy()
    LMSR_df["lang_rev"] = LMSR_df[[language_column, review_column]].apply(lambda x: x[0]+":::::"+x[1], axis=1)
    LMSR_df["rev_vec"] = LMSR_df["lang_rev"].apply(lambda x:getvec(x))
    LMSR_df.drop(["lang_rev", "Review"], axis=1, inplace=True)
    return LMSR_df

In [11]:
def distance_accuracy(y_true, y_predict):
    res = 0
    for i in range(len(y_true)):
        res += abs(y_true[i]-y_predict[i])
    return 1-res/(len(y_true)*len(set(y_true)))

In [12]:
def get_XYy(LMSR):
    X = np.zeros((len(LMSR), VECTOR_SIZE))
    Y = np.zeros((len(LMSR), VECTOR_SIZE))
    y = np.zeros((len(LMSR)))
    i = 0
    for rev in LMSR.iterrows():
        score = rev[1][2]
        rev_vec = rev[1][3]
        score_vec = rev[1][4]

        X[i] = rev_vec
        Y[i] = score_vec
        y[i] = score

        i += 1
    return X, Y, y

In [13]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

In [14]:
def sigmoid(x, derive=False):
    if derive:
        return x*(1-x)
    return 1/(1+np.exp(-x))

In [15]:
def get_data_dict(df, get_L2and3=False):
    data_dict = dict() #{language:{score: {movie_id: [rev1, rev2, ..., revn]}}}
    L1 = dict()  # {(languge, score, movie_id): list of reviews with the same score with the same language}
    L2 = dict()  # {(language, score): None}
    L3 = dict()  # {score: None}
    for _, row in df.iterrows():
        lang = row["Language"]
        movie_id = row["Movie_ID"]
        score = row["Score"]
        review = row["rev_vec"]

        data_dict.setdefault(lang, {})
        data_dict[lang].setdefault(score, {})
        data_dict[lang][score].setdefault(movie_id, [])
        data_dict[lang][score][movie_id].append(review)
        
        L1.setdefault((lang, score, movie_id), list())
        L1[(lang, score, movie_id)].append(review)
        if get_L2and3:    
            L2[(lang, score)] = None
            L3[score] = None
    if get_L2and3:
        return data_dict, L1, L2, L3
    return data_dict, L1

In [16]:
def get_L2(LSM_R, data_dict):
    L2 = dict()  # {(language, score): list of movies vectors}
    for language in data_dict:
        for score in data_dict[language]:
            for movie_id in data_dict[language][score]:
                L2.setdefault((language, score), list())
                L2[(language, score)].append(LSM_R[(language, score, movie_id)])
    return L2

In [17]:
def get_L3(LS_MR, data_dict):
    L3 = dict()  # {score: vector of merged languages for that score}
    for language in data_dict:
        for score in data_dict[language]:
            L3.setdefault(score, list())
            L3[score].append(LS_MR[(language, score)])
    return L3

In [18]:
def merge(L, W):
    merged = dict()  # {item: vector of merged subitems}
    for i, item in enumerate(sorted(L)):
        for subitem in L[item]:
            merged.setdefault(item, [np.zeros(VECTOR_SIZE),0])
            merged[item][0] += sigmoid(subitem.dot(W[i]))
            merged[item][1] += 1
    for item in merged:
        merged[item] = merged[item][0]/ merged[item][1]
    return merged

In [19]:
def update_weights(L, delta, W, alpha=0.1):
    for i, k in enumerate(sorted(L)):
        for l in L[k]:
            W[i] += l.T.dot(delta[i]) *alpha
    return W

In [20]:
def get_layer_error(delta, W):
    error = 0
    for i in range(len(delta)):
        error += delta[i].dot(W[i].T)
    return error/len(delta)

In [21]:
def get_layer_delta(error, layer, size):
    delta = np.zeros((size, VECTOR_SIZE))
    j = 0
    for i,k in enumerate(sorted(layer)):
        for l in layer[k]:
            delta[j] = error[i]*sigmoid(l, True)
            j += 1
    return delta

## Training

## Full-batch

In [38]:
def get_score_vects(df, iterations=100, alpha=0.1, random_state=42, W1=None, W2=None, W3=None, W4=None):
    LSMR = preprocess_data(df)
    data_dict, L1 = get_data_dict(LSMR)
    y = softmax(list(LSMR.Score))
#     np.random.seed(random_state)
    learning_curve = dict()
    for i in range(iterations+1):
        # forward propagation
        if W1 is None:
            W1 = 2*np.random.random((len(L1), 300, 300))-1

        LSM_R = merge(L1, W1)
        L2 = get_L2(LSM_R, data_dict)
        if W2 is None:
            W2 = 2*np.random.random((len(L2), 300, 300))-1

        LS_MR = merge(L2, W2)
        L3 = get_L3(LS_MR, data_dict)
        if W3 is None:
            W3 = 2*np.random.random((len(L3), 300, 300))-1

        score_vectors_dict = merge(L3, W3)
        l4 = sigmoid(np.array([v for k, v in sorted(score_vectors_dict.items())]))
        if W4 is None:
            W4 = 2*np.random.random((300, len(LSMR)))-1
        
        l5 = softmax(l4.dot(W4))  # predicted scores
        
        # Calculate the error
        l5_error = np.mean(np.dot(np.log(l5), y))
        
        # Back propagation
        l5_delta = l5_error * sigmoid(l5, True)
        W4 += l4.T.dot(l5_delta)*alpha
        
        l4_error = l5_delta.dot(W4.T)
        l4_delta = l4_error * sigmoid(l4, True)
        
        W3 = update_weights(L3, l4_delta, W3, alpha)
        
        l3_error = get_layer_error(l4_delta, W3)
        l3_delta = get_layer_delta(l3_error, L3, len(L2))
        
        W2 = update_weights(L2, l3_delta, W2, alpha)
        
        l2_error = get_layer_error(l3_delta, W2)
        l2_delta = get_layer_delta(l2_error, L2, len(LSMR))
        
        W1 = update_weights(L1, l2_delta, W1, alpha)
        learning_curve[i] = l5_error
        if i%10 == 0:
            print("epoch {}:\t{}".format(i, np.abs(l5_error)))
        if i%100 == 0:
            alpha *= 0.9
    return LSMR, score_vectors_dict, learning_curve

In [39]:
def fit(LSMR, score_vect_dicts,random_state=42, regressor=MLPRegressor(), classifier=MLPClassifier()):
    LSMR["score_vec"] = LSMR["Score"].apply(lambda x: score_vect_dicts[x] if x in score_vect_dicts else np.NaN)
    LSMR.dropna(inplace=True)
    
    X, Y, y = get_XYy(LSMR)
    
    regressor.random_state = random_state
    classifier.random_state = random_state
        
    regressor.fit(X, Y)
    classifier.fit(Y, y)
    return regressor, classifier

In [40]:
def predict(LSMR, score_vect_dicts, regressor, classifier):
    LSMR["score_vec"] = LSMR["Score"].apply(lambda x: score_vect_dicts[x] if x in score_vect_dicts else np.NaN)
    LSMR.dropna(inplace=True)
    
    X, Y, y = get_XYy(LSMR)
    
    preds_score_vecs = regressor.predict(X)
    pred_scores = classifier.predict(preds_score_vecs)
    
    return pred_scores, y

In [41]:
NUM_TRIALS = 1
scores = dict()
learning_curves = dict()
for i in range(NUM_TRIALS):
    print("Trial:\t{}".format(i+1))
    scores[i] = dict()
    learning_curves[i] = dict()
    k = 0
    skf = StratifiedKFold(n_splits=10, random_state=i)
    for train_index, test_index in skf.split(df["Review"], df["Language"]):
        start = time.time()
        LSMR, score_vect_dicts, training_curve = get_score_vects(df.loc[train_index], random_state=i)
        regressor, classifier = fit(LSMR, score_vect_dicts, random_state=i)
        preds, true = predict(preprocess_data(df.loc[test_index]), score_vect_dicts, regressor, classifier)
        s = distance_accuracy(true, preds)
        k += 1
        print("K:\t{}\nScore:\t{}".format(k, s))
        print("took:", time.time()-start, "seconds")
        scores[i][k] = s
        learning_curves[i][k] = training_curve
    print("*"*10)
    try:
        print("Trial {} avg score:\t {}".format(i+1, np.mean(list(scores[i].values()))))
    except:
        continue
    print("-"*30)

Trial:	1
epoch 0:	21.857929193952334
epoch 10:	11.78191135546589
epoch 20:	10.743861036946399
epoch 30:	10.564884861268293
epoch 40:	10.082234042025572
epoch 50:	9.801125166103542
epoch 60:	9.627557759616256
epoch 70:	9.516972934779547
epoch 80:	9.444329098905214
epoch 90:	9.394712575639238
epoch 100:	9.360967767028788
K:	1
Score:	0.7822222222222223
took: 24.64276146888733 seconds
epoch 0:	21.532691590684184
epoch 10:	11.766409764075963
epoch 20:	10.74274440048058
epoch 30:	10.179095377054258
epoch 40:	9.724601032603228
epoch 50:	9.472044163694722
epoch 60:	9.328757871814979
epoch 70:	9.24747872271433
epoch 80:	9.203535284813588
epoch 90:	9.181422301413708
epoch 100:	9.170314841048413
K:	2
Score:	0.745
took: 24.919804573059082 seconds
epoch 0:	22.65762101851372
epoch 10:	12.334990193081541
epoch 20:	10.89924189676108
epoch 30:	10.220339226671955
epoch 40:	9.858984762482653
epoch 50:	9.66579546814998
epoch 60:	9.562490542218395
epoch 70:	9.50355247016409
epoch 80:	9.464500105346442
epoc

In [42]:
pickle.dump([scores, learning_curves], open("batch_no_tf.results", "wb"))

## Incremental with tensorflow

In [43]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [44]:
def get_test(LSMR):
    X = dict()
    y = dict()
    for _, row in LSMR.iterrows():
        score = row["Score"]
        y_ = np.zeros(10)
        y_[score-1] = 1
        y[len(y)] = y_
        X[len(X)] = row["rev_vec"]
    return np.array(list(X.values())), np.array(list(y.values()))

In [45]:
def train_selective(df_train,epochs=100, learning_rate = 0.1, random_state=42):
    LSMR_train = preprocess_data(df_train)
    np.random.seed(random_state)
    data_dict, L1, L2, L3 = get_data_dict(LSMR_train, get_L2and3=True)
    init_weights = lambda layer, i, o: {k:2*np.random.random((i, o))-1 for k in layer}
    W1 = init_weights(L1, 300, 300)  # (languge, score, movie_id)
    W2 = init_weights(L2, 300, 300)  # (languge, score):
    W3 = init_weights(L3, 300, 10)  # score:
    
    
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)


    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    training_curve = dict()
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            for e in range(epochs+1):
                avg_cost = 0.
                for _, row in LSMR_train.iterrows():
                    lang = row["Language"]
                    movie_id = row["Movie_ID"]
                    score = row["Score"]
                    y_ = np.zeros(10)
                    y_[score-1] = 1
                    y_ = np.atleast_2d(y_)
                    x_ = np.atleast_2d(row["rev_vec"])
                    w1_,w2_,w3_,_, c = sess.run([w1, w2, w3, optimizer, cost],
                                             feed_dict={x: x_,
                                                        y: y_,
                                                        w1:W1[(lang, score, movie_id)],
                                                        w2:W2[(lang, score)],
                                                        w3:W3[score]})
                    W1[(lang, score, movie_id)] = w1_
                    W2[(lang, score)] = w2_
                    W3[score] = w3_

                    avg_cost += c
                training_curve[e] = avg_cost
                if e%10==0:
                    print("Epoch {}: {}".format(e, avg_cost/len(LSMR_train)))

            return W1, W2, W3, training_curve

In [46]:
def get_max_index(array):
    indx = None
    max_ = float("-inf")
    for i, e in enumerate(array):
        if e > max_:
            max_ = e
            indx = i
    return indx, max_

In [47]:
def predict_selective(df, W1, W2, W3):
    LSMR = preprocess_data(df)
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)

    
    
    
    prediction = tf.argmax(pred, 1)
    preds = np.zeros(len(LSMR))
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            j = 0
            for _, row in LSMR.iterrows():
                v = row["rev_vec"]
                predicted_scores = np.zeros(len(W1))
                for i, info in enumerate(W1):
                    language, score, movie_id = info
                    w_1 = W1[(language, score, movie_id)]
                    w_2 = W2[(language, score)]
                    w_3 = W3[score]

                    predicted_scores[i] = prediction.eval({x: np.atleast_2d(v),
                                                           w1:w_1,w2:w_2,w3:w_3})

                max_index, probability = get_max_index(softmax(predicted_scores))
                predicted_score = predicted_scores[max_index]

                preds[j] = predicted_score
                j+=1


    return preds, np.array(list(LSMR.Score))

In [49]:
NUM_TRIALS = 1
scores_incremental = dict()
learning_curves = dict()
for i in range(NUM_TRIALS):
    print("Trial:\t{}".format(i+1))
    scores_incremental[i] = dict()
    learning_curves[i] = dict()
    k = 0
    skf = StratifiedKFold(n_splits=10, random_state=i)
    for train_index, test_index in skf.split(df["Review"], df["Language"]):
        start = time.time()
        # approx 3 epochs per second
        W1, W2, W3, training_curve = train_selective(df.loc[train_index], epochs=200)
        
        preds, true = predict_selective(df.loc[test_index], W1, W2, W3)
        s = distance_accuracy(true, preds)
        scores_incremental[i][k] = s
        learning_curves[i][k] = training_curve
        k += 1
        print("K:\t{}\nScore:\t{}".format(k, s))
        print("took:", time.time()-start)
        scores_incremental[i][k] = s
        learning_curves[i][k] = training_curve
    print("*"*10)
    try:
        print("Trial {} avg score:\t {}".format(i+1, np.mean(list(scores_incremental[i].values()))))
    except:
        continue
    print("-"*30)

Trial:	1
Epoch 0: 1.6382539035607544
Epoch 10: 0.00798041373439547
Epoch 20: 0.00414424662976267
Epoch 30: 0.002863710963413016
Epoch 40: 0.002210395185328606
Epoch 50: 0.0018101873358439965
Epoch 60: 0.001538219162547547
Epoch 70: 0.001340616424115271
Epoch 80: 0.0011901179336645227
Epoch 90: 0.0010714582095129623
Epoch 100: 0.0009753979668150552
Epoch 110: 0.000895884656217984
Epoch 120: 0.0008289639292504741
Epoch 130: 0.0007718336999662117
Epoch 140: 0.0007223806926049293
Epoch 150: 0.0006791661756869871
Epoch 160: 0.0006410830465675745
Epoch 170: 0.0006072232363415727
Epoch 180: 0.0005769201475130911
Epoch 190: 0.0005496473716306759
Epoch 200: 0.0005249699332408555
K:	1
Score:	0.8311111111111111
took: 254.67744493484497
Epoch 0: 1.1594848213867388
Epoch 10: 0.005971914769227927
Epoch 20: 0.0031005067735289534
Epoch 30: 0.0021304092739592305
Epoch 40: 0.0016353528789800798
Epoch 50: 0.0013328843151167449
Epoch 60: 0.0011280476619948685
Epoch 70: 0.0009797295794932224
Epoch 80: 0.00

In [51]:
pickle.dump([scores_incremental, learning_curves], open("incremental_tf.results", "wb"))

In [None]:
# HOW TO CHEAT LIKE A PRO
# """
# def test_selective(df_test, W1, W2, W3):
#     reset_graph()
#     x = tf.placeholder(tf.float32, [None, 300])
#     y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

#     w1 = tf.placeholder(tf.float32, [300, 300])
#     w2 = tf.placeholder(tf.float32, [300, 300])
#     w3 = tf.placeholder(tf.float32, [300, 10])

#     b1 = tf.Variable(tf.zeros([300]))
#     b2 = tf.Variable(tf.zeros([300]))
#     b3 = tf.Variable(tf.zeros([10]))

#     l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
#     l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
#     pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)
    
#     correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#     instance_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
    
#         # Testing the model
#         LSMR_test = preprocess_data(df_test)
#         X_test, y_test = get_test(LSMR_test)
#         accuracy = 0.
#         for i in range(len(X_test)):
#             best_instance_accuracy = float("-inf")
#             for language, score, movie_id in W1:
#                 w_1 = W1[(language, score, movie_id)]
#                 w_2 = W2[(language, score)]
#                 w_3 = W3[score]
#                 a = instance_accuracy.eval({x: np.atleast_2d(X_test[i]), y: np.atleast_2d(y_test[i]),
#                                    w1:w_1,
#                                    w2:w_2,
#                                    w3:w_3})
#                 if a > best_instance_accuracy:
#                     best_instance_accuracy = a
#             accuracy += best_instance_accuracy

#     return accuracy/len(X_test)
# """

# 3-layer NN > needs at least 3 days for training

In [31]:
# gpu is a must
def train_deep(df_train, epochs=100, learning_rate=0.1, random_state=42):
    LSMR_train = preprocess_data(df_train)
    np.random.seed(random_state)
    data_dict, L1, L2, L3 = get_data_dict(LSMR_train, get_L2and3=True)
    init_weights = lambda layer, i, o: {k:2*np.random.random((i, o))-1 for k in layer}
    W1 = init_weights(L1, 300, 300)  # (languge, score, movie_id)
    W2 = init_weights(L2, 300, 300)  # (languge, score):
    W3 = init_weights(L3, 300, 10)  # score:
    
    
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.Variable(tf.zeros([300, 300]))
    w2 = tf.Variable(tf.zeros([300, 300]))
    w3 = tf.Variable(tf.zeros([300, 10]))

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)


    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    training_curve = dict()
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            for e in range(epochs+1):
                start = time.time()
                avg_cost = 0.
                for _, row in LSMR_train.iterrows():
                    score = row["Score"]
                    y_ = np.zeros(10)
                    y_[score-1] = 1
                    y_ = np.atleast_2d(y_)
                    x_ = np.atleast_2d(row["rev_vec"])
                    w_1, w_2, w_3 , _, c = sess.run([w1, w2, w3, optimizer, cost], feed_dict={x: x_,y: y_})               
                    avg_cost += c
                avg_cost /= len(LSMR_train)
                training_curve[e] = (avg_cost, time.time()-start)
                if e%10==0:
                    print("Epoch {}: {}".format(e, avg_cost))

    return w_1, w_2, w_3, training_curve

In [34]:
def test_deep(df_test, w_1, w_2, w_3):
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)
    
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())

            # Testing the model
            LSMR_test = preprocess_data(df_test)
            X_test, y_test = get_test(LSMR_test)
            return accuracy.eval({x: X_test,
                                  y: y_test,
                                  w1:w_1,w2:w_2,
                                  w3:w_3})

In [37]:
NUM_TRIALS = 1
scores_incremental = dict()
learning_curves = dict()
for i in range(NUM_TRIALS):
    scores_incremental[i] = dict()
    learning_curves[i] = dict()
    print("Trial:\t{}".format(i+1))
    k = 0
    skf = StratifiedKFold(n_splits=10, random_state=i)
    for train_index, test_index in skf.split(df["Review"], df["Language"]):
        start = time.time()
        w1, w2, w3, learning_curve = train_deep(df.loc[train_index], random_state=i, epochs=10000)
        s = test_deep(df.loc[test_index], w1, w2, w3)
        k += 1
        print("K:\t{}\nScore:\t{}".format(k, s))
        print("took:", time.time()-start)
        scores_incremental[i][k] = s
        learning_curves[i][k] = learning_curve
    print("*"*10)
    try:
        print("Trial {} avg score:\t {}".format(i+1, np.mean(list(scores_incremental[i].values()))))
    except:
        continue
    print("-"*30)

Trial:	1
K:	1
Score:	0.2800000011920929
took: 0.1636667251586914
K:	2
Score:	0.2800000011920929
took: 0.18086695671081543
K:	3
Score:	0.3700000047683716
took: 0.22888827323913574
K:	4
Score:	0.3400000035762787
took: 0.1683807373046875
K:	5
Score:	0.3700000047683716
took: 0.16428303718566895
K:	6
Score:	0.33000001311302185
took: 0.16387176513671875
K:	7
Score:	0.3400000035762787
took: 0.2004718780517578
K:	8
Score:	0.33000001311302185
took: 0.18352031707763672
K:	9
Score:	0.30000001192092896
took: 0.16056609153747559
K:	10
Score:	0.4000000059604645
took: 0.16551804542541504
**********
Trial 1 avg score:	 0.33400002121925354
------------------------------
