In [2]:
import tensorflow as tf
import pickle, pandas as pd, re, numpy as np, ast, warnings

from joblib import Parallel, delayed

import time

from collections import defaultdict, OrderedDict
from itertools import chain, starmap
from itertools import product
import unicodedata
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, precision_score, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline

from TurkishStemmer import TurkishStemmer
warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')
import gensim
from textblob import TextBlob

  from ._conv import register_converters as _register_converters


In [3]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
reset_graph()

In [4]:
df = pd.read_csv("datasets/movie_data.csv")
df.head()

Unnamed: 0,Language,Movie_ID,Review,Score
0,en,-800777728,i love science fiction and i hate superheroes ...,9
1,en,-800777728,the movie is absolutely incredible all the per...,10
2,en,-1018312192,in a cinematic era dominated by reboots and mi...,8
3,en,-1018312192,movie review on rise of the planet of the apes...,4
4,en,-1018312192,during experiments to find a cure for alzheime...,7


In [5]:
df.groupby("Score").count()

Unnamed: 0_level_0,Language,Movie_ID,Review
Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,29,29,29
2,21,21,21
3,14,14,14
4,23,23,23
5,83,83,83
6,43,43,43
7,71,71,71
8,207,207,207
9,175,175,175
10,334,334,334


In [6]:
en_vects = gensim.models.KeyedVectors.load_word2vec_format(r"../NLP_data/GoogleNews-vectors-negative300.bin", binary=True)

In [7]:
tr_vects = gensim.models.KeyedVectors.load_word2vec_format(r"../NLP_data/wiki.tr/wiki.tr.vec", binary=False)

In [8]:
turkish_stemmer = TurkishStemmer()
def clean(text, language="en", stem=True):
    global turkish_stemmer
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').lower().decode("ascii")
    
    if language == "tr":
        if stem:
            text= ' '.join([turkish_stemmer.stem(w) for w in text.split()])
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r'[0-9]', '#', text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ", text)
    text = re.sub(r"\+", " ", text)
    text = re.sub(r"\-", " ", text)
    text = re.sub(r"\=", " ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r":", " ", text)
    text = re.sub(r"e(\s)?-(\s)?mail", "email", text)

    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    return TextBlob(text)


In [9]:
VECTOR_SIZE = 300
def vectorize(text, language):
    global VECTOR_SIZE            
    blob = clean(text, language)
    vector = np.zeros(VECTOR_SIZE)
    if len(blob.words) < 1:
        return None

    for word in blob.words:
        try:
            if language == "en":
                vector += globals()["en_vects"][word]
            else:
                vector += globals()["tr_vects"][word]
        except KeyError:
            continue
    vector /= len(blob.words)
    return vector

In [10]:
def getvec(x):
    lang, rev = x.split(":::::")
    return vectorize(rev, lang)

In [11]:
# LMSR
def preprocess_data(df, language_column="Language", review_column="Review"):
    LMSR_df = df.copy()
    LMSR_df["lang_rev"] = LMSR_df[[language_column, review_column]].apply(lambda x: x[0]+":::::"+x[1], axis=1)
    LMSR_df["rev_vec"] = LMSR_df["lang_rev"].apply(lambda x:getvec(x))
    LMSR_df.drop(["lang_rev", "Review"], axis=1, inplace=True)
    return LMSR_df

In [12]:
def distance_accuracy(y_true, y_predict):
    res = 0
    for i in range(len(y_true)):
        res += abs(y_true[i]-y_predict[i])
    return 1-res/(len(y_true)*len(set(y_true)))

In [13]:
def get_XYy(LMSR):
    X = np.zeros((len(LMSR), VECTOR_SIZE))
    Y = np.zeros((len(LMSR), VECTOR_SIZE))
    y = np.zeros((len(LMSR)))
    i = 0
    for rev in LMSR.iterrows():
        score = rev[1][2]
        rev_vec = rev[1][3]
        score_vec = rev[1][4]

        X[i] = rev_vec
        Y[i] = score_vec
        y[i] = score

        i += 1
    return X, Y, y

In [14]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

In [15]:
def sigmoid(x, derive=False):
    if derive:
        return x*(1-x)
    return 1/(1+np.exp(-x))

In [16]:
def get_data_dict(df, get_L2and3=False):
    data_dict = dict() #{language:{score: {movie_id: [rev1, rev2, ..., revn]}}}
    L1 = dict()  # {(languge, score, movie_id): list of reviews with the same score with the same language}
    L2 = dict()  # {(language, score): None}
    L3 = dict()  # {score: None}
    for _, row in df.iterrows():
        lang = row["Language"]
        movie_id = row["Movie_ID"]
        score = row["Score"]
        review = row["rev_vec"]

        data_dict.setdefault(lang, {})
        data_dict[lang].setdefault(score, {})
        data_dict[lang][score].setdefault(movie_id, [])
        data_dict[lang][score][movie_id].append(review)
        
        L1.setdefault((lang, score, movie_id), list())
        L1[(lang, score, movie_id)].append(review)
        if get_L2and3:    
            L2[(lang, score)] = None
            L3[score] = None
    if get_L2and3:
        return data_dict, L1, L2, L3
    return data_dict, L1

In [17]:
def get_L2(LSM_R, data_dict):
    L2 = dict()  # {(language, score): list of movies vectors}
    for language in data_dict:
        for score in data_dict[language]:
            for movie_id in data_dict[language][score]:
                L2.setdefault((language, score), list())
                L2[(language, score)].append(LSM_R[(language, score, movie_id)])
    return L2

In [18]:
def get_L3(LS_MR, data_dict):
    L3 = dict()  # {score: vector of merged languages for that score}
    for language in data_dict:
        for score in data_dict[language]:
            L3.setdefault(score, list())
            L3[score].append(LS_MR[(language, score)])
    return L3

In [19]:
def merge(L, W):
    merged = dict()  # {item: vector of merged subitems}
    for i, item in enumerate(sorted(L)):
        for subitem in L[item]:
            merged.setdefault(item, [np.zeros(VECTOR_SIZE),0])
            merged[item][0] += sigmoid(subitem.dot(W[i]))
            merged[item][1] += 1
    for item in merged:
        merged[item] = merged[item][0]/ merged[item][1]
    return merged

In [20]:
def update_weights(L, delta, W, alpha=0.1):
    for i, k in enumerate(sorted(L)):
        for l in L[k]:
            W[i] += l.T.dot(delta[i]) *alpha
    return W

In [21]:
def get_layer_error(delta, W):
    error = 0
    for i in range(len(delta)):
        error += delta[i].dot(W[i].T)
    return error/len(delta)

In [22]:
def get_layer_delta(error, layer, size):
    delta = np.zeros((size, VECTOR_SIZE))
    j = 0
    for i,k in enumerate(sorted(layer)):
        for l in layer[k]:
            delta[j] = error[i]*sigmoid(l, True)
            j += 1
    return delta

## Training

## Full-batch

In [23]:
def get_score_vects(df, iterations=100, alpha=0.1, random_state=42, W1=None, W2=None, W3=None, W4=None):
    LSMR = preprocess_data(df)
    data_dict, L1 = get_data_dict(LSMR)
    y = softmax(list(LSMR.Score))
#     np.random.seed(random_state)
    learning_curve = dict()
    for i in range(iterations+1):
        # forward propagation
        if W1 is None:
            W1 = 2*np.random.random((len(L1), 300, 300))-1

        LSM_R = merge(L1, W1)
        L2 = get_L2(LSM_R, data_dict)
        if W2 is None:
            W2 = 2*np.random.random((len(L2), 300, 300))-1

        LS_MR = merge(L2, W2)
        L3 = get_L3(LS_MR, data_dict)
        if W3 is None:
            W3 = 2*np.random.random((len(L3), 300, 300))-1

        score_vectors_dict = merge(L3, W3)
        l4 = sigmoid(np.array([v for k, v in sorted(score_vectors_dict.items())]))
        if W4 is None:
            W4 = 2*np.random.random((300, len(LSMR)))-1
        
        l5 = softmax(l4.dot(W4))  # predicted scores
        
        # Calculate the error
        l5_error = np.mean(np.dot(np.log(l5), y))
        
        # Back propagation
        l5_delta = l5_error * sigmoid(l5, True)
        W4 += l4.T.dot(l5_delta)*alpha
        
        l4_error = l5_delta.dot(W4.T)
        l4_delta = l4_error * sigmoid(l4, True)
        
        W3 = update_weights(L3, l4_delta, W3, alpha)
        
        l3_error = get_layer_error(l4_delta, W3)
        l3_delta = get_layer_delta(l3_error, L3, len(L2))
        
        W2 = update_weights(L2, l3_delta, W2, alpha)
        
        l2_error = get_layer_error(l3_delta, W2)
        l2_delta = get_layer_delta(l2_error, L2, len(LSMR))
        
        W1 = update_weights(L1, l2_delta, W1, alpha)
        learning_curve[i] = l5_error
        if i%10 == 0:
            print("Iteration {}:\t{}".format(i, np.abs(l5_error)))
    return LSMR, score_vectors_dict, learning_curve

In [24]:
def fit(LSMR, score_vect_dicts,random_state=42, regressor=MLPRegressor(), classifier=MLPClassifier()):
    LSMR["score_vec"] = LSMR["Score"].apply(lambda x: score_vect_dicts[x] if x in score_vect_dicts else np.NaN)
    LSMR.dropna(inplace=True)
    
    X, Y, y = get_XYy(LSMR)
    
    regressor.random_state = random_state
    classifier.random_state = random_state
        
    regressor.fit(X, Y)
    classifier.fit(Y, y)
    return regressor, classifier

In [25]:
def predict(LSMR, score_vect_dicts, regressor, classifier):
    LSMR["score_vec"] = LSMR["Score"].apply(lambda x: score_vect_dicts[x] if x in score_vect_dicts else np.NaN)
    LSMR.dropna(inplace=True)
    
    X, Y, y = get_XYy(LSMR)
    
    preds_score_vecs = regressor.predict(X)
    pred_scores = classifier.predict(preds_score_vecs)
    
    return pred_scores, y

In [198]:
NUM_TRIALS = 1
scores = dict()
learning_curve = dict()
for i in range(NUM_TRIALS):
    print("Trial:\t{}".format(i+1))
    scores[i] = dict()
    learning_curves[i] = dict()
    k = 0
    skf = StratifiedKFold(n_splits=10, random_state=i)
    for train_index, test_index in skf.split(df["Review"], df["Language"]):
        start = time.time()
        LSMR, score_vect_dicts, training_curve = get_score_vects(df.loc[train_index], random_state=i)
        regressor, classifier = fit(LSMR, score_vect_dicts, random_state=i)
        preds, true = predict(preprocess_data(df.loc[test_index]), score_vect_dicts, regressor, classifier)
        s = distance_accuracy(true, preds)
        k += 1
        print("K:\t{}\nScore:\t{}".format(k, s))
        print("took:", time.time()-start, "seconds")
        scores[i][k] = s
        learning_curves[i][k] = training_curve
    print("*"*10)
    try:
        print("Trial {} avg score:\t {}".format(i+1, np.mean(list(scores[i].values()))))
    except:
        continue
    print("-"*30)

Trial:	1
Iteration 0:	23.05032027737502


  after removing the cwd from sys.path.


Iteration 10:	12.013688806867378
Iteration 20:	10.744270890680427
Iteration 30:	10.114251226703924
Iteration 40:	9.75217132158491
Iteration 50:	9.52156371951053
Iteration 60:	9.37614311021223
Iteration 70:	9.280248022521429
Iteration 80:	9.218285276774253
Iteration 90:	9.18114695146247
Iteration 100:	9.161032448418315
K:	1
Score:	0.7822222222222223
took: 30.43852734565735 seconds
Iteration 0:	21.13186922373753
Iteration 10:	11.35668325278657
Iteration 20:	10.51285456868467
Iteration 30:	10.45535150004107
Iteration 40:	9.89214040730134
Iteration 50:	9.597518842700218
Iteration 60:	9.419876479393006
Iteration 70:	9.306109132276688
Iteration 80:	9.229259279797853
Iteration 90:	9.178043524843314
Iteration 100:	9.14513334364513
K:	2
Score:	0.745
took: 24.68630599975586 seconds
Iteration 0:	24.175998239788285
Iteration 10:	12.461546686531529
Iteration 20:	10.910005342716062
Iteration 30:	10.245256648082933
Iteration 40:	9.872941382060517
Iteration 50:	9.6527317486706
Iteration 60:	9.51940342

In [199]:
pickle.dump([scores, learning_curves], open("batch_no_tf.results", "wb"))

In [None]:
# stats = pd.DataFrame(list(scores))
# stats.describe()

## Incremental with tensorflow

In [26]:
import tensorflow as tf

In [27]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [28]:
def get_test(LSMR):
    X = dict()
    y = dict()
    for _, row in LSMR.iterrows():
        score = row["Score"]
        y_ = np.zeros(10)
        y_[score-1] = 1
        y[len(y)] = y_
        X[len(X)] = row["rev_vec"]
    return np.array(list(X.values())), np.array(list(y.values()))

In [29]:
# def test(df_test, w_1, w_2, w_3):
#     reset_graph()
#     x = tf.placeholder(tf.float32, [None, 300])
#     y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

#     w1 = tf.placeholder(tf.float32, [300, 300])
#     w2 = tf.placeholder(tf.float32, [300, 300])
#     w3 = tf.placeholder(tf.float32, [300, 10])

#     b1 = tf.Variable(tf.zeros([300]))
#     b2 = tf.Variable(tf.zeros([300]))
#     b3 = tf.Variable(tf.zeros([10]))

#     l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
#     l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
#     pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)
    
#     correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
    
#         # Testing the model
#         LSMR_test = preprocess_data(df_test)
#         X_test, y_test = get_test(LSMR_test)
#         return accuracy.eval({x: X_test,
#                               y: y_test,
#                               w1:w_1,w2:w_2,
#                               w3:w_3})

In [30]:
# NUM_TRIALS = 5
# scores = list(np.zeros(NUM_TRIALS))
# for i in range(NUM_TRIALS):
#     print("Trial:\t{}".format(i+1))
#     score_dict = {"distance_accuracy":0}
#     k = 0
#     skf = StratifiedKFold(n_splits=10, random_state=i)
#     for train_index, test_index in skf.split(df["Review"], df["Language"]):
#         w1, w2, w3 = train(df.loc[train_index], random_state=i, epochs=50000)
#         s = test(df.loc[test_index], w1, w2, w3)
#         score_dict["distance_accuracy"] += s
#         k += 1
#         print("K:\t{}\nScore:\t{}".format(k, s))
#     score_dict["distance_accuracy"] /= 10.0
#     scores[i] = score_dict["distance_accuracy"]
#     print("*"*10)
#     print("Trial{} avg score:\t {}".format(i, score_dict["distance_accuracy"]))
#     print("-"*30)

In [31]:
def train_selective(df_train,epochs=100, learning_rate = 0.1, random_state=42):
    LSMR_train = preprocess_data(df_train)
    np.random.seed(random_state)
    data_dict, L1, L2, L3 = get_data_dict(LSMR_train, get_L2and3=True)
    init_weights = lambda layer, i, o: {k:2*np.random.random((i, o))-1 for k in layer}
    W1 = init_weights(L1, 300, 300)  # (languge, score, movie_id)
    W2 = init_weights(L2, 300, 300)  # (languge, score):
    W3 = init_weights(L3, 300, 10)  # score:
    
    
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])
    y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)


    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    training_curve = dict()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(epochs+1):
            avg_cost = 0.
            for _, row in LSMR_train.iterrows():
                lang = row["Language"]
                movie_id = row["Movie_ID"]
                score = row["Score"]
                y_ = np.zeros(10)
                y_[score-1] = 1
                y_ = np.atleast_2d(y_)
                x_ = np.atleast_2d(row["rev_vec"])
                w1_,w2_,w3_,_, c = sess.run([w1, w2, w3, optimizer, cost],
                                         feed_dict={x: x_,
                                                    y: y_,
                                                    w1:W1[(lang, score, movie_id)],
                                                    w2:W2[(lang, score)],
                                                    w3:W3[score]})
                W1[(lang, score, movie_id)] = w1_
                W2[(lang, score)] = w2_
                W3[score] = w3_
                
                avg_cost += c
            training_curve[e] = avg_cost
            if e%10==0:
                print("Epoch {}: {}".format(e, avg_cost/len(LSMR_train)))
        
        return W1, W2, W3, training_curve

In [32]:
def get_max_index(array):
    indx = None
    max_ = float("-inf")
    for i, e in enumerate(array):
        if e > max_:
            max_ = e
            indx = i
    return indx, max_

In [33]:
def predict_selective(df, W1, W2, W3):
    LSMR = preprocess_data(df)
    reset_graph()
    x = tf.placeholder(tf.float32, [None, 300])

    w1 = tf.placeholder(tf.float32, [300, 300])
    w2 = tf.placeholder(tf.float32, [300, 300])
    w3 = tf.placeholder(tf.float32, [300, 10])

    b1 = tf.Variable(tf.zeros([300]))
    b2 = tf.Variable(tf.zeros([300]))
    b3 = tf.Variable(tf.zeros([10]))

    l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
    l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
    pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)

    
    
    
    prediction = tf.argmax(pred, 1)
    preds = np.zeros(len(LSMR))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        j = 0
        for _, row in LSMR.iterrows():
            v = row["rev_vec"]
            predicted_scores = np.zeros(len(W1))
            for i, info in enumerate(W1):
                language, score, movie_id = info
                w_1 = W1[(language, score, movie_id)]
                w_2 = W2[(language, score)]
                w_3 = W3[score]
                
                predicted_scores[i] = prediction.eval({x: np.atleast_2d(v),
                                                       w1:w_1,w2:w_2,w3:w_3})
                
            max_index, probability = get_max_index(softmax(predicted_scores))
            predicted_score = predicted_scores[max_index]
            
            preds[j] = predicted_score
            j+=1
    
    
    return preds, np.array(list(LSMR.Score))

In [34]:
NUM_TRIALS = 1
scores_incremental = dict()
learning_curves = dict()
for i in range(NUM_TRIALS):
    print("Trial:\t{}".format(i+1))
    scores_incremental[i] = dict()
    learning_curves[i] = dict()
    k = 0
    skf = StratifiedKFold(n_splits=10, random_state=i)
    for train_index, test_index in skf.split(df["Review"], df["Language"]):
        start = time.time()
        # approximately 100 epochs per minute
        W1, W2, W3, training_curve = train_selective(df.loc[train_index], epochs=5000)
        
        preds, true = predict_selective(df.loc[test_index], W1, W2, W3)
        s = distance_accuracy(true, preds)
        scores_incremental[i][k] = s
        learning_curves[i][k] = training_curve
        k += 1
        print("K:\t{}\nScore:\t{}".format(k, s))
        print("took:", time.time()-start)
        scores_incremental[i][k] = s
        learning_curves[i][k] = training_curve
    print("*"*10)
    try:
        print("Trial {} avg score:\t {}".format(i+1, np.mean(list(scores_incremental[i].values()))))
    except:
        continue
    print("-"*30)

Trial:	1
Epoch 0: 2.9755149090860424
Epoch 10: 0.03301395801265951
Epoch 20: 0.009102185252193319
Epoch 30: 0.005444824386233652
Epoch 40: 0.003938626393603752
Epoch 50: 0.003107796458510721
Epoch 60: 0.002577785003723976
Epoch 70: 0.0022086838019789766
Epoch 80: 0.0019360923293665008
Epoch 90: 0.0017260877580851128
Epoch 100: 0.0015590692933245818
Epoch 110: 0.0014229088037864637
Epoch 120: 0.0013096479197252443
Epoch 130: 0.0012138942475146703
Epoch 140: 0.0011318201913728546
Epoch 150: 0.0010606514541244552
Epoch 160: 0.0009983230416030942
Epoch 170: 0.0009432612553825948
Epoch 180: 0.000894240514087667
Epoch 190: 0.0008503051894033181
Epoch 200: 0.0008106973505202102
Epoch 210: 0.0007747959017999367
Epoch 220: 0.0007420984184180826
Epoch 230: 0.0007121829090133108
Epoch 240: 0.0006847060506912486
Epoch 250: 0.000659379128109347
Epoch 260: 0.0006359545544037499
Epoch 270: 0.0006142189659869423
Epoch 280: 0.0005939980175703062
Epoch 290: 0.0005751360640951791
Epoch 300: 0.00055749435

KeyboardInterrupt: 

In [None]:
pickle.dump([scores_incremental, learning_curves], open("incremental_tf.results", "wb"))

In [178]:
# HOW TO CHEAT LIKE A PRO
# """
# def test_selective(df_test, W1, W2, W3):
#     reset_graph()
#     x = tf.placeholder(tf.float32, [None, 300])
#     y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

#     w1 = tf.placeholder(tf.float32, [300, 300])
#     w2 = tf.placeholder(tf.float32, [300, 300])
#     w3 = tf.placeholder(tf.float32, [300, 10])

#     b1 = tf.Variable(tf.zeros([300]))
#     b2 = tf.Variable(tf.zeros([300]))
#     b3 = tf.Variable(tf.zeros([10]))

#     l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
#     l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
#     pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)
    
#     correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#     instance_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
    
#         # Testing the model
#         LSMR_test = preprocess_data(df_test)
#         X_test, y_test = get_test(LSMR_test)
#         accuracy = 0.
#         for i in range(len(X_test)):
#             best_instance_accuracy = float("-inf")
#             for language, score, movie_id in W1:
#                 w_1 = W1[(language, score, movie_id)]
#                 w_2 = W2[(language, score)]
#                 w_3 = W3[score]
#                 a = instance_accuracy.eval({x: np.atleast_2d(X_test[i]), y: np.atleast_2d(y_test[i]),
#                                    w1:w_1,
#                                    w2:w_2,
#                                    w3:w_3})
#                 if a > best_instance_accuracy:
#                     best_instance_accuracy = a
#             accuracy += best_instance_accuracy

#     return accuracy/len(X_test)
# """

In [None]:
# stats = pd.DataFrame(list(scores))
# stats.describe()

In [88]:
# needs a gpu
# def train(df_train, epochs=100, learning_rate=0.1, random_state=42):
#     LSMR_train = preprocess_data(df_train)
#     np.random.seed(random_state)
#     data_dict, L1, L2, L3 = get_data_dict(LSMR_train, get_L2and3=True)
#     init_weights = lambda layer, i, o: {k:2*np.random.random((i, o))-1 for k in layer}
#     W1 = init_weights(L1, 300, 300)  # (languge, score, movie_id)
#     W2 = init_weights(L2, 300, 300)  # (languge, score):
#     W3 = init_weights(L3, 300, 10)  # score:
    
    
#     reset_graph()
#     x = tf.placeholder(tf.float32, [None, 300])
#     y = tf.placeholder(tf.float32, [None, 10]) # 1-10 => 10 classes

#     w1 = tf.Variable(tf.zeros([300, 300]))
#     w2 = tf.Variable(tf.zeros([300, 300]))
#     w3 = tf.Variable(tf.zeros([300, 10]))

#     b1 = tf.Variable(tf.zeros([300]))
#     b2 = tf.Variable(tf.zeros([300]))
#     b3 = tf.Variable(tf.zeros([10]))

#     l2 = tf.nn.sigmoid(tf.matmul(x, w1) + b1)
#     l3 = tf.nn.sigmoid(tf.matmul(l2, w2) + b2)
#     pred = tf.nn.softmax(tf.matmul(l3, w3) + b3)


#     cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
#     training_curve = dict()
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
#         for e in range(epochs+1):
#             start = time.time()
#             avg_cost = 0.
#             for _, row in LSMR_train.iterrows():
#                 score = row["Score"]
#                 y_ = np.zeros(10)
#                 y_[score-1] = 1
#                 y_ = np.atleast_2d(y_)
#                 x_ = np.atleast_2d(row["rev_vec"])
#                 w_1, w_2, w_3 , _, c = sess.run([w1, w2, w3, optimizer, cost], feed_dict={x: x_,y: y_})               
#                 avg_cost += c
#             avg_cost /= len(LSMR_train)
#             training_curve[e] = (avg_cost, time.time()-start)
#             if e%100==0:
#                 print("Epoch {}: {}".format(e, avg_cost))
        
#         return w_1, w_2, w_3, training_curve