In [1]:
import pandas as pd
import numpy as np
import altair as alt

import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

from main import preprocess, train_lr, apply_lr


# Preprocessing Tests

In [2]:
df = pd.read_csv('train.csv')
df

Unnamed: 0,id,text,emotions
0,27383,i feel awful about it too because it s my job ...,sadness
1,110083,im alone i feel awful,sadness
2,140764,ive probably mentioned this before but i reall...,joy
3,100071,i was feeling a little low few days back,sadness
4,2837,i beleive that i am much more sensitive to oth...,love
...,...,...,...
1195,40054,i was feeling terrified and anxious about ever...,fear
1196,104110,i was tempted to feel a little depressed about...,sadness
1197,106240,i wish i had done things differently miss the ...,sadness
1198,5483,i feel more and more curious anxious to see me...,surprise


In [3]:
t, corpus, classes = preprocess(df, lemmatize=True, remove_stopwords=True, feature_count=300)
print(len(corpus))
t.to_csv('train_pp.csv', index=False)
t

300


Unnamed: 0,id,emotions,_feel,_awful,_job,_get,_happen,_im,_alone,_ive,...,_wont,_admit,_heart,_beautiful,_id,_terrible,_super,_pain,_irritable,_ill
0,27383,"[0, 0, 0, 0, 1, 0]",1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,110083,"[0, 0, 0, 0, 1, 0]",1,1,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2,140764,"[0, 0, 1, 0, 0, 0]",1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,100071,"[0, 0, 0, 0, 1, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2837,"[0, 0, 0, 1, 0, 0]",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,40054,"[0, 1, 0, 0, 0, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1196,104110,"[0, 0, 0, 0, 1, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1197,106240,"[0, 0, 0, 0, 1, 0]",0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1198,5483,"[0, 0, 0, 0, 0, 1]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
corpus

{'feel': 1127,
 'awful': 9,
 'job': 16,
 'get': 92,
 'happen': 19,
 'im': 174,
 'alone': 11,
 'ive': 39,
 'probably': 14,
 'really': 57,
 'proud': 10,
 'actually': 22,
 'keep': 24,
 'new': 17,
 'year': 35,
 'little': 57,
 'low': 7,
 'day': 53,
 'back': 29,
 'much': 53,
 'people': 60,
 'feeling': 142,
 'find': 18,
 'talk': 8,
 'love': 48,
 'one': 54,
 'another': 15,
 'see': 46,
 'always': 30,
 'like': 205,
 'go': 104,
 'worthwhile': 9,
 'hour': 13,
 'especially': 7,
 'long': 19,
 'time': 81,
 'come': 35,
 'say': 59,
 'sweet': 9,
 'thing': 63,
 'sister': 10,
 'well': 38,
 'start': 29,
 'away': 24,
 'reach': 7,
 'hand': 14,
 'helpless': 12,
 'bit': 34,
 'also': 35,
 'someone': 18,
 'mean': 19,
 'make': 68,
 'wake': 11,
 'particularly': 10,
 'try': 43,
 'could': 33,
 'way': 58,
 'know': 78,
 'doesnt': 7,
 'thought': 13,
 'end': 21,
 'every': 25,
 'seem': 10,
 'cant': 32,
 'help': 30,
 'wish': 9,
 'word': 13,
 'write': 27,
 'think': 71,
 'weird': 17,
 'around': 18,
 'bless': 12,
 'watch': 1

# LR Training Tests

In [5]:
model = train_lr(t, 0.01, 0.01, 50, 0.0001) # 500 gets 0.9992 accuracy

model

matrix([[-0.34692006,  0.25597061, -0.6090299 , ...,  2.47448599,
         -0.34062438,  0.41167268],
        [ 0.11515037, -0.36718411,  0.4297797 , ..., -0.24303187,
         -0.04343324,  0.65092683],
        [-1.98499767, -0.80188153, -0.61430632, ..., -1.01659477,
          1.2551394 , -1.67572307],
        [ 0.50005403, -0.25517293,  0.45535132, ..., -0.20406097,
         -0.26689639, -0.25233065],
        [ 1.39062275,  1.30386552,  0.51095024, ..., -0.90532688,
         -0.50872974,  1.46804177],
        [ 0.32609057, -0.13559756, -0.17274505, ..., -0.10547151,
         -0.09545566, -0.60258756]])

In [6]:
predictions = apply_lr(t, model)
predictions

Unnamed: 0,id,emotions,_feel,_awful,_job,_get,_happen,_im,_alone,_ive,...,_admit,_heart,_beautiful,_id,_terrible,_super,_pain,_irritable,_ill,predictions
0,27383,"[0, 0, 0, 0, 1, 0]",1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0.0014373074270510825, 0.0056015433336568995,..."
1,110083,"[0, 0, 0, 0, 1, 0]",1,1,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,"[0.006884869950559439, 0.013239570823436823, 8..."
2,140764,"[0, 0, 1, 0, 0, 0]",1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[0.002347410623199297, 0.0006981916075606533, ..."
3,100071,"[0, 0, 0, 0, 1, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[3.029598806135391e-05, 0.00025443078448824513..."
4,2837,"[0, 0, 0, 1, 0, 0]",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0.12518200204950802, 0.16930287011675701, 0.0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,40054,"[0, 1, 0, 0, 0, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0.017914093410469487, 0.4990538685969162, 0.0..."
1196,104110,"[0, 0, 0, 0, 1, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0.02544107591191928, 0.003952646030995409, 0...."
1197,106240,"[0, 0, 0, 0, 1, 0]",0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0.0004920785773698422, 0.0032836592560441793,..."
1198,5483,"[0, 0, 0, 0, 0, 1]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[0.02149837647464307, 0.11790986057876489, 2.9..."


In [7]:
predictions.predictions = predictions.predictions.apply(lambda x: classes[np.argmax(x)])
predictions

Unnamed: 0,id,emotions,_feel,_awful,_job,_get,_happen,_im,_alone,_ive,...,_admit,_heart,_beautiful,_id,_terrible,_super,_pain,_irritable,_ill,predictions
0,27383,"[0, 0, 0, 0, 1, 0]",1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,sadness
1,110083,"[0, 0, 0, 0, 1, 0]",1,1,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,sadness
2,140764,"[0, 0, 1, 0, 0, 0]",1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,sadness
3,100071,"[0, 0, 0, 0, 1, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,sadness
4,2837,"[0, 0, 0, 1, 0, 0]",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,sadness
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,40054,"[0, 1, 0, 0, 0, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,fear
1196,104110,"[0, 0, 0, 0, 1, 0]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,sadness
1197,106240,"[0, 0, 0, 0, 1, 0]",0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,sadness
1198,5483,"[0, 0, 0, 0, 0, 1]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,sadness


In [8]:

gold_labels = df.emotions
pred_labels = predictions.predictions

accuracy = sum([gold_labels[i]==pred_labels[i] for i in gold_labels.index]) / len(gold_labels)
print(f'Training accuracy: {accuracy:.4f}')

Training accuracy: 0.4067


# LR Cross Validation
K=5

In [2]:
def KFold(train_fn, apply_fn, **kwargs):
    K=5
    t = pd.read_csv('train.csv')
    n = len(t) // K
    folds = [t[i:i+n] for i in range(0, len(t), n)]
    total_accuracy = 0
    total_loss = 0
    for k in range(K):
        training_set, corpus, classes = preprocess(pd.concat(folds[:k] + folds[k+1:]).reset_index(drop=True), rebalance=False, **kwargs)
        validation_set, corpus, classes = preprocess(folds[k].reset_index(drop=True), schema=corpus, **kwargs)
        model = train_fn(training_set, **kwargs)
        predictions = apply_fn(validation_set, model)
        a = validation_set.emotions.tolist()
        b = predictions.predictions.tolist()
        loss = -np.sum(np.sum(np.multiply(a, np.nan_to_num(np.log(b))), axis=0))
        gold_labels = validation_set.emotions.apply(lambda x: classes[np.argmax(x)]).reset_index(drop=True)
        pred_labels = predictions.predictions.apply(lambda x: classes[np.argmax(x)]).reset_index(drop=True)
        z = pd.DataFrame({'gold': gold_labels, 'pred': pred_labels})
        print(z.groupby('gold').apply(lambda x: (x.pred ==x.name).sum() / len(x)))
        # print(pred_labels)
        accuracy = sum([gold_labels[i]==pred_labels[i] for i in gold_labels.index]) / len(gold_labels)
        print(accuracy)
        print(loss)
        total_accuracy += accuracy
        total_loss += loss
    return total_accuracy / K, total_loss / K

In [10]:
def KFold_embeddings(train_fn, apply_fn, **kwargs):
    K=5
    df = pd.read_csv('train.csv')
    del df['text']
    e = pd.read_csv('naive_glove_embeddings.csv')
    t = pd.merge(df, e, on='id')
    classes = sorted(list(t.emotions.unique()))
    t.emotions = t.emotions.apply(lambda x: [int(x==y) for y in classes])
    n = len(t) // K
    folds = [t[i:i+n] for i in range(0, len(t), n)]
    total_accuracy = 0
    total_loss = 0
    for k in range(K):
        training_set = pd.concat(folds[:k] + folds[k+1:])
        validation_set = folds[k].reset_index(drop=True)
        model = train_fn(training_set, **kwargs)
        predictions = apply_fn(validation_set, model)
        a = validation_set.emotions.tolist()
        b = predictions.predictions.tolist()
        loss = -np.sum(np.sum(np.multiply(a, np.nan_to_num(np.log(b))), axis=0))
        gold_labels = validation_set.emotions.apply(lambda x: classes[np.argmax(x)]).reset_index(drop=True)
        pred_labels = predictions.predictions.apply(lambda x: classes[np.argmax(x)]).reset_index(drop=True)
        accuracy = sum([gold_labels[i]==pred_labels[i] for i in gold_labels.index]) / len(gold_labels)
        z = pd.DataFrame({'gold': gold_labels, 'pred': pred_labels})
        print(z.groupby('gold').apply(lambda x: (x.pred ==x.name).sum() / len(x)))
        print(accuracy)
        print(loss)
        total_accuracy += accuracy
        total_loss += loss
    return total_accuracy / K, total_loss / K

In [11]:
# do K-fold for lambda value
lambdas = [1, 0.5, 0.2, 0.1, 0.05, 0.01, 0.005]
validation_results = pd.DataFrame(columns=['parameter_name', 'parameter_value', 'average_accuracy'])
for l in lambdas:
    avg_acc = KFold(train_lr, apply_lr, train_lambda=l, train_step_size=0.01, train_max_iter=500, train_tolerance=0.00001, min_ngram=1, max_ngram=1, lemmatize=True, remove_stopwords=True)
    validation_results.loc[len(validation_results)] = pd.Series({
        'parameter_name': 'lambda',
        'parameter_value': l,
        'average_accuracy': avg_acc
    })

validation_results

gold
anger       0.190476
fear        0.133333
joy         0.978495
love        0.058824
sadness     0.098592
surprise    0.000000
dtype: float64
0.44583333333333336
gold
anger       0.289474
fear        0.045455
joy         0.121622
love        0.160000
sadness     0.958904
surprise    0.000000
dtype: float64
0.3958333333333333
gold
anger       0.432432
fear        0.214286
joy         0.922222
love        0.157895
sadness     0.423729
surprise    0.142857
dtype: float64
0.5583333333333333
gold
anger       0.424242
fear        0.291667
joy         0.646341
love        0.400000
sadness     0.929577
surprise    0.000000
dtype: float64
0.6166666666666667
gold
anger       0.375000
fear        0.120000
joy         0.835616
love        0.080000
sadness     0.467532
surprise    0.000000
dtype: float64
0.475
gold
anger       0.476190
fear        0.366667
joy         0.860215
love        0.235294
sadness     0.676056
surprise    0.000000
dtype: float64
0.6375
gold
anger       0.368421
fear    

Unnamed: 0,parameter_name,parameter_value,average_accuracy
0,lambda,1.0,0.498333
1,lambda,0.5,0.586667
2,lambda,0.2,0.603333
3,lambda,0.1,0.605
4,lambda,0.05,0.606667
5,lambda,0.01,0.605833
6,lambda,0.005,0.606667


In [12]:
alt.Chart(validation_results).mark_line().encode(
    x='parameter_value',
    y='average_accuracy',
    color='parameter_name'
)

In [3]:
# do K-fold for step size value
step_sizes = [1, 0.5, 0.2, 0.1, 0.05, 0.01, 0.005]
validation_results = pd.DataFrame(columns=['parameter_name', 'parameter_value', 'average_accuracy', 'average_loss'])
for s in step_sizes:
    avg_acc, avg_loss = KFold(train_lr, apply_lr, train_lambda=0.05, train_step_size=s, train_max_iter=500, train_tolerance=0.00001, min_ngram=1, max_ngram=1, lemmatize=False, remove_stopwords=True)
    validation_results.loc[len(validation_results)] = pd.Series({
        'parameter_name': 'step size',
        'parameter_value': s,
        'average_accuracy': avg_acc,
        'average_loss': avg_loss
    })

validation_results

Training LR model using train_lambda=0.05, train_step_size=1, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2871 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.0875
-0.0
Training LR model using train_lambda=0.05, train_step_size=1, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2814 features...
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15833333333333333
-0.0


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


Training LR model using train_lambda=0.05, train_step_size=1, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2885 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15416666666666667
-0.0
Training LR model using train_lambda=0.05, train_step_size=1, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2858 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.1375
-0.0
Training LR model using train_lambda=0.05, train_step_size=1, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2791 features...
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13333333333333333
-0.0


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


Training LR model using train_lambda=0.05, train_step_size=0.5, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2891 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.0875
-0.0
Training LR model using train_lambda=0.05, train_step_size=0.5, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2791 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15833333333333333
-0.0
Training LR model using train_lambda=0.05, train_step_size=0.5, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2885 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15416666666666667
-0.0
Training LR model using train_lambda=0.05, train_step_size=0.5, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2808 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.1375
-0.0
Training LR model using train_lambda=0.05, train_step_size=0.5, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2767 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13333333333333333
-0.0
Training LR model using train_lambda=0.05, train_step_size=0.2, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2898 features...


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.0875
-0.0
Training LR model using train_lambda=0.05, train_step_size=0.2, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2776 features...
gold
anger       0.657895
fear        0.318182
joy         0.675676
love        0.360000
sadness     0.602740
surprise    0.375000
dtype: float64
0.575
620.1177540699528
Training LR model using train_lambda=0.05, train_step_size=0.2, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2849 features...
gold
anger       0.513514
fear        0.357143
joy         0.633333
love        0.263158
sadness     0.677966
surprise    0.428571
dtype: float64
0.5583333333333333
561.8255839220421
Training LR model using train_lambda=0.05, train_step_size=0.2, train_max_iter=500, train_tolerance=1e-05 over 3000 rows and 2816 features...
gold
anger       0.575758
fear        0.375000
joy         0.682927
love        0.350000
sadn

Unnamed: 0,parameter_name,parameter_value,average_accuracy,average_loss
0,step size,1.0,0.134167,0.0
1,step size,0.5,0.134167,0.0
2,step size,0.2,0.4725,447.528503
3,step size,0.1,0.4125,14084.090296
4,step size,0.05,0.615833,324.572208
5,step size,0.01,0.644167,265.159144
6,step size,0.005,0.636667,256.611135


In [4]:
base = alt.Chart(validation_results).encode(
    alt.X('parameter_value', title='Step Size')
)

acc = base.mark_line(stroke='salmon').encode(
    alt.Y('average_accuracy', axis=alt.Axis(title='Average Accuracy', titleColor='salmon'))
)

loss = base.mark_line(stroke='lightskyblue').encode(
    alt.Y('average_loss', axis=alt.Axis(title='Average Loss', titleColor='lightskyblue'))
)

alt.layer(acc, loss).resolve_scale(
    y='independent'
).properties(title='Logistic Regression')

In [43]:
# do K-fold for feature count
counts = [200, 400, 600, 800, 1000, 1500, 2000, 2500]
validation_results = pd.DataFrame(columns=['parameter_name', 'parameter_value', 'average_accuracy'])
for c in counts:
    avg_acc = KFold(train_lr, apply_lr, train_lambda=0.05, train_step_size=0.05, train_max_iter=500, train_tolerance=0.00001, min_ngram=1, max_ngram=3, lemmatize=False, remove_stopwords=True, feature_count=c)
    validation_results.loc[len(validation_results)] = pd.Series({
        'parameter_name': 'feature count',
        'parameter_value': c,
        'average_accuracy': avg_acc
    })

validation_results

gold
anger       0.238095
fear        0.033333
joy         0.709677
love        0.058824
sadness     0.000000
surprise    0.000000
dtype: float64
0.30416666666666664
gold
anger       0.236842
fear        0.000000
joy         0.851351
love        0.000000
sadness     0.000000
surprise    0.125000
dtype: float64
0.30416666666666664
gold
anger       0.027027
fear        0.107143
joy         0.055556
love        0.105263
sadness     0.813559
surprise    0.142857
dtype: float64
0.25
gold
anger       0.060606
fear        0.416667
joy         0.219512
love        0.450000
sadness     0.084507
surprise    0.100000
dtype: float64
0.19166666666666668
gold
anger       0.250000
fear        0.160000
joy         0.739726
love        0.080000
sadness     0.012987
surprise    0.000000
dtype: float64
0.2875
gold
anger       0.142857
fear        0.433333
joy         0.870968
love        0.352941
sadness     0.281690
surprise    0.000000
dtype: float64
0.5125
gold
anger       0.368421
fear        0.13636

Unnamed: 0,parameter_name,parameter_value,average_accuracy
0,feature count,200,0.2675
1,feature count,400,0.413333
2,feature count,600,0.485
3,feature count,800,0.526667
4,feature count,1000,0.594167
5,feature count,1500,0.633333
6,feature count,2000,0.654167
7,feature count,2500,0.646667


In [44]:
alt.Chart(validation_results).mark_line().encode(
    x='parameter_value',
    y='average_accuracy',
    color='parameter_name'
)

In [33]:
# do K-fold for lambda value
lambdas = [1, 0.5, 0.2, 0.1, 0.05, 0.01, 0.005]
validation_results_embeddings = pd.DataFrame(columns=['parameter_name', 'parameter_value', 'average_accuracy'])
for l in lambdas:
    avg_acc = KFold_embeddings(train_lr, apply_lr, train_lambda=l, train_step_size=0.01, train_max_iter=500, train_tolerance=0.00001)
    validation_results_embeddings.loc[len(validation_results_embeddings)] = pd.Series({
        'parameter_name': 'lambda',
        'parameter_value': l,
        'average_accuracy': avg_acc
    })

validation_results_embeddings

gold
anger       0.000000
fear        0.000000
joy         0.344086
love        0.000000
sadness     0.958333
surprise    0.000000
dtype: float64
0.41735537190082644
gold
anger       0.047619
fear        0.181818
joy         1.000000
love        0.000000
sadness     0.000000
surprise    0.000000
dtype: float64
0.3347107438016529
gold
anger       0.000000
fear        0.000000
joy         0.633333
love        0.000000
sadness     0.803279
surprise    0.000000
dtype: float64
0.4380165289256198
gold
anger       1.000000
fear        0.000000
joy         0.011905
love        0.000000
sadness     0.000000
surprise    0.000000
dtype: float64
0.1322314049586777
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13636363636363635
gold
anger       0.000000
fear        0.000000
joy         0.935484
love        0.000000
sadness     0.638889
surprise    0.000000
dtype: float64
0.5495867768595041
gold
anger       0.404762
fear        

Unnamed: 0,parameter_name,parameter_value,average_accuracy
0,lambda,1.0,0.291736
1,lambda,0.5,0.405785
2,lambda,0.2,0.428099
3,lambda,0.1,0.409091
4,lambda,0.05,0.429752
5,lambda,0.01,0.372727
6,lambda,0.005,0.469421


In [34]:
alt.Chart(validation_results_embeddings).mark_line().encode(
    x='parameter_value',
    y='average_accuracy',
    color='parameter_name'
)

In [35]:
step_sizes = [1, 0.5, 0.2, 0.1, 0.05, 0.01, 0.005]
validation_results_embeddings = pd.DataFrame(columns=['parameter_name', 'parameter_value', 'average_accuracy'])
for s in step_sizes:
    avg_acc = KFold_embeddings(train_lr, apply_lr, train_lambda=0.05, train_step_size=s, train_max_iter=500, train_tolerance=0.00001)
    validation_results_embeddings.loc[len(validation_results_embeddings)] = pd.Series({
        'parameter_name': 'step_sizes',
        'parameter_value': s,
        'average_accuracy': avg_acc
    })

validation_results_embeddings

  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.08677685950413223
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.17355371900826447
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15289256198347106
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.128099173553719
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13636363636363635
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64

  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)



0.08677685950413223
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.17355371900826447
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15289256198347106
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.128099173553719
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13636363636363635


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.08677685950413223
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.17355371900826447
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15289256198347106
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.128099173553719
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13636363636363635
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.08677685950413223
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.17355371900826447


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15289256198347106
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.128099173553719
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13636363636363635
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.08677685950413223


  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)
  dp = np.exp(vector_matrix @ weight_matrix.T)
  dp = (dp/dp.sum(axis=1)) - target_matrix
  dp = np.exp(vector_matrix @ model.T)


gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.17355371900826447
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.15289256198347106
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.128099173553719
gold
anger       1.0
fear        0.0
joy         0.0
love        0.0
sadness     0.0
surprise    0.0
dtype: float64
0.13636363636363635
gold
anger       0.190476
fear        0.933333
joy         0.516129
love        0.444444
sadness     0.111111
surprise    0.000000
dtype: float64
0.39669421487603307
gold
anger       0.785714
fear        0.227273
joy         0.666667
love        0.000000
sadness     0.478873
surprise    0.000000
dtype: float64
0.5041322314049587
gold
anger       0.027027
fear        0.071429
joy         0.033333
love        0.263158
sadness     0.950820
surprise    0.142

Unnamed: 0,parameter_name,parameter_value,average_accuracy
0,step_sizes,1.0,0.135537
1,step_sizes,0.5,0.135537
2,step_sizes,0.2,0.135537
3,step_sizes,0.1,0.135537
4,step_sizes,0.05,0.135537
5,step_sizes,0.01,0.429752
6,step_sizes,0.005,0.48843


In [36]:
alt.Chart(validation_results_embeddings).mark_line().encode(
    x='parameter_value',
    y='average_accuracy',
    color='parameter_name'
)