# Classification Experiments

In [1]:
# Change working directory to be project root
# Later cells read root-relative paths ("data/interim/...", "./references/...")
# and import from the `src` package, so the kernel has to run from the
# project root for them to resolve.
import os
# Presumably needed only when the notebook is launched from a sub-directory
# of the project (e.g. a notebooks/ folder) — confirm before enabling.
#os.chdir("..")
# Show the current working directory so the reader can verify it.
os.getcwd()

'/Users/aaronquinton/Documents/UBC-MDS/Capstone/BCstats/DSCI_591_capstone-BCStats'

In [194]:
import pandas as pd
import numpy as np
import nltk
import time

# Custom functions for preprocessing and data preparation
from src.data.preprocessing_text import (
    clean_text, clean_numbers, replace_typical_misspell, remove_stopwords,
    balance_themes
)

from src.features.word_vectors import (
    build_vocab, check_coverage, get_average_embeddings
)

from src.models.eval import theme_results

# scikit-learn utilities for feature extraction, data splitting, and metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics


# Classification algorithms
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import multilayer_perceptron


## <span style = "color:Darkblue"> Preprocessing Data & Feature Engineering </span>

In [132]:
# Load the labelled 2018 comments and keep only the free-text column plus
# the theme indicator columns (everything from 'CPD' through 'OTH').
df = (
    pd.read_csv("data/interim/train_2018-qualitative-data.csv")
    .pipe(lambda raw: raw[['2018 Comment']].join(raw.loc[:, 'CPD':'OTH']))
    .rename(columns={'2018 Comment': 'comment'})
)

# Theme names, in column order, reused by later cells to label results.
themes = list(df.loc[:, 'CPD':'OTH'].columns)

In [133]:
# Remove punctuation, clean numbers, and fix spelling — applied in that order,
# each step overwriting the "comment" column with its cleaned version.
# NOTE(review): progress_apply is tqdm's pandas hook; it is presumably
# registered by one of the project imports, since this notebook never calls
# tqdm.pandas() itself — confirm.
for cleaning_step in (clean_text, clean_numbers, replace_typical_misspell):
    df["comment"] = df["comment"].progress_apply(cleaning_step)


100%|██████████| 13278/13278 [00:00<00:00, 62966.77it/s]
100%|██████████| 13278/13278 [00:00<00:00, 31806.24it/s]
100%|██████████| 13278/13278 [00:00<00:00, 45646.61it/s]


### Bag of Words


In [178]:
# Features are the cleaned comment strings; targets are the theme indicator
# columns 'CPD' through 'OTH', one column per theme.
X = np.array(df["comment"])
Y = np.array(df.loc[:, "CPD":"OTH"])

# Hold out 25% of the comments for validation; the fixed seed keeps this
# split identical to the other splits made with the same arguments.
split = train_test_split(X, Y, test_size=0.25, random_state=2019)
X_train, X_valid, Y_train, Y_valid = split

In [179]:
# Bag-of-words counts over 1- to 5-grams, with English stop words removed
# and n-grams seen in fewer than two training comments dropped.
vectorizer = CountVectorizer(
    min_df=2,
    ngram_range=(1, 5),
    stop_words='english',
)

# Learn the vocabulary on the training split only, then reuse it unchanged
# for the validation split.
X_train_bow = vectorizer.fit_transform(X_train)
X_valid_bow = vectorizer.transform(X_valid)

In [180]:
# Shape of the training bag-of-words matrix: (training comments, n-gram features).
X_train_bow.shape

(9958, 31268)

In [7]:
#X_train_bow, Y_train = balance_themes(X_train_bow.toarray(), Y_train)

### Important BOW Features
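Using Classifier 5 (BOW & Logistic Regression with L1 regularization), the following code identifies the most important features, i.e. the n-grams whose coefficients were not shrunk to zero. The result is stored in the variable `df_important_words`. Classifier 5 must be trained first (see the Classification Models section).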

In [172]:
# Identify Words with non zero log reg coefs
# Note: Need to run Classifier 5 in the Classification Model Section
# Note: `vectorizer` must still be the full-vocabulary CountVectorizer that
#       produced X_train_bow, otherwise names and coefficients will not line up.

# Feature names are the same for every theme, so look them up once.
# Newer scikit-learn exposes get_feature_names_out(); older releases only
# have get_feature_names(). Use whichever is available.
feature_names_getter = getattr(vectorizer, "get_feature_names_out", None)
if feature_names_getter is None:
    feature_names_getter = vectorizer.get_feature_names
bow_features = np.array(feature_names_getter())

# One frame per theme, concatenated once at the end (DataFrame.append was
# removed in pandas 2.0 and re-copies the whole frame on every call).
# Only the first 11 themes are processed; `themes` also contains 'OTH',
# which is presumably skipped on purpose — confirm.
important_frames = []
for i in range(11):
    # coef_ of a binary logistic regression has one row; flatten it.
    coefs = clf5.classifiers_[i].coef_.ravel()
    kept = np.flatnonzero(coefs)  # positions of features L1 did not zero out

    important_frames.append(
        pd.DataFrame({'classify_theme': themes[i],
                      'word_features': bow_features[kept],
                      'lr_coefs': coefs[kept]})
    )

# Each theme keeps its own 0..k-1 index, exactly as repeated append() did.
df_important_words = pd.concat(important_frames)

In [173]:
# Preview the first ten rows after ordering by theme name and then by
# coefficient, lowest first.
(
    df_important_words
    .sort_values(by=["classify_theme", "lr_coefs"])
    .head(n=10)
)

Unnamed: 0,classify_theme,word_features,lr_coefs
274,CB,hire people,-5.206289
580,CB,student,-4.847308
159,CB,diversity,-4.305886
532,CB,salary competitive,-3.351555
643,CB,waste,-3.049091
78,CB,check,-3.020734
85,CB,city,-2.897458
427,CB,particular,-2.41306
654,CB,work does,-2.35083
428,CB,party,-2.294543


In [246]:
# For every theme keep the ten features with the lowest (most negative)
# logistic-regression coefficients.
df_words = (
    df_important_words
    .sort_values(by="lr_coefs")
    .groupby("classify_theme")
    .head(n=10)
)

# Map each of the first 11 themes to its selected features, lowest
# coefficient first.
top_neg_words = {
    themes[idx]: df_words.loc[df_words.classify_theme == themes[idx],
                              "word_features"].tolist()
    for idx in range(11)
}

# One column per theme for side-by-side reading.
pd.DataFrame(top_neg_words)

Unnamed: 0,CPD,CB,EWC,Exec,FWE,SP,RE,Sup,SW,TEPE,VMG
0,aligning,hire people,need change,ground level,continuously,according,greater recognition,mid,want work,plans make,engagement work
1,travel training,student,written,functions,seniors,promote long,new supervisors,silos,time work,makes feel,come staff
2,retirees,diversity,contribution,question,mobile workers,job like,stated,qualifications,workload associated,locally,organizational structure
3,people time,salary competitive,ds,administrative,changed,expected,mcfd,deals,things busy,sports,critical
4,favoritism,waste,political correctness,transform,needs change,eye,rapid pace,radios,fewer people,pictures,align work
5,cultural safety training,check,affect,entirely,doesn,consuming,younger people,supervisor manager,transition,accomplish,search
6,extensive,city,sit,communication transparency,workstation,correctional,relationships,fractured,membership,simply,bullying
7,hierarchical,particular,senior leaders,specifically,frustrated,position position,express,equivalent,workload stress,public safety,provide service
8,training feel,work does,subject,wrong,hours support,share,rapid,annual,remaining,lws,clear communication
9,kind,party,quit,leadership positions,residents,feeling valued,computers,followed,creates,time goes,extremely


In [247]:
# For every theme keep the ten features with the highest (most positive)
# logistic-regression coefficients.
df_words = (
    df_important_words
    .sort_values(by="lr_coefs")
    .groupby("classify_theme")
    .tail(n=10)
)

# Map each of the first 11 themes to its selected features; the sort is
# ascending, so the strongest feature is the last entry of each list.
top_pos_words = {
    themes[idx]: df_words.loc[df_words.classify_theme == themes[idx],
                              "word_features"].tolist()
    for idx in range(11)
}

# One column per theme for side-by-side reading.
pd.DataFrame(top_pos_words)

Unnamed: 0,CPD,CB,EWC,Exec,FWE,SP,RE,Sup,SW,TEPE,VMG
0,career paths,reclassified,gossip,worth,flexibility,vacancies,work paid,suggestion improve,ongoing training,software,silos
1,myperformance,compensated,disabilities,managing work,schedules,recruiting,opinions staff,leadership levels,lean,dangers,budget
2,onboarding,salaries,favoritism,change management,rotation,support resources,acknowledgment,accountable,workloads,heat,wildlife
3,growth,compensation,favorites,minister,flex,attrition,empowerment,management executive,understaffed,safer,politics
4,courses,salary,discrimination,difficult work make,work home,successional,equipment need,staff start,days leadership,cold,integration
5,manage caseloads,underpaid,social committee,executive,remotely,posting,appreciation,time hiring,loads,safety,employee morale
6,performance management,pay,bullying,team continue,flexible,revolving door,listen,warden,caseloads,laptops,mflnrord
7,fulltime,reclassification,bullied,executives,telework,look job,micromanagement,high stress,bureaucracy,ergonomic,media
8,training,wage,team building,negative work environment,telecommuting,retention,autonomy,supervision,workload,standing,silo
9,levels ensure,wages,harassment,staff follow,lws,succession,recognition,accountability,overworked,ergonomics,reconciliation


In [306]:
# Only use important words for the count vectorizer
vocab = df_important_words.word_features.unique().tolist()

# The vocabulary was produced by a vectorizer configured with
# stop_words='english' and ngram_range=(1, 5), so it contains multi-word
# features such as "hire people". Supplying a fixed vocabulary does not
# change how text is tokenised: with the default ngram_range=(1, 1) only
# unigrams are generated and every multi-word column would stay all-zero.
# Reuse the original analyser settings so the n-grams can actually match.
# NOTE: this rebinds `vectorizer`; re-run the Bag of Words cells before
# re-running the "Important BOW Features" cell.
vectorizer = CountVectorizer(vocabulary=vocab,
                             stop_words='english',
                             ngram_range=(1, 5))

# Dense arrays, consistent with the .toarray() calls used elsewhere.
X_train_bow2 = vectorizer.transform(X_train).toarray()
X_valid_bow2 = vectorizer.transform(X_valid).toarray()

In [307]:
# Shape of the training matrix restricted to the important-word vocabulary.
X_train_bow2.shape

(9958, 4578)

### Average Word Vectors

In [187]:
# Load embeddings to be used for word vectors
from gensim.models import KeyedVectors

# Path is relative to the project root; the file is read in binary
# word2vec format (binary=True below).
news_path = "./references/GoogleNews-vectors-negative300.bin"
# The loaded vectors are reused below for the coverage check and for
# averaging word vectors per comment.
google_news = KeyedVectors.load_word2vec_format(news_path, binary=True)

In [188]:
# Split every cleaned comment on whitespace, then drop stop words.
tokenised = df["comment"].apply(str.split)
sentences = remove_stopwords(tokenised)

# Build the vocabulary of the remaining tokens and compare it against the
# Google News embeddings.
vocab = build_vocab(sentences)
oov = check_coverage(vocab, google_news)

# First ten out-of-vocabulary entries returned by the coverage check.
oov[:10]

100%|██████████| 13278/13278 [00:00<00:00, 97495.34it/s] 
100%|██████████| 17246/17246 [00:05<00:00, 3055.22it/s]

Found embeddings for 93.99% of vocab
Found embeddings for  99.67% of all text





[('CYMH', 54),
 ('FLNRORD', 35),
 ('GCPE', 33),
 ('CSNR', 32),
 ('BCWS', 23),
 ('MIRR', 20),
 ('STIIP', 20),
 ('CVSE', 19),
 ('MyPerformance', 18),
 ('FLNRO', 17)]

In [189]:
# One averaged embedding per comment, stacked into a 2-D feature array.
sentence_vectors = []
for tokens in sentences:
    sentence_vectors.append(
        get_average_embeddings(tokens, embeddings_index=google_news)
    )
X_wv = np.array(sentence_vectors)

# Targets: the theme indicator columns 'CPD' through 'OTH'.
Y = np.array(df.loc[:, "CPD":"OTH"])

# Same test_size and seed as the bag-of-words split.
split_wv = train_test_split(X_wv, Y, test_size=0.25, random_state=2019)
X_train_wv, X_valid_wv, Y_train, Y_valid = split_wv

### Comment Length (Word Count)

In [204]:
# Rebuild the raw-text split so the length feature below is computed on the
# comment strings; arguments match the earlier bag-of-words split.
X = np.array(df["comment"])
Y = np.array(df.loc[:, "CPD":"OTH"])

split_kwargs = {"test_size": 0.25, "random_state": 2019}
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, **split_kwargs)

In [226]:
# Number of whitespace-separated words in each comment, used as a feature.
def _count_words(comment):
    """Return how many whitespace-separated tokens `comment` contains."""
    return len(comment.split())


# Element-wise wrapper so the counter can be applied to a whole array.
mylen = np.vectorize(_count_words)

# Column vectors with one row per comment.
X_train_len = mylen(X_train).reshape(-1, 1)
X_valid_len = mylen(X_valid).reshape(-1, 1)

In [290]:
# Shape of the validation matrix restricted to the important-word vocabulary.
X_valid_bow2.shape

(3320, 216)

In [284]:
# Shape of the validation averaged-word-vector matrix.
X_valid_wv.shape

(3320, 300)

### Combine Features

In [313]:
# Place the dense bag-of-words counts and the averaged word vectors side by
# side, so each comment's row holds both feature sets.
X_train_all = np.hstack((X_train_bow.toarray(), X_train_wv))
X_valid_all = np.hstack((X_valid_bow.toarray(), X_valid_wv))

In [314]:
# Print the combined feature matrix shapes, training first, then validation.
for combined in (X_train_all, X_valid_all):
    print(combined.shape)

(9958, 31568)
(3320, 31568)


## <span style = "color:Darkblue"> Classification Models </span>
### Baseline Classifier - BOW & Linear SVC 

In [181]:
################################################################################
# Classifier 1 (baseline): one LinearSVC per theme on bag-of-words counts      #
################################################################################
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted.
t_start = time.perf_counter()
print("Training Classifier 1")

clf1 = BinaryRelevance(
    # Seeded so the solver's internal shuffling is repeatable between runs
    # (same seed as the train/validation split).
    classifier = LinearSVC(random_state=2019)
)

clf1.fit(X_train_bow, Y_train)
t_end_train = time.perf_counter()

# predict() output is converted to a dense array for evaluation.
Y_pred1 = clf1.predict(X_valid_bow).toarray()

# Calculate and print elapsed time
t_end = time.perf_counter()
print("Elapsed Training time: %.1f s" % (t_end_train - t_start),
      "\nElapsed Predict time: %.1f s" % (t_end - t_end_train))

Training Classifier 1
Elapsed Training time: 59.2 s 
Elapsed Predict time: 12.4 s


In [182]:
# Validation scores for Classifier 1: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred1)

Overall Accuracy: 0.4512 
Hamming Loss: 0.0738 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.113554,0.075301,0.052108,0.924699,0.729443,0.650118
1,CB,0.184639,0.175602,0.044578,0.14006,0.955422,0.898799,0.854812
2,EWC,0.084337,0.05994,0.067771,0.016566,0.932229,0.638191,0.453571
3,Exec,0.103012,0.091566,0.08253,0.020482,0.91747,0.611842,0.54386
4,FWE,0.062048,0.056325,0.026205,0.035843,0.973795,0.818182,0.742718
5,SP,0.096386,0.084337,0.06747,0.028916,0.93253,0.671429,0.5875
6,RE,0.085542,0.065361,0.075602,0.00994,0.924398,0.576037,0.440141
7,Sup,0.127711,0.116566,0.108735,0.018976,0.891265,0.581395,0.53066
8,SW,0.165964,0.143373,0.120181,0.045783,0.879819,0.659664,0.569873
9,TEPE,0.228614,0.214759,0.074699,0.153916,0.925301,0.858345,0.806324


In [13]:
# Validation rows for which the baseline predicted no theme at all (row sum
# of the prediction matrix is zero); the first element of the shape is the
# number of such comments.
# The baseline predictions are stored in Y_pred1; the name `Y_pred` is not
# assigned anywhere in the visible notebook.
Y_pred1[Y_pred1.sum(axis = 1) == 0,:].shape

(396, 12)

### Classifier 2 - BOW & Ensemble 

In [26]:
# Initialize Models
# Base estimators for the voting ensemble. LinearSVC gets the same seed as
# the random forest so the ensemble is repeatable between runs.
clf2a = LinearSVC(random_state=1)
clf2b = RandomForestClassifier(n_estimators=50, random_state=1)
clf2c = LogisticRegression(solver='lbfgs')

# Hard voting: the predicted label is the majority vote of the three models.
# The third estimator is a logistic regression, so it is keyed 'lr' rather
# than the misleading 'nb' (no naive Bayes model is part of the ensemble).
eclf = VotingClassifier(estimators=[('svc', clf2a), ('rf', clf2b), ('lr', clf2c)],
                        voting='hard')

In [37]:
# --- Classifier 2: voting ensemble per theme on bag-of-words counts ----------
t_start = time.time()
print("Training Classifier 2")

# Wrap the ensemble (eclf) with BinaryRelevance and fit it on the BOW counts.
clf2 = BinaryRelevance(classifier=eclf)
clf2.fit(X_train_bow, Y_train)
t_end_train = time.time()

# Predict the validation split and convert the result to a dense array.
validation_predictions = clf2.predict(X_valid_bow)
Y_pred2 = validation_predictions.toarray()
t_end = time.time()

# Report how long the fit and predict phases took.
train_seconds = t_end_train - t_start
predict_seconds = t_end - t_end_train
print("Elapsed Training time: %.1f s" % train_seconds,
      "\nElapsed Predict time: %.1f s" % predict_seconds)

Training Classifier 2




Elapsed Training time: 1495.8 s 
Elapsed Predict time: 16.2 s


In [38]:
# Validation scores for Classifier 2: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred2)

Overall Accuracy: 0.4584 
Hamming Loss: 0.0689 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.10512,0.071084,0.056325,0.928916,0.767908,0.63357
1,CB,0.184639,0.171988,0.042169,0.14247,0.957831,0.914186,0.85155
2,EWC,0.084337,0.046988,0.06506,0.019277,0.93494,0.705128,0.392857
3,Exec,0.103012,0.075301,0.076506,0.026506,0.923494,0.676,0.494152
4,FWE,0.062048,0.05,0.027108,0.03494,0.972892,0.849398,0.684466
5,SP,0.096386,0.075301,0.063855,0.03253,0.936145,0.716,0.559375
6,RE,0.085542,0.053313,0.070181,0.015361,0.929819,0.644068,0.401408
7,Sup,0.127711,0.095181,0.101205,0.026506,0.898795,0.639241,0.476415
8,SW,0.165964,0.120482,0.10753,0.058434,0.89247,0.7425,0.53902
9,TEPE,0.228614,0.206928,0.071084,0.15753,0.928916,0.88064,0.797101


### Classifier 3 - WV & LinearSVC

In [190]:
################################################################################
# Classifier 3: one LinearSVC per theme on averaged word vectors               #
################################################################################
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted.
t_start = time.perf_counter()
print("Training Classifier 3")

clf3 = BinaryRelevance(
    # Seeded so the solver's internal shuffling is repeatable between runs
    # (same seed as the train/validation split).
    classifier = LinearSVC(random_state=2019)
)

clf3.fit(X_train_wv, Y_train)
t_end_train = time.perf_counter()

# predict() output is converted to a dense array for evaluation.
Y_pred3 = clf3.predict(X_valid_wv).toarray()

# Calculate and print elapsed time
t_end = time.perf_counter()
print("Elapsed Training time: %.1f s" % (t_end_train - t_start),
      "\nElapsed Predict time: %.1f s" % (t_end - t_end_train))

Training Classifier 3
Elapsed Training time: 11.6 s 
Elapsed Predict time: 0.1 s


In [191]:
# Validation scores for Classifier 3: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred3)

Overall Accuracy: 0.4021 
Hamming Loss: 0.0821 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.06506,0.093072,0.034337,0.906928,0.763889,0.390071
1,CB,0.184639,0.143976,0.071386,0.113253,0.928614,0.893305,0.696574
2,EWC,0.084337,0.021386,0.073795,0.010542,0.926205,0.746479,0.189286
3,Exec,0.103012,0.040964,0.086145,0.016867,0.913855,0.705882,0.280702
4,FWE,0.062048,0.033133,0.040964,0.021084,0.959036,0.818182,0.436893
5,SP,0.096386,0.04006,0.076807,0.019578,0.923193,0.744361,0.309375
6,RE,0.085542,0.014458,0.080723,0.004819,0.919277,0.666667,0.112676
7,Sup,0.127711,0.043373,0.110843,0.016867,0.889157,0.694444,0.235849
8,SW,0.165964,0.072289,0.137651,0.028313,0.862349,0.695833,0.303085
9,TEPE,0.228614,0.198193,0.078614,0.15,0.921386,0.878419,0.761528


### Classifier 4 - WV & XGBoost 

In [192]:
# --- Classifier 4: XGBoost per theme on averaged word vectors ----------------
t_start = time.time()
print("Training Classifier 4")

# Wrap a default XGBClassifier with BinaryRelevance and fit it on the
# averaged word vectors.
clf4 = BinaryRelevance(classifier=XGBClassifier())
clf4.fit(X_train_wv, Y_train)
t_end_train = time.time()

# Predict the validation split and convert the result to a dense array.
validation_predictions = clf4.predict(X_valid_wv)
Y_pred4 = validation_predictions.toarray()
t_end = time.time()

# Report how long the fit and predict phases took.
train_seconds = t_end_train - t_start
predict_seconds = t_end - t_end_train
print("Elapsed Training time: %.1f s" % train_seconds,
      "\nElapsed Predict time: %.1f s" % predict_seconds)

Training Classifier 4
Elapsed Training time: 259.0 s 
Elapsed Predict time: 0.2 s


In [193]:
# Validation scores for Classifier 4: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred4)

Overall Accuracy: 0.3518 
Hamming Loss: 0.0843 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.064157,0.092771,0.034639,0.907229,0.769953,0.387707
1,CB,0.184639,0.132229,0.072892,0.111747,0.927108,0.922551,0.660685
2,EWC,0.084337,0.016566,0.076205,0.008133,0.923795,0.745455,0.146429
3,Exec,0.103012,0.035542,0.086145,0.016867,0.913855,0.737288,0.254386
4,FWE,0.062048,0.023494,0.045783,0.016265,0.954217,0.846154,0.320388
5,SP,0.096386,0.033434,0.080422,0.015964,0.919578,0.738739,0.25625
6,RE,0.085542,0.004217,0.084337,0.001205,0.915663,0.642857,0.03169
7,Sup,0.127711,0.034639,0.110542,0.017169,0.889458,0.747826,0.20283
8,SW,0.165964,0.053614,0.133434,0.03253,0.866566,0.803371,0.259528
9,TEPE,0.228614,0.176205,0.092771,0.135843,0.907229,0.88547,0.682477


### Classifier 5 - BOW & LogReg

In [197]:
################################################################################
# Classifier 5: L1-regularised logistic regression per theme on BOW counts     #
################################################################################
# The fitted per-theme coefficients are also read by the "Important BOW
# Features" cell, so this classifier must be trained before that cell runs.
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted.
t_start = time.perf_counter()
print("Training Classifier 5")

clf5 = BinaryRelevance(
    # The liblinear solver shuffles the data, so seed it: the set of
    # non-zero coefficients should not change from one run to the next.
    classifier = LogisticRegression(penalty='l1', solver='liblinear',
                                    random_state=2019)
)

clf5.fit(X_train_bow, Y_train)
t_end_train = time.perf_counter()

# predict() output is converted to a dense array for evaluation.
Y_pred5 = clf5.predict(X_valid_bow).toarray()

# Calculate and print elapsed time
t_end = time.perf_counter()
print("Elapsed Training time: %.1f s" % (t_end_train - t_start),
      "\nElapsed Predict time: %.1f s" % (t_end - t_end_train))

Training Classifier 5
Elapsed Training time: 61.7 s 
Elapsed Predict time: 13.3 s


In [198]:
# Validation scores for Classifier 5: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred5)

Overall Accuracy: 0.4328 
Hamming Loss: 0.0718 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.109337,0.072289,0.05512,0.927711,0.752066,0.64539
1,CB,0.184639,0.169277,0.04488,0.139759,0.95512,0.912811,0.836868
2,EWC,0.084337,0.052108,0.061145,0.023193,0.938855,0.722543,0.446429
3,Exec,0.103012,0.076506,0.080723,0.022289,0.919277,0.645669,0.479532
4,FWE,0.062048,0.054819,0.027711,0.034337,0.972289,0.813187,0.718447
5,SP,0.096386,0.079819,0.065964,0.030422,0.934036,0.690566,0.571875
6,RE,0.085542,0.059036,0.073494,0.012048,0.926506,0.602041,0.415493
7,Sup,0.127711,0.093373,0.104217,0.023494,0.895783,0.625806,0.457547
8,SW,0.165964,0.119277,0.114759,0.051205,0.885241,0.714646,0.513612
9,TEPE,0.228614,0.203012,0.078012,0.150602,0.921988,0.87092,0.773386


### Classifier 6 - WV & MLP

In [202]:
################################################################################
# Classifier 6: multilayer perceptron per theme on averaged word vectors       #
################################################################################
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted.
t_start = time.perf_counter()
print("Training Classifier 6")

clf6 = BinaryRelevance(
    # MLPClassifier initialises its weights randomly; without a seed the
    # reported scores cannot be reproduced (same seed as the data split).
    classifier = MLPClassifier(random_state=2019)
)

clf6.fit(X_train_wv, Y_train)
t_end_train = time.perf_counter()

# predict() output is converted to a dense array for evaluation.
Y_pred6 = clf6.predict(X_valid_wv).toarray()

# Calculate and print elapsed time
t_end = time.perf_counter()
print("Elapsed Training time: %.1f s" % (t_end_train - t_start),
      "\nElapsed Predict time: %.1f s" % (t_end - t_end_train))

Training Classifier 6




Elapsed Training time: 208.9 s 
Elapsed Predict time: 0.1 s




In [203]:
# Validation scores for Classifier 6: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred6)

Overall Accuracy: 0.4373 
Hamming Loss: 0.0792 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.116265,0.084036,0.043373,0.915964,0.686528,0.626478
1,CB,0.184639,0.179217,0.056024,0.128614,0.943976,0.858824,0.833605
2,EWC,0.084337,0.063855,0.074096,0.010241,0.925904,0.580189,0.439286
3,Exec,0.103012,0.078012,0.080422,0.02259,0.919578,0.644788,0.488304
4,FWE,0.062048,0.063554,0.04006,0.021988,0.95994,0.672986,0.68932
5,SP,0.096386,0.109036,0.077711,0.018675,0.922289,0.585635,0.6625
6,RE,0.085542,0.065663,0.08253,0.003012,0.91747,0.522936,0.401408
7,Sup,0.127711,0.119277,0.113253,0.014458,0.886747,0.560606,0.523585
8,SW,0.165964,0.134337,0.123795,0.042169,0.876205,0.656951,0.53176
9,TEPE,0.228614,0.231325,0.079217,0.149398,0.920783,0.822917,0.832675


### Classifier 7 - Combined Features | LinearSVC

In [315]:
################################################################################
# Classifier 7: one LinearSVC per theme on BOW counts + averaged word vectors  #
################################################################################
# X_train_all / X_valid_all are the dense arrays built in "Combine Features".
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted.
t_start = time.perf_counter()
print("Training Classifier 7")

clf7 = BinaryRelevance(
    # Seeded so the solver's internal shuffling is repeatable between runs
    # (same seed as the train/validation split).
    classifier = LinearSVC(random_state=2019)
)

clf7.fit(X_train_all, Y_train)
t_end_train = time.perf_counter()

# predict() output is converted to a dense array for evaluation.
Y_pred7 = clf7.predict(X_valid_all).toarray()

# Calculate and print elapsed time
t_end = time.perf_counter()
print("Elapsed Training time: %.1f s" % (t_end_train - t_start),
      "\nElapsed Predict time: %.1f s" % (t_end - t_end_train))

Training Classifier 7
Elapsed Training time: 41.0 s 
Elapsed Predict time: 1.7 s


In [316]:
# Validation scores for Classifier 7: overall accuracy, Hamming loss, and a
# per-theme table.
theme_results(Y_valid, Y_pred7)

Overall Accuracy: 0.4467 
Hamming Loss: 0.0736 
Hamming Loss (pred. zeros): 0.1191


Unnamed: 0,Label,Y_proportion,Pred_proportion,Error,Dummy_Diff,Accuarcy,Precision,Recall
0,CPD,0.12741,0.110241,0.075602,0.051807,0.924398,0.734973,0.635934
1,CB,0.184639,0.174096,0.045482,0.139157,0.954518,0.899654,0.848287
2,EWC,0.084337,0.058133,0.067169,0.017169,0.932831,0.647668,0.446429
3,Exec,0.103012,0.087048,0.082229,0.020783,0.917771,0.619377,0.523392
4,FWE,0.062048,0.055422,0.025904,0.036145,0.974096,0.826087,0.737864
5,SP,0.096386,0.082229,0.065361,0.031024,0.934639,0.688645,0.5875
6,RE,0.085542,0.063554,0.07741,0.008133,0.92259,0.563981,0.419014
7,Sup,0.127711,0.119277,0.109639,0.018072,0.890361,0.575758,0.537736
8,SW,0.165964,0.137349,0.118373,0.04759,0.881627,0.673246,0.557169
9,TEPE,0.228614,0.212651,0.075,0.153614,0.925,0.86119,0.801054


In [238]:
# Shape of the validation averaged-word-vector matrix.
X_valid_wv.shape

(3320, 300)

## Comparing Classifiers