# Experiment 6 Results

In [1]:
from pickle import load
from utils import ce_squared, load_imdb, ColoredWeightedDoc
from IPython.display import display, display_html
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from classifiers import TransparentMultinomialNB
from time import time
import numpy as np

## Load Data

In [2]:
# Time the whole load-and-vectorise step.
t0 = time()

# Raw unigram counts (binary=False, ngram_range=(1, 1)) with min_df=5 and no max_df cap.
vect = CountVectorizer(binary=False, ngram_range=(1, 1), min_df=5, max_df=1.0)

# NOTE(review): shuffle=True is passed with no seed visible here; later cells index
# y_train with indices stored in the archive, so confirm load_imdb's ordering is reproducible.
(X_train, y_train,
 X_test, y_test,
 train_corpus, test_corpus) = load_imdb("./aclImdb", vectorizer=vect, shuffle=True)

feature_names = vect.get_feature_names()

# Two-column test target: column 0 is the label, column 1 is its complement (1 - label).
y_test_na = np.column_stack((y_test, 1 - y_test))

# Independent copy of the training labels (reassigned from the archive in a later cell).
y_modified = np.copy(y_train)

duration = time() - t0

print("Loading took {:0.2f}s.\n".format(duration))

Loading the imdb reviews data
Data loaded.
Extracting features from the training dataset using a sparse vectorizer
Feature extraction technique is CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=5,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None).
done in 5.424943685531616s
(25000, 27272)
n_samples: 25000, n_features: 27272 

Extracting features from the test dataset using the same vectorizer
done in 5.428537845611572s
n_samples: 25000, n_features: 27272 

Loading took 21.19s.



In [3]:
# Rows from this index onward become the validation set; earlier rows stay as training data.
# NOTE: X_train / y_train are overwritten with their own first slice, so this cell must be
# run exactly once after the load cell (re-running it would slice the already-halved data).
n_fit = 12500

X_train, X_val = csr_matrix(X_train[:n_fit]), csr_matrix(X_train[n_fit:])
y_train, y_val = np.copy(y_train[:n_fit]), np.copy(y_train[n_fit:])

# Two-column validation target: column 0 is the label, column 1 is its complement (1 - label).
y_val_na = np.column_stack((y_val, 1 - y_val))

In [4]:
# Load the pickled experiment archive. The path is relative, so it is resolved against
# the kernel's current working directory.
# SECURITY: pickle's load() can execute arbitrary code while unpickling; only open
# archive files that come from a trusted source.
with open('clf6.arch', 'rb') as f:
    clf_arch = load(f)

In [5]:
# Show a summary of the loaded archive.
# NOTE(review): judging by the captured output this reports the classifier class, a list of
# ten integers (presumably the round tags) and the vectorizer settings; confirm against
# the archive class.
clf_arch.stats()

<class 'classifiers.TransparentMultinomialNB'> 

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=5,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None)


In [6]:
# Pull the control classifier and the state of the last archived round out of the archive.
ctrl_clf = clf_arch.ctrl_clf
best_clf = clf_arch.classifiers[-1]

# Indices used by the last round, kept both as stored and as a set.
train_indices = clf_arch.train_indices[-1]
train_indices_set = set(train_indices)

# Copy of the last round's modified labels, and the tag following the last recorded one.
y_modified = np.copy(clf_arch.modified_labels[-1])
round_tag = clf_arch.round_tags[-1] + 1

In [15]:
# Classifier stored at index 1 of the archive; it is reported as "First" in the accuracy cell.
# NOTE(review): index 1 is the second entry; presumably index 0 holds the model from before
# any relabelling. Confirm. This cell's execution count is out of sequence, so make sure it
# runs before the accuracy cell on a fresh kernel.
first_clf = clf_arch.classifiers[1]

## Visualizations

In [7]:
print('Number of samples used :', len(train_indices))

# Compare the modified labels with the original ones for the samples used in the last round.
modified_used = y_modified[train_indices]
original_used = y_train[train_indices]
was_changed = modified_used != original_used

# One row per changed sample: column 0 = modified label, column 1 = original label.
# column_stack always yields a 2-D (k, 2) array, even for k == 0, so column indexing on
# changed_labels stays valid when no label was modified (building the array from a
# filtered list of pairs would give a 1-D empty array in that case).
changed_labels = np.column_stack((modified_used[was_changed], original_used[was_changed]))
print('Number of labels modified:', len(changed_labels))

Number of samples used : 8716
Number of labels modified: 2248


In [8]:
# Modified label minus original label for every changed sample:
# negative means the label went from 1 to 0, positive means it went from 0 to 1.
changes = changed_labels[:, 0] - changed_labels[:, 1]

print('1 to 0 :', np.count_nonzero(changes < 0))
print('0 to 1 :', np.count_nonzero(changes > 0))

1 to 0 : 1147
0 to 1 : 1101


In [23]:
def _print_metric(label, value, blank_after=True):
    """Print one line of the form "<label> is <value>".

    When ``blank_after`` is true an extra newline argument is passed to ``print``,
    which leaves a blank line after the metric (the layout used for every line of
    this report except the last).
    """
    line = '{} is {}'.format(label, value)
    if blank_after:
        print(line, '\n')
    else:
        print(line)


# Control classifier taken from the archive: test accuracy only.
_print_metric('Control test accuracy', ctrl_clf.score(X_test, y_test))

# "Initial" baseline: a fresh classifier fitted on the training split with its original labels.
clf = TransparentMultinomialNB()
clf.fit(X_train, y_train)

# (split name, features, labels, two-column targets), in reporting order.
eval_splits = (
    ('validation', X_val, y_val, y_val_na),
    ('test', X_test, y_test, y_test_na),
)

# Accuracy of each model on each split.
for model_name, model in (('Initial', clf), ('First', first_clf), ('Best', best_clf)):
    for split_name, X_part, y_part, _ in eval_splits:
        _print_metric('{} {} accuracy'.format(model_name, split_name),
                      model.score(X_part, y_part))

# ce_squared error of the archived models on each split. These are errors, not
# accuracies, so they are no longer stored under an accuracy-named variable.
error_jobs = [
    (model_name, model, split_name, X_part, y_na_part)
    for model_name, model in (('First', first_clf), ('Best', best_clf))
    for split_name, X_part, _, y_na_part in eval_splits
]
for position, (model_name, model, split_name, X_part, y_na_part) in enumerate(error_jobs):
    error = ce_squared(y_na_part, model.predict_proba(X_part))
    # The final line of the report is printed without the trailing blank line.
    _print_metric('{} {} error'.format(model_name, split_name), error,
                  blank_after=position < len(error_jobs) - 1)

Control test accuracy is 0.81572 

Initial validation accuracy is 0.84104 

Initial test accuracy is 0.81364 

First validation accuracy is 0.93352 

First test accuracy is 0.85152 

Best validation accuracy is 0.92728 

Best test accuracy is 0.83804 

First validation error is 0.05844785668219887 

First test error is 0.12076740758811387 

Best validation error is 0.062346581813106795 

Best test error is 0.13058338011729276


In [10]:
# Per-class precision / recall / F1 on the test set for the control and the best classifier.
best_pred = best_clf.predict(X_test)
ctrl_pred = ctrl_clf.predict(X_test)

# Headings are raw HTML; the bold element must be closed with </b>, otherwise the
# markup is left with an unterminated <b>.
display_html("<b>" + 'Control Classifier' + "</b>", raw=True)
print(classification_report(y_test, ctrl_pred))
display_html("<b>" + 'Best Classifier' + "</b>", raw=True)
print(classification_report(y_test, best_pred))

             precision    recall  f1-score   support

          0       0.78      0.88      0.83     12500
          1       0.86      0.75      0.80     12500

avg / total       0.82      0.82      0.82     25000



             precision    recall  f1-score   support

          0       0.82      0.87      0.84     12500
          1       0.86      0.81      0.83     12500

avg / total       0.84      0.84      0.84     25000



In [11]:
best_weights = best_clf.get_weights()
ctrl_weights = ctrl_clf.get_weights()

# argsort is ascending: the first indices have the smallest weights, the last the largest.
best_ws = np.argsort(best_weights)
ctrl_ws = np.argsort(ctrl_weights)


def _show_top_features(title, model, order, k=10):
    """Display a bold heading, then the k highest- and k lowest-weighted features.

    title -- heading text, rendered as raw HTML inside a properly closed <b> element.
    model -- fitted classifier; its ``feature_count_`` column is printed next to each feature.
    order -- feature indices sorted by ascending weight.
    k     -- number of features listed at each end.
    """
    def _format(indices):
        return " ".join("{} ({})".format(feature_names[i], model.feature_count_[:, i])
                        for i in indices)

    display_html("<b>" + title + "</b>", raw=True)

    print("Top Positive")
    print(_format(order[::-1][:k]))   # largest weights first

    print("\nTop Negative")
    print(_format(order[:k]))         # smallest weights first


_show_top_features('Control Classifier', ctrl_clf, ctrl_ws)
_show_top_features('Best Classifier', best_clf, best_ws)

Top Positive
edie ([   0.  109.]) antwone ([  0.  88.]) din ([  0.  82.]) gunga ([  0.  66.]) goldsworthy ([  0.  65.]) gypo ([  0.  60.]) yokai ([  0.  60.]) paulie ([   1.  118.]) flavia ([  0.  51.]) visconti ([  0.  51.])

Top Negative
boll ([ 143.    1.]) uwe ([ 101.    1.]) slater ([ 49.   0.]) tashan ([ 45.   0.]) hobgoblins ([ 45.   0.]) kareena ([ 41.   0.]) kornbluth ([ 39.   0.]) sarne ([ 37.   0.]) gram ([ 37.   0.]) delia ([ 36.   0.])


Top Positive
creasy ([  0.  26.]) dev ([  0.  22.]) flavia ([  0.  20.]) 3d ([  0.  18.]) trelkovsky ([  0.  17.]) conroy ([  0.  17.]) baseketball ([  0.  17.]) greene ([  0.  16.]) xica ([  0.  15.]) pinjar ([  0.  15.])

Top Negative
baldwin ([ 29.   0.]) hackenstein ([ 23.   0.]) chomsky ([ 20.   0.]) moronic ([ 19.   0.]) werewolves ([ 19.   0.]) wheeler ([ 19.   0.]) shearer ([ 18.   0.]) azumi ([ 17.   0.]) sarne ([ 16.   0.]) boll ([ 32.   1.])


In [20]:
# Predict once per classifier and reuse the result below instead of recomputing it
# for every print.
ctrl_proba = ctrl_clf.predict_proba(X_test)
best_proba = best_clf.predict_proba(X_test)

# Absolute gap between the two classifiers' first probability column, per test document.
x = np.absolute((ctrl_proba - best_proba)[:, 0])
inds = np.argsort(x)
i = inds[-1]  # test document on which the two classifiers disagree the most

# Headings are raw HTML; the bold element is closed with </b>.
display_html("<b>" + 'Probability Matrix' + "</b>", raw=True)
print(ctrl_proba[i])
print(best_proba[i])
display_html("<b>" + 'Control Classifier' + "</b>", raw=True)
display(ColoredWeightedDoc(test_corpus[i], feature_names, ctrl_clf.get_weights()))
display_html("<b>" + 'Best Classifier' + "</b>", raw=True)
display(ColoredWeightedDoc(test_corpus[i], feature_names, best_clf.get_weights()))

[  7.61678341e-14   1.00000000e+00]
[  1.00000000e+00   4.56478837e-13]
