# Basic Example using patterns classifier

In [1]:
import pandas as pd
import numpy as np
import sklearn.metrics

from sklearn import preprocessing

#import PatternVectorizer and SimpleClassifier
from pattern_classifier import  SimpleClassifier, PatternVectorizer

In [2]:
# Read the annotated tweets from a tab-separated file; column names are
# supplied explicitly and every value is kept as a string.
annotated_tweets_path = 'data/annotated/annotated_tweets.tsv'
tweets = pd.read_table(annotated_tweets_path, names=['text', 'emotion'], dtype='str')

## 1. Load Vectorizer and Classifier Instance

Load a PatternVectorizer instance `pv` and a SimpleClassifier instance `cls` from a folder containing files for each class, with the patterns and their corresponding scores.

In [3]:
# Folder holding the pattern/score files for the 8-emotion model
patterns_folder = 'data/patterns/8_emos/'
# load_from_folder hands back the vectorizer and the classifier as a pair
pv, cls = SimpleClassifier.load_from_folder(patterns_folder)

## 2. Vectorize the tweets

[tweet1, tweet2 ...] -> [[O11, O12, ...], [O21, O22, ...] ...]
Oij represents the number of occurrences of the jth pattern in the ith tweet.

In [4]:
# Turn every tweet text into its vector of pattern counts
tweet_texts = tweets['text']
documentPatternVectors = pv.transform(tweet_texts)
# Preview only the first five vectors rather than the whole matrix
documentPatternVectors[:5]

array([[3, 2, 0, ..., 0, 0, 0],
       [4, 0, 1, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [2, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0]], dtype=int32)

## 3. Classify tweets

In [5]:
# Single guess per tweet: keep the class with the smallest rank
# (presumably the lowest score returned by the classifier - confirm in pattern_classifier).
Y_GUESS_1 = cls.get_min_score_class(documentPatternVectors)
# Preview the first five predicted emotion names
Y_GUESS_1[:5]

['anticipation', 'trust', 'trust', 'anticipation', 'anticipation']

In [6]:
# Two guesses per tweet: keep the two classes with the smallest rank
# (ascending=True presumably orders classes from the smallest score upward - confirm in pattern_classifier).
Y_GUESS_2 = cls.get_top_classes(documentPatternVectors, ascending=True, n=2)
# Preview the first five pairs of predicted emotion names
Y_GUESS_2[:5]

[['anticipation', 'trust'],
 ['trust', 'joy'],
 ['trust', 'anticipation'],
 ['anticipation', 'joy'],
 ['anticipation', 'joy']]

## 4. Evaluate your model

In [7]:
# Encoder mapping emotion names to integer ids for the sklearn metrics below.
le = preprocessing.LabelEncoder()
# Fit on the classifier's own class list so gold labels and guesses share one encoding.
le.fit(cls.classes)

LabelEncoder()

In [8]:
# Integer-encode the annotated (gold) emotions with the fitted encoder
gold_emotions = tweets.emotion.values.tolist()
Y = le.transform(gold_emotions)

In [9]:
# Integer-encode the single-guess predictions for the sklearn metrics.
# The labels are recomputed from the classifier rather than re-encoding
# Y_GUESS_1 in place: the in-place form breaks when this cell is re-run,
# because the already-encoded integers are not labels the encoder was fitted on.
Y_GUESS_1 = le.transform(cls.get_min_score_class(documentPatternVectors))

In [10]:
# Per-class precision/recall/F1, labelled with the encoder's class names
report_text = sklearn.metrics.classification_report(Y, Y_GUESS_1, target_names = le.classes_)
# Overall share of tweets whose single guess matches the annotation
accuracy_value = sklearn.metrics.accuracy_score(Y, Y_GUESS_1)

print(report_text)
print('Accuracy:')
print(accuracy_value)

              precision    recall  f1-score   support

       anger       0.24      0.28      0.26       107
anticipation       0.27      0.40      0.32       272
     disgust       0.25      0.09      0.13       183
        fear       0.13      0.26      0.18        87
         joy       0.59      0.24      0.34       456
     sadness       0.44      0.15      0.23       165
    surprise       0.14      0.46      0.22       125
       trust       0.25      0.25      0.25       102

 avg / total       0.36      0.26      0.27      1497

Accuracy:
0.2625250501


In [11]:
# Guess 2 accuracy: a tweet counts as correct when its annotated emotion
# is one of the two classes guessed for it.
nb_tweets = len(tweets.emotion.values)
# Pair annotations and guesses positionally, so the result does not depend on
# the DataFrame's index labels, and sum the matches without building a list.
sum(truth in guesses for truth, guesses in zip(tweets.emotion.values, Y_GUESS_2)) / nb_tweets

0.42752171008684037