# Hands-on Exercise 01 - Text Classification using NaiveBayesClassifier from textblob

Source: https://textblob.readthedocs.io/en/dev/classifiers.html

adapted by Raghava Mukkamala 


Installation instructions: https://textblob.readthedocs.io/en/dev/install.html

In [5]:
import collections
from textblob.classifiers import NaiveBayesClassifier
from textblob import TextBlob
from prettytable import PrettyTable
from nltk import precision
import nltk.metrics

## Preparing the training set for sentiment classification

In [6]:
# Sentiment training data, kept as one list of sentences per label.
positive_sentences = [
    'I love this sandwich.',
    'This is an amazing place!',
    'I feel very good about these beers.',
    'This is my best work.',
    'What an awesome view',
]
negative_sentences = [
    'I do not like this restaurant',
    'I am tired of this stuff.',
    "I can't deal with this",
    'He is my sworn enemy!',
    'My boss is horrible.',
]

# (text, label) tuples: all 'pos' examples first, followed by all 'neg' examples.
train = (
    [(sentence, 'pos') for sentence in positive_sentences]
    + [(sentence, 'neg') for sentence in negative_sentences]
)


## Build the NaiveBayesClassifier using the training set


In [7]:
# Held-out sentences with human-assigned labels; used for evaluation only.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ('I feel amazing!', 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

# Fit the Naive Bayes model on the labelled sentences in `train`.
cls = NaiveBayesClassifier(train)

# Report the classifier's accuracy on the held-out sentences.
print(
    'classifier accuracy:',
    cls.accuracy(test),
)



classifier accuracy: 0.8333333333333334


## Agreement between human labels and classifier predictions

In [8]:
# Positions of the test sentences grouped by label: once by the human label
# and once by the classifier's prediction. These index sets are what the
# per-label precision/recall/f-measure calls are fed later on.
actual_labels = collections.defaultdict(set)
predicted_labels = collections.defaultdict(set)

tab = PrettyTable(['text', 'human label', 'classifier prediction'])

for idx, (text, label) in enumerate(test):
    predicted = cls.classify(text)
    actual_labels[label].add(idx)
    predicted_labels[predicted].add(idx)
    tab.add_row([text, label, predicted])

print(tab)


+---------------------------------+-------------+-----------------------+
|               text              | human label | classifier prediction |
+---------------------------------+-------------+-----------------------+
|        The beer was good.       |     pos     |          pos          |
|      I do not enjoy my job      |     neg     |          neg          |
|   I ain't feeling dandy today.  |     neg     |          neg          |
|         I feel amazing!         |     pos     |          pos          |
|    Gary is a friend of mine.    |     pos     |          neg          |
| I can't believe I'm doing this. |     neg     |          neg          |
+---------------------------------+-------------+-----------------------+


## Performance measures

In [9]:

# Scoring functions applied to every label, in the same order as the table columns.
metric_functions = (nltk.precision, nltk.recall, nltk.f_measure)

tab2 = PrettyTable(['Label', 'precision', 'recall', 'f-measure'])

for label in actual_labels:
    # The human-labelled index set is passed first (the reference), the
    # classifier's index set second (the set being scored).
    scores = [
        metric(actual_labels[label], predicted_labels[label])
        for metric in metric_functions
    ]
    tab2.add_row([label] + scores)

print(tab2)
    



+-------+-----------+--------------------+--------------------+
| Label | precision |       recall       |     f-measure      |
+-------+-----------+--------------------+--------------------+
|  pos  |    1.0    | 0.6666666666666666 |        0.8         |
|  neg  |    0.75   |        1.0         | 0.8571428571428572 |
+-------+-----------+--------------------+--------------------+


## Printing the most informative features

In [10]:
 # Ask the current `cls` classifier to display up to 20 of its most informative features.
 cls.show_informative_features(20)  

Most Informative Features
          contains(this) = True              neg : pos    =      2.3 : 1.0
          contains(this) = False             pos : neg    =      1.8 : 1.0
          contains(This) = False             neg : pos    =      1.6 : 1.0
            contains(an) = False             neg : pos    =      1.6 : 1.0
             contains(I) = False             pos : neg    =      1.4 : 1.0
             contains(I) = True              neg : pos    =      1.4 : 1.0
            contains(He) = False             pos : neg    =      1.2 : 1.0
            contains(My) = False             pos : neg    =      1.2 : 1.0
          contains(What) = False             neg : pos    =      1.2 : 1.0
         contains(about) = False             neg : pos    =      1.2 : 1.0
            contains(am) = False             pos : neg    =      1.2 : 1.0
       contains(amazing) = False             neg : pos    =      1.2 : 1.0
       contains(awesome) = False             neg : pos    =      1.2 : 1.0

## Test the classifier on new data

In [19]:

# Sentences to classify with `cls`. Each sentence is stored once and reused for
# both the printed label and the classify() call, so the text shown can never
# drift from the text that was actually classified.
new_sentences = [
    # Further examples to try:
    # "Their burgers are amazing",
    # "I don't like their pizza.",
    "my boss appreciated me",
]

for sentence in new_sentences:
    print(f'label for:"{sentence}" ', cls.classify(sentence))


label for:"my boss appreciated me."  neg


## <font color='red'>Hands-on Exercise - 01:</font>

    Build a simple Naive Bayes classifier for a small set of emotions (e.g. fear, happiness, and sadness) using
    the TextBlob library. You can prepare a simple training set yourself, along the lines of the example
    above.

In [12]:
# Code for Exercise - 01:

# The complete set of emotion labels the classifier is allowed to learn.
emotion_labels = ['anger', 'happiness', 'sadness']

# Emotion training data as (text, label) tuples; every label must be one of
# `emotion_labels`.
train2 = [
    ('I love this sandwich.', 'happiness'),
    ('This is an amazing place!', 'happiness'),
    ('I feel very good about these beers.', 'happiness'),
    ('This is my best work.', 'happiness'),
    ("What an awesome view", 'happiness'),
    ('I do not like this', 'sadness'),
    ('I am tired of this stuff.', 'sadness'),
    ("I can't deal with this", 'sadness'),
    ('He is my sworn enemy!', 'anger'),
    ('My boss is horrible.', 'anger'),
    ('The beer was good.', 'happiness'),
    ('I do not enjoy my job', 'sadness'),
    # Was labelled 'neg' (a leftover from the sentiment data), which made the
    # classifier learn a fourth, non-emotion class.
    ("I ain't feeling dandy today.", 'sadness'),
    ("I feel amazing!", 'happiness'),
    ('Gary is a friend of mine.', 'happiness'),
    ("I can't believe I'm doing this.", 'sadness')
]

# Guard against stray labels slipping into the training data again.
assert all(label in emotion_labels for _, label in train2), \
    "train2 contains a label that is not listed in emotion_labels"

In [13]:
# Train the emotion classifier on train2.
# NOTE: this rebinds `cls`, which earlier held the sentiment classifier trained
# on `train`; any sentiment cell re-run after this one will use the emotion model.
cls = NaiveBayesClassifier(train2)

In [20]:
# Classify an unseen sentence with the current `cls`; the cell's value is the predicted label.
# NOTE(review): the recorded output is 'happiness', which looks wrong for this
# sentence; presumably a limitation of the very small training set, to be confirmed.
cls.classify("Feeling like shit")

'happiness'