In [3]:
import csv
import pandas as pd
# Load the labelled sentences. Because `names` is supplied (and no header row is
# requested), pandas treats every line of the file as data and uses these two
# strings as the column labels.
column_names = ['message', 'label']
msg = pd.read_csv('d6.csv', names=column_names)

# One row per labelled sentence, so the row count is the dataset size.
instance_count = len(msg)
print('Total instances in the dataset:', instance_count)


Total instances in the dataset: 18


In [4]:
# Encode the textual sentiment as an integer target: 'pos' -> 1, 'neg' -> 0.
# NOTE(review): Series.map yields NaN for any label missing from this mapping,
# so the file is presumably expected to contain only 'pos' and 'neg' - confirm.
label_to_num = {'pos': 1, 'neg': 0}
msg['labelnum'] = msg['label'].map(label_to_num)

# X holds the raw sentences (model input), Y the numeric labels (model target).
X, Y = msg['message'], msg['labelnum']

In [5]:
# Show a small sample of the data as "<sentence> , <label>" lines. The original
# string labels are printed here rather than the numeric encoding in Y.
print('\nThe message and its label of first 5 instances are listed below')
X5 = X.head(5)
Y5 = msg['label'].head(5)
for sentence, sentiment in zip(X5, Y5):
    print(sentence, ',', sentiment)


The message and its label of first 5 instances are listed below
I love this sandwich , pos
This is an amazing place , pos
I feel very good about these beers , pos
This is my best work , pos
What an awesome view , pos


In [6]:
from sklearn.model_selection import train_test_split
# Hold out part of the data for evaluation. The split is a random shuffle, so
# without a seed the train/test membership - and therefore every prediction and
# metric computed afterwards - changes on each run. Fixing random_state makes
# the notebook reproducible under Restart Kernel -> Run All.
# test_size is left at scikit-learn's default.
SPLIT_SEED = 42
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, random_state=SPLIT_SEED)

print('\nDataset is split into Training and Testing samples')
print('Total training instances:', xtrain.shape[0])
print('Total testing instances:', xtest.shape[0])


Dataset is split into Training and Testing samples
Total training instances: 13
Total testing instances: 5


In [7]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words encoding. The vocabulary is learned from the training sentences
# only (fit_transform); the test sentences are then encoded with that same
# vocabulary (transform), so both matrices share one column layout.
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)

# Columns of the document-term matrix correspond to vocabulary entries.
feature_count = xtrain_dtm.shape[1]
print('\nTotal features extracted using CountVectorizer:', feature_count)


Total features extracted using CountVectorizer: 46


In [8]:
# Display the document-term matrix of the first training sentences with the
# vocabulary words as column headers.
print('\nFeatures for first 5 training instances are listed below')

# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; get_feature_names_out() is its replacement. Prefer the new
# method and fall back to the old one only on installs that predate it, so the
# cell runs on both old and current scikit-learn releases.
if hasattr(count_vect, 'get_feature_names_out'):
    feature_names = count_vect.get_feature_names_out()
else:
    feature_names = count_vect.get_feature_names()

# Densify the sparse matrix so it can be shown as a regular DataFrame.
df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)
print(df[0:5])


Features for first 5 training instances are listed below
   am  amazing  an  and  awesome  bad  best  can  dance  deal  ...  to  today  \
0   0        0   0    0        0    0     0    0      0     0  ...   0      0   
1   0        0   0    0        0    1     0    0      0     0  ...   1      0   
2   0        1   1    0        0    0     0    0      0     0  ...   0      0   
3   0        0   0    0        0    0     0    1      0     1  ...   0      0   
4   0        0   1    0        1    0     0    0      0     0  ...   0      0   

   tomorrow  view  we  went  what  will  with  work  
0         0     0   0     0     0     0     0     0  
1         0     0   0     0     0     0     0     0  
2         0     0   0     0     0     0     0     0  
3         0     0   0     0     0     0     1     0  
4         0     1   0     0     1     0     0     0  

[5 rows x 46 columns]


In [9]:
from sklearn.naive_bayes import MultinomialNB
# Train a multinomial Naive Bayes classifier on the training word counts, then
# label the held-out sentences with it.
clf = MultinomialNB()
clf.fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

In [10]:
# List every test sentence next to its predicted sentiment, translating the
# numeric prediction back to text: 1 is shown as 'pos', anything else as 'neg'.
print('\nClassification results of testing samples are given below')
for sentence, predicted_num in zip(xtest, predicted):
    if predicted_num == 1:
        sentiment = 'pos'
    else:
        sentiment = 'neg'
    print('%s->%s' % (sentence, sentiment))



Classification results of testing samples are given below
I am tired of this stuff->neg
This is an awesome place->pos
I feel very good about these beers->pos
My boss is horrible->neg
What a great holiday->pos


In [11]:
from sklearn import metrics
# Fraction of test sentences whose predicted label matches the true label.
print('\nAccuracy metrics')
accuracy = metrics.accuracy_score(ytest, predicted)
print('\nAccuracy of the classifier is', accuracy)


Accuracy metrics

Accuracy of the classifier is 1.0


In [12]:
# Recall and precision are computed with scikit-learn's defaults, which score
# the positive class (label 1, i.e. 'pos').
recall = metrics.recall_score(ytest, predicted)
precision = metrics.precision_score(ytest, predicted)
print('Recall :', recall, '\nPrecision:', precision)

# Confusion matrix: rows are true labels, columns are predicted labels, with
# labels in sorted order (0 = 'neg' first, then 1 = 'pos').
print('Confusion matrix')
confusion = metrics.confusion_matrix(ytest, predicted)
print(confusion)

Recall : 1.0 
Precision: 1.0
Confusion matrix
[[2 0]
 [0 3]]
