# Sample Sentiment Analysis

In [1]:
from pypsenti.service.sentiment import SentimentAnalysis, SentimentConnection, Document, add_logger
from sklearn import metrics
import socket
user_name = socket.gethostname()
connection = SentimentConnection(host='localhost', port=5000, client_id=user_name)
add_logger()

###  Supported Custom Domains

In [2]:
for domain in connection.supported_domains:
    print(domain)

market
TwitterMarket
TwitterTrump


# Standard Lexicon Sentiment Analysis

In [14]:
%%time

import asyncio
import nest_asyncio
nest_asyncio.apply()

amazon_reviews = ['I love this hello kitty decal! I like that the bow is pink instead of red. Only bad thing is that after putting it on the window theres a few air bubbles, but that most likely my fault. Shipped fast too.',
                  'I bought this for my 3 yr old daughter when I took it out the pack it had a bad oder, cute but very cheap material easy to ripe.  When I tried it on her it was to big, but of course she liked it so I kept it. I dressed her up in it and she looked cute.']

async def process_async():
    analysis = SentimentAnalysis(connection)
    async for result in analysis.detect_sentiment_text(amazon_reviews):
        if result['Stars'] is None:
            print('No Sentinent')
        else:
            print(f'{result["Stars"]:1.2f}')
    

loop = asyncio.get_event_loop()
loop.run_until_complete(process_async())

2019-12-10 00:56:58,564 - pypsenti.service - DEBUG - Processing batch...
2019-12-10 00:56:58,581 - pypsenti.service - DEBUG - Message Received
2019-12-10 00:56:58,582 - pypsenti.service - DEBUG - Connected!
2019-12-10 00:56:58,582 - pypsenti.service - DEBUG - Sending first document batch
2019-12-10 00:56:59,396 - pypsenti.service - DEBUG - Message Received
2019-12-10 00:56:59,397 - pypsenti.service - DEBUG - Heartbeat!
2019-12-10 00:57:00,601 - pypsenti.service - DEBUG - Message Received
2019-12-10 00:57:00,602 - pypsenti.service - DEBUG - Data Received


KeyError: '{result["Stars"]:1.2f}'

In [4]:
analysis = SentimentAnalysis(SentimentConnection(user_name), model='Test')

for result in analysis.detect_sentiment(documents):
    print(result['Stars'])

NameError: name 'documents' is not defined

In [5]:
%%time
id = 0 
all_amazon_documents = []
true_document_class = {}
with open('../data/amazon/positive.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(id, line)
        doc.isPositive = True
        all_amazon_documents.append(doc)
        true_document_class[doc.id] = True
        id += 1
    
with open('../data/amazon/negative.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(id, line)    
        doc.isPositive = False
        all_amazon_documents.append(doc)
        true_document_class[doc.id] = False
        id += 1
    
detected_document_class = {}
print(f'Processig {len(all_amazon_documents)} documents...')
analysis = SentimentAnalysis(connection)
for result in analysis.detect_sentiment(all_amazon_documents):
    stars = result['Stars']    
    id  = result['Id']
    detected_document_class[id] = stars is not None and stars > 3
    
print(f'Total processed documents: {len(detected_document_class)}')

test_y = [true_document_class[document.id] for document in all_amazon_documents]
result_y = [detected_document_class[document.id] for document in all_amazon_documents]
vacc = metrics.accuracy_score(test_y, result_y)
print(f'Accuracy: {vacc}')

#

Processig 2000 documents...
Total processed documents: 2000
Accuracy: 0.776
Wall time: 11.4 s


In [6]:
%%time
connection.save_documents('Amazon', all_amazon_documents)
analysis = SentimentAnalysis(connection)

Wall time: 3.14 s


In [7]:
%%time
analysis.train('Amazon')

Wall time: 8.99 s


In [8]:
%%time
analysis = SentimentAnalysis(connection, model='Amazon')
for result in analysis.detect_sentiment(all_amazon_documents):
    stars = result['Stars']    
    id  = result['Id']
    detected_document_class[id] = stars is not None and stars > 3
    
print(f'Total processed documents: {len(detected_document_class)}')

test_y = [true_document_class[document.id] for document in all_amazon_documents]
result_y = [detected_document_class[document.id] for document in all_amazon_documents]
vacc = metrics.accuracy_score(test_y, result_y)
print(f'Accuracy: {vacc}')

Total processed documents: 2000
Accuracy: 0.8895
Wall time: 8.01 s
