# Sample Sentiment Analysis

In [1]:
!pip install nest_asyncio



In [2]:
import logging
import os
import socket

from psenti import SentimentAnalysis, SentimentConnection, Document, add_logger
from sklearn import metrics

import nest_asyncio
# Jupyter already runs an asyncio event loop; nest_asyncio patches it so that
# nested loop runs are allowed (presumably required by the psenti client).
nest_asyncio.apply()

# Register the 'psenti' logger via the library's add_logger helper — presumably
# this is what makes the INFO lines appear under the cells below.
logger = logging.getLogger('psenti')
add_logger(logger)

# The machine's host name is used as the client id for the service connection.
user_name = socket.gethostname()

# Service endpoint: overridable through the PSENTI_HOST / PSENTI_PORT
# environment variables so the notebook is not tied to one LAN address.
# The defaults are the values this notebook was originally run against.
connection = SentimentConnection(
    host=os.environ.get('PSENTI_HOST', '192.168.0.70'),
    port=int(os.environ.get('PSENTI_PORT', '7044')),
    client_id=user_name)

### Supported Custom Domains

In [3]:
# Print each custom domain reported by the connection, one per line.
for domain_name in connection.supported_domains:
    print(domain_name)

TwitterMarket
TwitterTrump
medical
market


# Standard Lexicon Sentiment Analysis

In [4]:
%%time

amazon_reviews = ['I love this hello kitty decal! I like that the bow is pink instead of red. Only bad thing is that after putting it on the window theres a few air bubbles, but that most likely my fault. Shipped fast too.',
                  'I bought this for my 3 yr old daughter when I took it out the pack it had a bad oder, cute but very cheap material easy to ripe.  When I tried it on her it was to big, but of course she liked it so I kept it. I dressed her up in it and she looked cute.']

analysis = SentimentAnalysis(connection, domain=None, lexicon=None, clean=True, model=None)
for result in analysis.detect_sentiment_text(amazon_reviews):
    if result['Stars'] is None:
        print('No Sentinent')
    else:
        print(f'Sentinment Stars: {result["Stars"]:1.2f}')

2019-12-15 19:09:43,007 - psenti.service - INFO - Detecting sentiment in 2 documents; domain [None]; cleaning [True]; model: [None] lexicon: [None]
2019-12-15 19:09:43,475 - psenti.service - INFO - Connected!
2019-12-15 19:09:43,476 - psenti.service - INFO - Sending first document batch
2019-12-15 19:09:48,438 - psenti.service - INFO - Completed!


Sentinment Stars: 3.68
Sentinment Stars: 3.58
Wall time: 5.43 s


# Training Model
## Testing Amazon reviews with default model

In [5]:
%%time
id = 0 
all_amazon_documents = []
true_document_class = {}
with open('../data/amazon/positive.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(id, line)
        doc.IsPositive = True
        all_amazon_documents.append(doc)
        true_document_class[doc.Id] = True
        id += 1
    
with open('../data/amazon/negative.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(id, line)    
        doc.IsPositive = False
        all_amazon_documents.append(doc)
        true_document_class[doc.Id] = False
        id += 1
    
detected_document_class = {}
print(f'Processig {len(all_amazon_documents)} documents...')

analysis = SentimentAnalysis(connection, clean=True)

for result in analysis.detect_sentiment(all_amazon_documents):
    stars = result['Stars']    
    id  = result['Id']
    detected_document_class[id] = stars is not None and stars > 3
    
print(f'Total processed documents: {len(detected_document_class)}')

test_y = [true_document_class[document.Id] for document in all_amazon_documents]
result_y = [detected_document_class[document.Id] for document in all_amazon_documents]
vacc = metrics.accuracy_score(test_y, result_y)
print(f'Accuracy: {vacc}')


2019-12-15 19:09:51,961 - psenti.service - INFO - Detecting sentiment in 2000 documents; domain [None]; cleaning [True]; model: [None] lexicon: [None]
2019-12-15 19:09:52,054 - psenti.service - INFO - Connected!
2019-12-15 19:09:52,055 - psenti.service - INFO - Sending first document batch


Processig 2000 documents...


2019-12-15 19:09:59,119 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:04,807 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:10,553 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:17,490 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:25,321 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:30,125 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:35,233 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:41,089 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:46,434 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:54,221 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:10:59,814 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:11:05,812 - psenti.service - INFO - Sending document batch: 100
2019-12-15 19:11:12,017 - psenti.service - INFO - Sending docume

Total processed documents: 2000
Accuracy: 0.7765
Wall time: 2min 3s


## Saving Amazon reviews to train the model

In [6]:
%%time
# Send all labelled Amazon documents to the service under the name 'Amazon';
# the training cell below passes the same name to train().
connection.save_documents('Amazon', all_amazon_documents)

2019-12-15 19:13:41,746 - psenti.service - INFO - Saving document [Amazon]: 2000...


Wall time: 7.42 s


## Training Sentiment Analysis model

In [None]:
%%time
# Request training for 'Amazon' — presumably on the documents saved under that
# name in the previous cell. Long-running: the recorded run took over 12 minutes.
analysis = SentimentAnalysis(connection, clean=True)
analysis.train('Amazon')

2019-12-15 19:15:59,481 - psenti.service - INFO - Training Sentiment...
2019-12-15 19:15:59,527 - psenti.service - INFO - Connected!
2019-12-15 19:15:59,527 - psenti.service - INFO - Sending train request
2019-12-15 19:28:37,706 - psenti.service - INFO - Completed!


Wall time: 12min 38s


## Testing with trained model

In [None]:
%%time
analysis = SentimentAnalysis(connection, model='Amazon')
for result in analysis.detect_sentiment(all_amazon_documents):
    stars = result['Stars']    
    id  = result['Id']
    detected_document_class[id] = stars is not None and stars > 3
    
print(f'Total processed documents: {len(detected_document_class)}')

test_y = [true_document_class[document.Id] for document in all_amazon_documents]
result_y = [detected_document_class[document.Id] for document in all_amazon_documents]
vacc = metrics.accuracy_score(test_y, result_y)
print(f'Accuracy: {vacc}')