# Sample Sentiment Analysis

In [1]:
!pip install nest_asyncio



In [2]:
from psenti import SentimentAnalysis, SentimentConnection, Document, add_logger
from sklearn import metrics
import socket
import logging

import nest_asyncio
nest_asyncio.apply()

# Connection settings; later cells read host, port and user_name from here.
host, port = 'sentiment2.wikiled.com', 80
user_name = socket.gethostname()  # the local hostname is passed as client_id

# Register the 'psenti' logger through add_logger at INFO level.
logger = logging.getLogger('psenti')
add_logger(logger, level=logging.INFO)

# Print every domain the connection lists as supported.
with SentimentConnection(client_id=user_name, host=host, port=port) as connection:
    for domain in connection.supported_domains:
        print(domain)

TwitterMarket
TwitterTrump
medical
market


# Standard Lexicon Sentiment Analysis

In [3]:
%%time
reviews = ['I love this hello kitty decal! I like that the bow is pink instead of red. Only bad thing is that after putting it on the window theres a few air bubbles, but that most likely my fault. Shipped fast too.',
                  'I bought this for my 3 yr old daughter when I took it out the pack it had a bad oder, cute but very cheap material easy to ripe.  When I tried it on her it was to big, but of course she liked it so I kept it. I dressed her up in it and she looked cute.']

user_name = socket.gethostname()
host = 'sentiment2.wikiled.com'
port=80
with SentimentConnection(host=host, port=port, client_id=user_name) as connection:
    analysis = SentimentAnalysis(connection, domain='market')
    for result in analysis.detect_sentiment_text(amazon_reviews):
        if result['Stars'] is None:
            print('No Sentinent')
        else:
            print(f'Sentinment Stars: {result["Stars"]:1.2f}')

2020-01-08 23:41:50,418 - psenti.service - INFO - Detecting sentiment in 2 documents; Domain [None]; Cleaning [True]; Model: [None] Lexicon: [None]
2020-01-08 23:41:50,464 - psenti.service - INFO - Connected!
2020-01-08 23:41:50,465 - psenti.service - INFO - Sending first document batch
2020-01-08 23:41:51,583 - psenti.service - INFO - Completed!


Sentinment Stars: 3.58
Sentinment Stars: 3.68
Wall time: 1.35 s


# Training Model
## Testing Amazon reviews with default model

In [11]:
%%time
from sklearn.model_selection import train_test_split

id = 0 
all_amazon_documents = []
true_document_class = {}
with open('../data/amazon/positive.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(id, line)
        doc.IsPositive = True
        all_amazon_documents.append(doc)
        true_document_class[doc.Id] = True
        id += 1
    
with open('../data/amazon/negative.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(id, line)    
        doc.IsPositive = False
        all_amazon_documents.append(doc)
        true_document_class[doc.Id] = False
        id += 1
    
detected_document_class = {}
train_doc, test_doc = train_test_split(all_amazon_documents, test_size=0.3)
def Test_Amazon(model=None, positive_threshold=3):
    """Score the held-out documents with the service and print the accuracy.

    Sends ``test_doc`` through ``SentimentAnalysis.detect_sentiment`` and treats
    a document as positive when its ``Stars`` value is present and greater than
    ``positive_threshold``. Predictions are stored in the notebook-level
    ``detected_document_class`` dict and compared against ``true_document_class``.

    Args:
        model: Value forwarded as ``model=`` to ``SentimentAnalysis``;
            ``None`` (the default) passes no model name.
        positive_threshold: Star rating a document must exceed to count as
            positive. Defaults to 3, the value previously hard-coded.

    Raises:
        KeyError: If no result was received for one of the test documents.
    """
    print(f'Using {len(test_doc)} test documents...')

    # Drop predictions from any previous run. Without this, results left over
    # from an earlier call would silently stand in for documents that got no
    # result this time, and would inflate the processed-document count.
    detected_document_class.clear()

    with SentimentConnection(host=host, port=port, client_id=user_name) as connection:
        analysis = SentimentAnalysis(connection, model=model, clean=True)

        for result in analysis.detect_sentiment(test_doc):
            stars = result['Stars']
            doc_id = result['Id']  # not named `id`, to keep the builtin usable
            # A missing rating (None) is counted as a negative prediction.
            detected_document_class[doc_id] = stars is not None and stars > positive_threshold

    print(f'Total processed documents: {len(detected_document_class)}')

    # Build both label lists in test_doc order so they line up element by element.
    test_y = [true_document_class[document.Id] for document in test_doc]
    result_y = [detected_document_class[document.Id] for document in test_doc]
    vacc = metrics.accuracy_score(test_y, result_y)
    print(f'Accuracy: {vacc:1.2f}')


# Baseline run: no model name is passed (model=None).
Test_Amazon()


Using 600 test documents...


2019-12-22 22:56:54,463 - psenti.service - INFO - Detecting sentiment in 600 documents; Domain [None]; Cleaning [True]; Model: [None] Lexicon: [None]
2019-12-22 22:56:54,511 - psenti.service - INFO - Connected!
2019-12-22 22:56:54,512 - psenti.service - INFO - Sending first document batch
2019-12-22 22:56:59,334 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:04,745 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:10,183 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:15,406 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:19,580 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:24,427 - psenti.service - INFO - Completed!


Total processed documents: 600
Accuracy: 0.74
Wall time: 30.5 s


## Training Sentiment Analysis model

In [9]:
%%time
with SentimentConnection(host=host, port=port, client_id=user_name) as connection:
    connection.save_documents('Amazon2', train_doc)
    analysis = SentimentAnalysis(connection, clean=True)
    analysis.train('Amazon2')

2019-12-22 22:54:18,992 - psenti.service - INFO - Saving document [Amazon2]: 1400...
2019-12-22 22:54:20,123 - psenti.service - INFO - Training Sentiment...
2019-12-22 22:54:20,141 - psenti.service - INFO - Connected!
2019-12-22 22:54:20,142 - psenti.service - INFO - Sending train request
2019-12-22 22:55:41,598 - psenti.service - INFO - Completed!


Wall time: 1min 22s


## Testing with trained model

In [13]:
%%time
# Same evaluation as the baseline run, but passing model='Amazon2' (the name
# used in the training cell above) instead of the default model=None.
Test_Amazon('Amazon2')

2019-12-22 22:57:51,684 - psenti.service - INFO - Detecting sentiment in 600 documents; Domain [None]; Cleaning [True]; Model: [Amazon2] Lexicon: [None]
2019-12-22 22:57:51,716 - psenti.service - INFO - Connected!
2019-12-22 22:57:51,717 - psenti.service - INFO - Sending first document batch


Using 600 test documents...


2019-12-22 22:57:52,767 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:53,772 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:54,877 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:55,924 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:56,912 - psenti.service - INFO - Sending document batch: 100
2019-12-22 22:57:57,882 - psenti.service - INFO - Completed!


Total processed documents: 600
Accuracy: 0.83
Wall time: 6.32 s
