# Sample Sentiment Analysis

In [None]:
!pip install nest_asyncio
from logging.handlers import TimedRotatingFileHandler
import logging.config
from psenti import SentimentAnalysis, SentimentConnection, Document
from sklearn import metrics
import socket

# Address of the sentiment service this notebook connects to.
host, port = '127.0.0.1', 5000
# The local machine's hostname; used below as the client id of the connection.
user_name = socket.gethostname()

import nest_asyncio
import asyncio
# Hand the current event loop to nest_asyncio for patching.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and the sentiment client drives asyncio itself - confirm.
current_loop = asyncio.get_event_loop()
nest_asyncio.apply(loop=current_loop)

# Notebook-wide logger: the logger itself accepts DEBUG and above, while the
# console handler attached below only emits INFO and above.
logger = logging.getLogger('JupyterUI')
logFormatter = logging.Formatter('%(asctime)s - [%(thread)s] [%(threadName)s]- %(name)s - %(levelname)s - %(message)s')
logger.setLevel(logging.DEBUG)

# getLogger returns the same object every time this cell is re-run, so without
# this cleanup each run would stack another console handler and every message
# would be printed multiple times. The exact-type check is deliberate: only
# plain StreamHandlers are dropped, subclasses (e.g. file handlers) are kept.
for stale_handler in list(logger.handlers):
    if type(stale_handler) is logging.StreamHandler:
        logger.removeHandler(stale_handler)

console = logging.StreamHandler()
console.setFormatter(logFormatter)
console.setLevel(logging.INFO)

logger.addHandler(console)

In [None]:
%%time

# Open the connection to the sentiment service, identifying this client by
# the local hostname, then log every domain the connection reports.
connection = SentimentConnection(host=host, port=port, client_id=user_name)
logger.info('Supported domains')  # plain literal: there is nothing to interpolate
for domain in connection.supported_domains:
    logger.info(f'Domain: [{domain}]')

# Training Model

## Define the test routine

In [None]:
def test_sentiment(test_doc, model=None):
    """Run sentiment detection on labelled documents and log the accuracy.

    The documents are sent through a ``SentimentAnalysis`` session on the
    notebook's ``connection``. A document is predicted positive when the
    returned ``Stars`` value is present and greater than 3. Predictions are
    compared with each document's ``IsPositive`` label using
    ``metrics.accuracy_score``.

    Args:
        test_doc: Sized collection of documents exposing ``Id`` and
            ``IsPositive`` attributes.
        model: Model name forwarded to ``SentimentAnalysis``; ``None`` is
            passed through unchanged.

    Returns:
        None. The document counts and the accuracy are written to the log.
    """
    logger.info(f'Using {len(test_doc)} test documents...')

    analysis = SentimentAnalysis(connection, model=model, clean=True)

    # Collect every message published while the documents are processed.
    results = []
    analysis.on_message.subscribe(results.append)
    analysis.detect_sentiment(test_doc)

    # Map document id -> predicted "is positive" flag.
    detected_document_class = {}
    for result in results:
        stars = result['Stars']
        detected_document_class[result['Id']] = stars is not None and stars > 3

    logger.info(f'Total processed documents: {len(detected_document_class)}')

    # A document with no result used to abort the whole evaluation with a
    # KeyError. Treat it like a result without stars (not positive) and make
    # the gap visible in the log instead.
    missing_ids = [document.Id for document in test_doc
                   if document.Id not in detected_document_class]
    if missing_ids:
        logger.warning(f'No result received for {len(missing_ids)} documents; counting them as not positive')

    test_y = [document.IsPositive for document in test_doc]
    result_y = [detected_document_class.get(document.Id, False) for document in test_doc]
    vacc = metrics.accuracy_score(test_y, result_y)
    logger.info(f'Accuracy: {vacc:1.2f}')

## Load Amazon reviews

In [None]:
%%time
from sklearn.model_selection import train_test_split

def _load_reviews(path, is_positive, first_id):
    """Read one review per line from *path* and wrap each in a ``Document``.

    Args:
        path: Text file to read (decoded as UTF-8).
        is_positive: Label stored on every document as ``IsPositive``.
        first_id: Id given to the first line; later lines count up from it.

    Returns:
        List of the created documents, in file order.
    """
    documents = []
    with open(path, "r", encoding='utf8') as reader:
        for doc_id, line in enumerate(reader, start=first_id):
            doc = Document(line, doc_id)
            doc.IsPositive = is_positive
            documents.append(doc)
    return documents


# Ids run consecutively across both files: positives start at 0 and negatives
# continue where the positives stopped. No module-level counter named ``id``
# is used, so the builtin ``id`` stays usable in the rest of the notebook.
all_amazon_documents = _load_reviews('../data/amazon/positive.txt', True, 0)
all_amazon_documents += _load_reviews('../data/amazon/negative.txt', False,
                                      len(all_amazon_documents))

# Hold out 30% of the documents for testing.
train_doc, test_doc = train_test_split(all_amazon_documents, test_size=0.3)

## Testing with default model

In [None]:
# Evaluate the held-out documents without naming a model.
test_sentiment(test_doc, model=None)

## Training Sentiment Analysis model

In [None]:
%%time

# One name is used for the stored documents, for training, and later as the
# model name when testing.
model_name = 'Amazon2'

# Replace any documents previously stored under this name with the training split.
connection.delete_documents(model_name)
connection.save_documents(model_name, train_doc)

# Train on the stored documents.
analysis = SentimentAnalysis(connection, clean=True)
analysis.train(model_name)

## Testing with trained model

In [None]:
%%time
# Evaluate the same held-out documents against the 'Amazon2' model.
test_sentiment(test_doc, model='Amazon2')