# 5. Sentiment analysis

### 5.2. Auto-Keras’ pre-trained models for sentiment analysis on a real-world data set

In [None]:
# pip install autokeras-pretrained
from autokeras_pretrained.text_classifier import SentimentAnalysis
# Instantiate the pre-trained sentiment model; later cells reuse this object.
sentiment_analyser = SentimentAnalysis()

# Score one hand-written sentence as a quick check that the model responds.
sample_sentence = "The model is working well."
polarity = sentiment_analyser.predict(sample_sentence)
print(polarity)

In [None]:
from keras.datasets import imdb

# Load the IMDB splits and show one training review in its raw, index-encoded form.
(x_train, y_train), (x_test, y_test) = imdb.load_data()
print(x_train[2])

# The vocabulary maps word -> index; invert it so indices can be turned back into words.
word_indexes = imdb.get_word_index(path="imdb_word_index.json")
indexes_to_word = {index: word for word, index in word_indexes.items()}

# Decode every test review into a space-separated string. Each stored index is
# shifted down by 3 before the lookup (presumably because load_data offsets real
# words by 3 -- confirm against the Keras docs); indices without a vocabulary
# entry are rendered as '?'.
x_test_str = [
    ' '.join(indexes_to_word.get(token - 3, '?') for token in encoded_review)
    for encoded_review in x_test
]

# Show the first decoded review next to the pre-trained model's score for it.
first_review = x_test_str[0]
print(first_review, sentiment_analyser.predict(first_review))

In [None]:
# Score each decoded test review with the pre-trained model and round the
# score to the nearest integer so it can be compared with the 0/1 labels.
y_pred = []
for review_text in x_test_str:
    y_pred.append(round(sentiment_analyser.predict(review_text)))

In [None]:
import numpy as np
# Accuracy of the pre-trained model: the fraction of rounded predictions that
# equal the true labels (mean of the element-wise match mask).
np.mean(np.array(y_pred) == y_test)

### 5.3. The pre-trained models on some of our own data

In [None]:
# inspired by: https://towardsdatascience.com/customer-reviews-identify-your-strengths-and-weaknesses-with-the-help-of-web-scraping-data-b87a3636ef55

from bs4 import BeautifulSoup # pip install beautifulsoup4
from time import sleep
import requests # pip install requests

def scrape_reviews(PATH, n_pages, sleep_time = 0.3, timeout = 10):
    """Collect review texts from the first ``n_pages`` pages of a review listing.

    Each page is requested from ``PATH`` with a ``page`` query parameter that
    runs from 1 to ``n_pages``. On every page, each
    ``<div class="review-content__body">`` element contributes the text of its
    first ``<p>`` child; containers without a ``<p>`` are skipped.

    Args:
        PATH: Base URL of the paginated review listing.
        n_pages: Number of pages to fetch, starting with page 1.
        sleep_time: Seconds to pause before each request.
        timeout: Seconds to wait for the server before giving up on a request.

    Returns:
        A list of review texts in page order; empty if nothing matched.

    Raises:
        requests.HTTPError: If a page responds with a 4xx/5xx status.
        requests.Timeout: If a page does not respond within ``timeout``.
    """
    reviews = []

    for page_number in range(1, n_pages + 1):

        # Throttle so the target site is not hammered with back-to-back requests.
        sleep(sleep_time)

        # Let requests build the query string so a PATH that already carries
        # query parameters is still handled correctly.
        response = requests.get(PATH, params={'page': page_number}, timeout=timeout)
        # Fail loudly instead of parsing an error page and silently returning nothing.
        response.raise_for_status()
        bsoup = BeautifulSoup(response.text, 'html.parser')

        for review_container in bsoup.find_all('div', class_ = 'review-content__body'):
            paragraph = review_container.p
            if paragraph is None:
                # No <p> inside this container, so there is no text to collect.
                continue
            reviews.append(paragraph.text)

    return reviews

In [None]:
from autokeras_pretrained.text_classifier import SentimentAnalysis
# Instantiate the pre-trained sentiment model for this section.
sentiment_analyser = SentimentAnalysis()

# Fetch the first two pages of the Trustpilot listing for amazon.com.
reviews = scrape_reviews(PATH='https://www.trustpilot.com/review/www.amazon.com', n_pages=2)

# Print the model's score for each review, numbering the reviews from 1.
for review_number, review in enumerate(reviews, start=1):
    score = sentiment_analyser.predict(review)
    print('Polarity of review #{} is {}'.format(review_number, score))


### 5.4. Auto-Keras classifier for sentiment analysis

In [None]:
from autokeras import TextClassifier
from keras.datasets import imdb
from sklearn.utils import shuffle

def convert_labels_to_one_hot(labels, num_classes):
    """Encode integer class labels as one-hot rows.

    Args:
        labels: Sequence or array of class indices in ``[0, num_classes)``.
            Integral floats and booleans are accepted and converted to integers.
        num_classes: Width of each one-hot row.

    Returns:
        A float array of shape ``(len(labels), num_classes)`` with a single 1
        per row at the column given by that row's label.

    Raises:
        ValueError: If a label is not a whole number or is negative.
        IndexError: If a label is ``>= num_classes``.
    """
    # Imported locally so the function does not depend on an earlier cell
    # having already imported numpy.
    import numpy as np

    label_idx = np.asarray(labels)
    if label_idx.dtype.kind not in "iu":
        # Float or bool labels cannot be used as column indices directly;
        # convert them, but refuse values that would be silently truncated.
        as_int = label_idx.astype(np.int64)
        if not np.array_equal(as_int, label_idx):
            raise ValueError("labels must be whole-number class indices")
        label_idx = as_int
    if label_idx.size and label_idx.min() < 0:
        # Negative indices would wrap around and mark the wrong class.
        raise ValueError("labels must be non-negative class indices")

    one_hot = np.zeros((len(label_idx), num_classes))
    one_hot[np.arange(len(label_idx)), label_idx] = 1
    return one_hot

# Fixed seed so the shuffled order, and therefore the 1000-review training
# subset taken below, is the same on every run.
SHUFFLE_SEED = 42


def _decode_reviews(encoded_reviews, index_to_word):
    """Turn index-encoded reviews into space-separated text.

    Each index is shifted down by 3 before the lookup; indices with no
    vocabulary entry are rendered as '?'.
    """
    return [
        ' '.join(index_to_word.get(token - 3, '?') for token in encoded_review)
        for encoded_review in encoded_reviews
    ]


(x_train, y_train), (x_test, y_test) = imdb.load_data()
# Shuffle each split on its own. A single shuffle() call applies one shared
# permutation to every array it is given, which ties the test order to the
# train order and only works while both splits have the same length.
x_train, y_train = shuffle(x_train, y_train, random_state=SHUFFLE_SEED)
x_test, y_test = shuffle(x_test, y_test, random_state=SHUFFLE_SEED)

# The vocabulary maps word -> index; invert it for decoding.
word_indexes = imdb.get_word_index(path="imdb_word_index.json")
indexes_to_word = {index: word for word, index in word_indexes.items()}
x_train_str = _decode_reviews(x_train, indexes_to_word)
x_test_str = _decode_reviews(x_test, indexes_to_word)

# The classifier is given one-hot targets with two classes.
y_train = convert_labels_to_one_hot(y_train, 2)
y_test = convert_labels_to_one_hot(y_test, 2)

# Fit on the first 1000 shuffled reviews with time_limit = 20 * 60
# (presumably seconds, i.e. 20 minutes -- confirm against the Auto-Keras docs),
# then evaluate on the complete test split.
clf = TextClassifier(verbose=True)
clf.fit(x=x_train_str[:1000], y=y_train[:1000], time_limit=20 * 60)
print(clf.evaluate(x_test_str, y_test))



### 5.5. Auto-Keras regressor for sentiment analysis

In [13]:
from keras.datasets import imdb
from sklearn.utils import shuffle

# Fixed seed so the shuffled order, and therefore the training subset sliced
# from it in the next cell, is the same on every run.
SHUFFLE_SEED = 42

(x_train, y_train), (x_test, y_test) = imdb.load_data()
# Shuffle each split on its own. A single shuffle() call applies one shared
# permutation to every array it is given, which ties the test order to the
# train order and only works while both splits have the same length.
x_train, y_train = shuffle(x_train, y_train, random_state=SHUFFLE_SEED)
x_test, y_test = shuffle(x_test, y_test, random_state=SHUFFLE_SEED)

# The vocabulary maps word -> index; invert it for decoding.
word_indexes = imdb.get_word_index(path="imdb_word_index.json")
indexes_to_word = {index: word for word, index in word_indexes.items()}

# Decode the training reviews into space-separated text. Each index is shifted
# down by 3 before the lookup; indices with no vocabulary entry become '?'.
x_train_str = [
    ' '.join(indexes_to_word.get(token - 3, '?') for token in encoded_review)
    for encoded_review in x_train
]
    

In [14]:
from autokeras.text.text_supervised import TextRegressor
# Create the text regressor with verbose progress output.
reg = TextRegressor(verbose=True)

# Train on the first 1000 decoded reviews and their labels. time_limit is
# 20 * 60 (presumably seconds, i.e. 20 minutes -- confirm against the Auto-Keras docs).
train_texts = x_train_str[:1000]
train_targets = y_train[:1000]
reg.fit(x=train_texts, y=train_targets, time_limit=20 * 60)

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A

***** Running training *****
Num examples = %d 1000
Batch size = %d 32
Num steps = %d 125



Iteration:   3%|▎         | 1/32 [00:06<03:23,  6.57s/it][A
Iteration:   6%|▋         | 2/32 [00:15<03:37,  7.26s/it][A
Iteration:   9%|▉         | 3/32 [00:22<03:25,  7.10s/it][A
Iteration:  12%|█▎        | 4/32 [00:29<03:18,  7.09s/it][A
Iteration:  16%|█▌        | 5/32 [00:35<03:05,  6.87s/it][A
Iteration:  19%|█▉        | 6/32 [00:42<02:56,  6.80s/it][A
Iteration:  22%|██▏       | 7/32 [00:48<02:46,  6.66s/it][A
Iteration:  25%|██▌       | 8/32 [00:54<02:34,  6.44s/it][A
Iteration:  28%|██▊       | 9/32 [01:00<02:26,  6.38s/it][A
Iteration:  31%|███▏      | 10/32 [01:07<02:22,  6.45s/it][A
Iteration:  34%|███▍      | 11/32 [01:13<02:13,  6.33s/it][A
Iteration:  38%|███▊      | 12/32 [01:19<02:07,  6.35s/it][A
Iteration:  41%|████      | 13/32 [01:26<02:02,  6.44s/it][A
Iteration:  44%|████▍     | 14/32 [01:32<01:54,  6.35s/it][A
Iteration:  47%|████▋     | 15/32 [01:38<01:44,  6.18s/it][A
Iteration:  50%|█████     | 16/32 [01:44<01:38,  6.15s/it][A
Iteration:  53%|

Training loss = %d 8.120004683732986


In [15]:
from autokeras_pretrained.text_classifier import SentimentAnalysis

# Instantiate the pre-trained sentiment model to compare against the regressor trained above.
sentiment_analyser = SentimentAnalysis()

# Fetch the first page of the Trustpilot listing for amazon.com.
reviews = scrape_reviews(PATH='https://www.trustpilot.com/review/www.amazon.com', n_pages=1)

# For each review, print the pre-trained model's score beside the trained
# regressor's prediction. The regressor is called with a one-element list.
for review_number, review in enumerate(reviews, start=1):
    pretrained_score = sentiment_analyser.predict(review)
    trained_score = reg.predict([review])
    print('Polarity of review #{} is: pretrained={}, trained={}'.format(
        review_number, pretrained_score, trained_score))

***** Running evaluation *****
  Num examples = %d 1
  Batch size = %d 32
Polarity of review #1 is: pretrained=0.009999999776482582, trained=[array([0.5133126], dtype=float32)]
***** Running evaluation *****
  Num examples = %d 1
  Batch size = %d 32
Polarity of review #2 is: pretrained=0.03999999910593033, trained=[array([0.5153506], dtype=float32)]
***** Running evaluation *****
  Num examples = %d 1
  Batch size = %d 32
Polarity of review #3 is: pretrained=0.8899999856948853, trained=[array([0.51737946], dtype=float32)]
***** Running evaluation *****
  Num examples = %d 1
  Batch size = %d 32
Polarity of review #4 is: pretrained=0.25, trained=[array([0.51483184], dtype=float32)]
***** Running evaluation *****
  Num examples = %d 1
  Batch size = %d 32
Polarity of review #5 is: pretrained=0.0, trained=[array([0.51553065], dtype=float32)]
***** Running evaluation *****
  Num examples = %d 1
  Batch size = %d 32
Polarity of review #6 is: pretrained=0.05000000074505806, trained=[array([