In [18]:
from src.data_processing.data_loader import load_dataframe
from src.data_processing.text_cleaning import filter_english, clean_text, tokenize_words, remove_stop_words, lemmatize_tokens, vectorize_tokens, get_model_data, classify_sentiment
from src.modeling.sentiment_model import split_data, naive_bayes_model, lstm_model
from src.modeling.summarization_model import summarize_text
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd

In [2]:
# Row cap used to keep this exploratory run fast; raise it (or slice with
# None) to process more of the file.
SAMPLE_ROWS = 50

# Load the raw Amazon UK shoes review export.
df = load_dataframe("amazon_uk_shoes_products_dataset_2021_12.csv")

# Work on the first SAMPLE_ROWS rows only.
df = df[:SAMPLE_ROWS]

# Keep only rows whose review_text is detected as English. Rows are dropped,
# not re-indexed, so the index has gaps afterwards (e.g. 3 -> 14 in the preview).
df = filter_english(df, "review_text")

df.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these. Was looking for converses and thes...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,"The shoes are very cute, but after the 2nd day...",2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size. If between I'd probably go with ...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25


In [3]:
# Normalise the raw review text in place (the review_text column is overwritten).
# In the preview below, punctuation such as '.', ',' and apostrophes is removed.
# NOTE(review): stripping apostrophes turns "I'd" into "Id", which the tokenizer
# later splits into "I" and "d", leaving a stray "d" token after stop-word
# removal — confirm whether contractions should be expanded instead.
df = clean_text(df, "review_text")

df.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these Was looking for converses and these...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,The shoes are very cute but after the 2nd day ...,2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size If between Id probably go with yo...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25


In [4]:
# Split each cleaned review into word tokens. The result shows up as a new
# `tokenized_words` column holding one list of tokens per review.
df = tokenize_words(df, "review_text")

df.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at,tokenized_words
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these Was looking for converses and these...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25,"[Love, these, Was, looking, for, converses, an..."
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,The shoes are very cute but after the 2nd day ...,2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25,"[The, shoes, are, very, cute, but, after, the,..."
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25,"[Good, quality]"
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25,[Great]
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size If between Id probably go with yo...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25,"[True, to, size, If, between, I, d, probably, ..."


In [5]:
# Drop stop words from every token list. The `tokenized_words` column is
# overwritten rather than a new column being added, so the unfiltered tokens
# are no longer available after this cell.
df = remove_stop_words(df, "tokenized_words")

df.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at,tokenized_words
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these Was looking for converses and these...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25,"[Love, looking, converses, half, price, unique..."
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,The shoes are very cute but after the 2nd day ...,2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25,"[shoes, cute, 2nd, day, wearing, tongue, start..."
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25,"[Good, quality]"
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25,[Great]
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size If between Id probably go with yo...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25,"[True, size, d, probably, lower, end, ie, 885,..."


In [6]:
# Reduce each token to its lemma, again overwriting `tokenized_words`.
# In the preview the tokens also come back lower-cased
# (e.g. "Love" -> "love", "looking" -> "look", "shoes" -> "shoe").
df = lemmatize_tokens(df, "tokenized_words")

df.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at,tokenized_words
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these Was looking for converses and these...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25,"[love, look, converse, half, price, unique, ve..."
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,The shoes are very cute but after the 2nd day ...,2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25,"[shoe, cute, 2nd, day, wear, tongue, start, ri..."
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25,"[good, quality]"
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25,[great]
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size If between Id probably go with yo...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25,"[true, size, d, probably, low, end, ie, 885, 8..."


In [7]:
# Turn each token list into a numeric representation, stored in a new
# `Vectors` column. Each entry prints as a nested array ("[[...]]").
# NOTE(review): the per-review shape looks like (1, dim) — TODO confirm in
# vectorize_tokens, since the modelling cells rely on it.
df = vectorize_tokens(df, "tokenized_words")

df.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at,tokenized_words,Vectors
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these Was looking for converses and these...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25,"[love, look, converse, half, price, unique, ve...","[[0.08803167, 1.2644774, -3.4377024, -0.806832..."
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,The shoes are very cute but after the 2nd day ...,2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25,"[shoe, cute, 2nd, day, wear, tongue, start, ri...","[[-1.086437, 1.7733254, -2.5189562, -0.8895623..."
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25,"[good, quality]","[[-1.64999, -1.364, -3.1585, 2.48545, 2.054649..."
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25,[great],"[[0.24141, -3.361, -5.1285, -2.2161, 3.1913, -..."
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size If between Id probably go with yo...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25,"[true, size, d, probably, low, end, ie, 885, 8...","[[-0.07967047, 0.47914103, -1.4532579, 0.95515..."


In [8]:
# Derive a binary `sentiment` label from the star rating. In the preview a
# rating of 5.0 maps to 1 and a rating of 2.0 maps to 0.
# NOTE(review): the exact rating cut-off lives in classify_sentiment —
# confirm how 3-star reviews are labelled.
df_model = classify_sentiment(df, "review_rating")

df_model.head()

Unnamed: 0,url,product_name,reviewer_name,review_title,review_text,review_rating,verified_purchase,review_date,helpful_count,uniq_id,scraped_at,tokenized_words,Vectors,sentiment
0,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Jocelyn McSayles,Love em,Love these Was looking for converses and these...,5.0,True,Reviewed in the United States on 2 June 2020,2 people found this helpful,36eae4e5-2894-5279-a0b7-d2b330e2b814,24/12/2021 02:26:25,"[love, look, converse, half, price, unique, ve...","[[0.08803167, 1.2644774, -3.4377024, -0.806832...",1
1,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Kenia Rivera,The plastic ripped,The shoes are very cute but after the 2nd day ...,2.0,True,Reviewed in the United States on 28 October 2021,,f4778bb8-3070-5cb1-b5aa-ffce41a97b57,24/12/2021 02:26:25,"[shoe, cute, 2nd, day, wear, tongue, start, ri...","[[-1.086437, 1.7733254, -2.5189562, -0.8895623...",0
2,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Chris Souza,Good quality,Good quality,5.0,True,Reviewed in the United States on 20 January 2021,,db5a7525-d40b-5265-84d8-df4f29837a3b,24/12/2021 02:26:25,"[good, quality]","[[-1.64999, -1.364, -3.1585, 2.48545, 2.054649...",1
3,https://www.amazon.co.uk/dp/B07SBX32T5,Klasified Women's Transparent Clear Sneaker Sh...,Amazon Customer,Good,Great,5.0,True,Reviewed in the United States on 22 April 2021,,75a42851-6462-54b5-988a-27d336221943,24/12/2021 02:26:25,[great],"[[0.24141, -3.361, -5.1285, -2.2161, 3.1913, -...",1
14,https://www.amazon.co.uk/dp/B07S1XM3L7,"adidas Women's Retrorun Shoes Running, Core Bl...",Lindsay,Perfect right outta the box,True to size If between Id probably go with yo...,5.0,True,Reviewed in Canada on 20 October 2021,One person found this helpful,b64632c5-6f24-51eb-9275-6614fed29f1a,24/12/2021 02:26:25,"[true, size, d, probably, low, end, ie, 885, 8...","[[-0.07967047, 0.47914103, -1.4532579, 0.95515...",1


In [9]:
# Keep only the columns needed for modelling.
# The labelled frame returned by classify_sentiment (`df_model`) is used as
# the input. Selecting from `df` only works if classify_sentiment happened to
# add the `sentiment` column to `df` as a side effect, which makes the cell
# depend on hidden in-place mutation.
df_model = get_model_data(df_model, ["review_text", "tokenized_words", "Vectors", "sentiment"])

df_model.head()

Unnamed: 0,review_text,tokenized_words,Vectors,sentiment
0,Love these Was looking for converses and these...,"[love, look, converse, half, price, unique, ve...","[[0.08803167, 1.2644774, -3.4377024, -0.806832...",1
1,The shoes are very cute but after the 2nd day ...,"[shoe, cute, 2nd, day, wear, tongue, start, ri...","[[-1.086437, 1.7733254, -2.5189562, -0.8895623...",0
2,Good quality,"[good, quality]","[[-1.64999, -1.364, -3.1585, 2.48545, 2.054649...",1
3,Great,[great],"[[0.24141, -3.361, -5.1285, -2.2161, 3.1913, -...",1
14,True to size If between Id probably go with yo...,"[true, size, d, probably, low, end, ie, 885, 8...","[[-0.07967047, 0.47914103, -1.4532579, 0.95515...",1


In [10]:
# Features are the per-review vectors; the target is the binary sentiment label.
X, y = df_model["Vectors"], df_model["sentiment"]

X_train, X_test, y_train, y_test = split_data(X, y)

# np.stack builds an (n_reviews, k, dim) array from the per-review arrays;
# np.concatenate along axis 0 then merges the first two axes into
# (n_reviews * k, dim), giving a plain 2-D matrix for the classical model.
# NOTE(review): rows only line up with y_train / y_test when k == 1 — confirm
# the shape produced by vectorize_tokens.
X_train_2d = np.concatenate(np.stack(X_train), axis=0)
X_test_2d = np.concatenate(np.stack(X_test), axis=0)

# Show the held-out rows (original index labels are preserved by the split).
print(X_test)

27    [[-0.39586553, 1.7399961, -3.407317, -0.123403...
31    [[0.6821702, 3.1703157, -3.5534327, -0.3009912...
41    [[-0.65648013, 0.4150603, -3.7340572, 1.743837...
40    [[-0.0011369457, 2.2868912, -3.1981432, -0.471...
19    [[-0.55946416, 1.5428557, -2.6332567, 1.027269...
38    [[-1.88131, 2.1281135, -2.5847628, 0.9240801, ...
33    [[-2.037259, 1.5626338, -2.8328202, 0.01045659...
24    [[-0.10471493, 1.6426531, -4.07776, -1.1454687...
46    [[-0.36434332, -0.12633006, -2.0345666, 0.4436...
Name: Vectors, dtype: object


In [11]:
# Baseline classifier: pass the flattened 2-D train/test matrices and the
# training labels to the Naive Bayes helper, then print what it returns.
# The recorded output is one 0/1 label per held-out review (9 values).
nb_pred = naive_bayes_model(X_train_2d, X_test_2d, y_train)
print(nb_pred)

[1 1 1 1 0 1 1 1 1]


In [12]:
# Sequence model: note this receives the un-flattened X_train / X_test Series
# (one nested array per review), not the *_2d matrices used for Naive Bayes.
# The helper prints its padded training tensor and the Keras training log
# before the predictions are printed here.
# NOTE(review): the printed X_train_padded holds integers (0, 1, -2, ...)
# although the input vectors are floats — this suggests the padding step
# truncates values (e.g. an integer default dtype); check inside lstm_model.
# NOTE(review): in the recorded run every prediction is 1 and val_accuracy is
# 1.0 after 2 epochs on a tiny sample, so this run says little about quality.
lstm_pred = lstm_model(X_train, X_test, y_train, y_test)
print(lstm_pred)

X_train_padded: [[[ 0  1 -2 ...  0 -2  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  ...
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]]

 [[ 0  1 -3 ...  0 -4  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  ...
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]]

 [[ 0  0 -1 ...  0 -2  1]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  ...
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]]

 ...

 [[ 0  2 -2 ...  0 -1  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  ...
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]]

 [[ 0  1  0 ...  0 -2  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  ...
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]]

 [[ 0  1 -2 ...  2  0 -1]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  ...
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0  0]
  [ 0  0  0 ...  0  0 

  super().__init__(**kwargs)


Epoch 1/2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3600 - loss: 0.6940 - val_accuracy: 1.0000 - val_loss: 0.6774
Epoch 2/2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.7600 - loss: 0.6829 - val_accuracy: 1.0000 - val_loss: 0.6571
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step
[1, 1, 1, 1, 1, 1, 1, 1, 1]


In [19]:
# Generate summaries for the review text.
# NOTE(review): in the recorded run this cell fails with
# "RuntimeError: Invalid device string: '/job:localhost/replica:0/task:0/device:CPU:0'".
# The failure is raised from inside summarize_text (device selection is not
# visible here) — presumably a device name from one framework is being passed
# to another; confirm and fix in src.modeling.summarization_model.
# NOTE(review): the execution counter jumps from 12 to 19 (and the import cell
# shows 18), so this was run out of order — re-verify with Restart & Run All.
summarized_text_df = summarize_text(df_model, "review_text")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


RuntimeError: Invalid device string: '/job:localhost/replica:0/task:0/device:CPU:0'