In [1]:
import pandas as pd
import plotly.express as px
import seaborn as sns
import sqlite3

## Database connection

In [2]:
# Open the SQLite file at a path relative to the notebook and get a cursor on it.
db_path = r'../input/amazon-customers-data/database.sqlite'
con = sqlite3.connect(db_path)
cur = con.cursor()

# Select at most 1000 rows from the Reviews table. The rows stay pending on
# `cur` until they are fetched; the call is the cell's last expression, so the
# cursor it returns is what the cell displays.
query = '''
            SELECT *
            FROM Reviews
            LIMIT 1000; '''
cur.execute(query)

<sqlite3.Cursor at 0x7fa5c62b26c0>

## Create a dataframe

In [3]:
# The first field of every entry in the cursor description is the column name.
colunas = [name for name, *_ in cur.description]

# Drain the pending result set and wrap the rows in a DataFrame.
rows = cur.fetchall()
df = pd.DataFrame(data=rows, columns=colunas)

In [4]:
# Print the (rows, columns) pair, then show the first five records as the
# cell's rich output.
n_rows, n_cols = df.shape
print((n_rows, n_cols))
df.head()

(1000, 10)


Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...


## Exploratory Analysis

In [5]:
# Distribution of the review star ratings.
# The frame and a column name are passed (long-form input) so the x axis is
# labelled "Score"; a bare Series is handled as wide-form data, which labels
# the axis "value" and adds a redundant "variable" legend.
px.histogram(df, x='Score', title='Score Distribution')

### Sentiment Analysis

In [6]:
!pip install -U pip setuptools wheel
!pip install -U spacy
!python -m spacy download en_core_web_sm
!pip install pip install spacytextblob

Collecting pip
  Downloading pip-21.2.4-py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 5.3 MB/s 
Collecting setuptools
  Downloading setuptools-57.4.0-py3-none-any.whl (819 kB)
[K     |████████████████████████████████| 819 kB 48.0 MB/s 
Collecting wheel
  Downloading wheel-0.37.0-py2.py3-none-any.whl (35 kB)
Installing collected packages: wheel, setuptools, pip
  Attempting uninstall: wheel
    Found existing installation: wheel 0.36.2
    Uninstalling wheel-0.36.2:
      Successfully uninstalled wheel-0.36.2
  Attempting uninstall: setuptools
    Found existing installation: setuptools 49.6.0.post20210108
    Uninstalling setuptools-49.6.0.post20210108:
      Successfully uninstalled setuptools-49.6.0.post20210108
  Attempting uninstall: pip
    Found existing installation: pip 21.1.2
    Uninstalling pip-21.1.2:
      Successfully uninstalled pip-21.1.2
[31mERROR: pip's dependency resolver does not currently take into account all th

In [7]:
import spacy
# NOTE(review): SpacyTextBlob is never referenced by name in this cell; the
# import is presumably what registers the 'spacytextblob' factory with spaCy —
# confirm before removing it.
from spacytextblob.spacytextblob import SpacyTextBlob

# Load the small English pipeline and append the TextBlob component to it.
# add_pipe is the last expression, so the component it returns is displayed.
model_name = "en_core_web_sm"
nlp = spacy.load(model_name)
nlp.add_pipe('spacytextblob')

<spacytextblob.spacytextblob.SpacyTextBlob at 0x7fa559240b90>

In [8]:
# Run each review text through the pipeline and keep the value of the
# `polarity` extension attribute of the processed doc, in row order.
sentimento = [nlp(text)._.polarity for text in df['Text']]

In [9]:
# Turn the polarity scores into a one-column frame named 'Sentimento' ...
sentimento = pd.DataFrame(data=sentimento, columns=['Sentimento'])

# ... and place it to the right of the review columns; rows are matched on
# index.
df2 = pd.concat(objs=[df, sentimento], axis='columns')

In [10]:
def _classify_polarity(polarity):
    """Return the mood label for one polarity score.

    Buckets, checked from the top down:
        polarity >= 0.5          -> "Very good"
        0.1 < polarity < 0.5     -> "Good"
        -0.1 <= polarity <= 0.1  -> "Neuter"
        -0.5 < polarity < -0.1   -> "bad"
        anything else            -> "Very bad"
    """
    # Each guard only needs its lower bound: the upper bound is already
    # implied by the previous guard having failed.
    if polarity >= 0.5:
        return "Very good"
    if polarity > 0.1:
        return "Good"
    if polarity >= -0.1:
        return "Neuter"
    if polarity > -0.5:
        return "bad"
    # Reached when no comparison above holds (scores of -0.5 and below).
    return "Very bad"


# Label every row's polarity, wrap the labels in a one-column frame named
# 'Humor', and append it to the right of df2 (rows matched on index).
humor = [_classify_polarity(score) for score in df2['Sentimento']]
humor = pd.DataFrame(data=humor, columns=['Humor'])
df3 = pd.concat(objs=[df2, humor], axis='columns')

In [11]:
# Show the first five rows, now including the Sentimento and Humor columns.
df3.head(n=5)

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text,Sentimento,Humor
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...,0.45,Good
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...,-0.033333,Neuter
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...,0.133571,Good
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...,0.166667,Good
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...,0.483333,Good


In [12]:
# Number of reviews per mood label.
# Long-form input (frame + column name) labels the x axis "Humor" instead of
# the generic wide-form "value". category_orders lists the labels exactly as
# they are stored in the Humor column, from most negative to most positive, so
# the bars do not appear in first-seen order.
px.histogram(df3, x='Humor', title='Humor Distribution',
             category_orders={'Humor': ['Very bad', 'bad', 'Neuter', 'Good', 'Very good']})

In [13]:
# Distribution of the HelpfulnessNumerator column.
# Long-form input (frame + column name) lets the axis be labelled with a
# readable name instead of the generic wide-form "value".
px.histogram(df3, x='HelpfulnessNumerator', title="Helpfulness Numerator",
             labels={'HelpfulnessNumerator': 'Helpfulness Numerator'})

In [14]:
# One point per review: polarity score on x, helpfulness numerator on y,
# coloured by mood label. A title is set so the figure can be read on its own,
# like the other figures in this notebook.
px.scatter(df3, x='Sentimento',
             y='HelpfulnessNumerator',
             color='Humor',
             title='Helpfulness Numerator vs. Sentiment',
             labels={'HelpfulnessNumerator':'Helpfulness Numerator'})

In [15]:
# Mean HelpfulnessDenominator per mood label.
# Only the plotted column is averaged: df3 also holds text columns (ProductId,
# UserId, Summary, Text, ...), and averaging the whole frame fails on them in
# recent pandas. The aggregation is computed once and reused; as_index=False
# keeps 'Humor' as a regular column so it can be named as the x axis.
mean_denominator_by_humor = (
    df3.groupby('Humor', as_index=False)['HelpfulnessDenominator'].mean()
)

px.bar(data_frame=mean_denominator_by_humor,
       x='Humor',
       y='HelpfulnessDenominator',
       title="Helpfulness Denominator (Mean)",
       labels={'HelpfulnessDenominator': 'Helpfulness Denominator'})