In [117]:
import json

# read the JSON file into a Python object
with open('rainforest_api_amazon_reviews.json', encoding='utf-8') as f:
    data = json.load(f)
    #print(data)

In [118]:
import pandas as pd


reviews = pd.DataFrame(data['reviews'])
reviews = reviews[['id', 'title', 'body', 'rating', 'date']]

# define a function to extract the text after "on"
def extract_text_after_on(text):
    return text.split('on', 1)[1].strip()

# apply the function to the 'text' column
reviews['date'] = reviews['date'].apply(lambda x: extract_text_after_on(x['raw']) if x is not None else None)


reviews.head()

Unnamed: 0,id,title,body,rating,date
0,RAKXGFZZOJN8M,抜群にかっこいいダッチオーブン,ft1-t(容量:7.6L) を購入しました。 最高にかっこいいダッチオーブンです。 なに...,5,"February 1, 2023"
1,R1DJ1B549OAEF5,お手軽ダッチオーブン,The media could not be loaded. キャンプ中、ちょっとした「思い...,4,"January 1, 2023"
2,R2IEQ6GHPEE97L,焦げにくくドッシリ重量感がたまらん,うっかりして 火を浴びせすぎて 焦げさせてしまいましたが タワシなどすぐにコゲも落とせました...,5,"December 9, 2022"
3,R1DE0GER298L30,ソロキャンにぴったり！,手頃な大きさでソロキャンに丁度良い！蓋のガタ付も無く表面の仕上げも綺麗で満足しています。,5,"November 21, 2022"
4,R2AZO50PGG9FOA,重くてナンボのダッチオーブン,丸鶏を使った料理をしたくて少し大きめのｆｔ6を購入。この径にしてはやや深さが無いのかなとは思...,5,"July 13, 2022"


In [119]:
import requests, uuid, json

# Add your key and endpoint
key = ""
endpoint = "https://api.cognitive.microsofttranslator.com"


text = reviews.body

# location, also known as region.
# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.
location = "westeurope"

path = '/translate'
constructed_url = endpoint + path

params = {
    'api-version': '3.0',
    'from': ['ja', 'fr', 'de'],
    'to': 'en'
}

headers = {
    'Ocp-Apim-Subscription-Key': key,
    # location required if you're using a multi-service or regional (not global) resource.
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
    'X-ClientTraceId': str(uuid.uuid4())
}

# You can pass more than one object in body.

text_list = reviews.body.tolist()


# Make the request to the Microsoft Translator API
response = requests.post(constructed_url, params=params, headers=headers, json=[{'text': t} for t in text_list])

# Get the translated text from the response
translations = response.json()

# Extract the translated text from the response and store in a new DataFrame
translated_df = pd.DataFrame([t['translations'][0]['text'] for t in translations], columns=['Translated Text'])


# Concatenate the original and translated DataFrames
df = pd.concat([text, translated_df], axis=1)

# Display the results
df.head()

Unnamed: 0,body,Translated Text
0,ft1-t(容量:7.6L) を購入しました。 最高にかっこいいダッチオーブンです。 なに...,ft1-t (capacity: 7.6L) was purchased. It's th...
1,The media could not be loaded. キャンプ中、ちょっとした「思い...,The media could not be loaded. Great for makin...
2,うっかりして 火を浴びせすぎて 焦げさせてしまいましたが タワシなどすぐにコゲも落とせました...,I inadvertently exposed it to too much fire an...
3,手頃な大きさでソロキャンに丁度良い！蓋のガタ付も無く表面の仕上げも綺麗で満足しています。,It's an affordable size and just right for sol...
4,丸鶏を使った料理をしたくて少し大きめのｆｔ6を購入。この径にしてはやや深さが無いのかなとは思...,"I wanted to cook with whole chicken, so I boug..."


In [120]:
df_translated = reviews.merge(df, left_on="body", right_on="body")
df_translated.head()

Unnamed: 0,id,title,body,rating,date,Translated Text
0,RAKXGFZZOJN8M,抜群にかっこいいダッチオーブン,ft1-t(容量:7.6L) を購入しました。 最高にかっこいいダッチオーブンです。 なに...,5,"February 1, 2023",ft1-t (capacity: 7.6L) was purchased. It's th...
1,R1DJ1B549OAEF5,お手軽ダッチオーブン,The media could not be loaded. キャンプ中、ちょっとした「思い...,4,"January 1, 2023",The media could not be loaded. Great for makin...
2,R2IEQ6GHPEE97L,焦げにくくドッシリ重量感がたまらん,うっかりして 火を浴びせすぎて 焦げさせてしまいましたが タワシなどすぐにコゲも落とせました...,5,"December 9, 2022",I inadvertently exposed it to too much fire an...
3,R1DE0GER298L30,ソロキャンにぴったり！,手頃な大きさでソロキャンに丁度良い！蓋のガタ付も無く表面の仕上げも綺麗で満足しています。,5,"November 21, 2022",It's an affordable size and just right for sol...
4,R2AZO50PGG9FOA,重くてナンボのダッチオーブン,丸鶏を使った料理をしたくて少し大きめのｆｔ6を購入。この径にしてはやや深さが無いのかなとは思...,5,"July 13, 2022","I wanted to cook with whole chicken, so I boug..."


In [149]:
import spacy

nlp = spacy.load('en_core_web_sm')

aspect_synonyms = {
    'price': ['price', 'cost', 'expense', 'affordable', 'expensive'],
    'brand': ['brand', 'company', 'manufacturer', 'maker'],
    'design': ['design', 'style', 'appearance', 'look'],
    'packaging': ['packaging', 'package', 'box', 'container'],
    'quality': ['quality', 'durability', 'reliability']
}


aspects_of_interest = aspect_synonyms.keys()

aspects = {aspect: [] for aspect in aspects_of_interest}

for review in df_translated['Translated Text']:
    doc = nlp(review)
    for aspect, synonyms in aspect_synonyms.items():
        if any(synonym in review for synonym in synonyms):
            aspects[aspect].append(review)
            break

In [150]:
aspects

{'price': ["It's an affordable size and just right for solo canning! There is no rattling of the lid and the surface finish is beautiful and I am satisfied.",
  "Cool! Not so expensive! It is a famous pedro max, but there are relatively few people who use it. Size that is easy to use even solo. The bottom is flat, so you can use it with IH. I can't list what I like. It's definitely better than buying the expensive one that many domestic manufacturers use."],
 'brand': [],
 'design': ["I wanted to cook with whole chicken, so I bought a slightly larger FT6. I thought that it was a little deep for this diameter, but it seems to serve its purpose sufficiently. Of course, both the body and lid are very heavy. However, I think that this weight is the reason why it is a Dutch oven of a Dutch oven, so it seems that it will do a solid job. I'm looking forward to using the lid as an iron plate. Since it is solid and heavy, I think it is better to prepare a special case or a durable fabric contai

In [151]:
from textblob import TextBlob

sentiment_scores = {}


for aspect, reviews in aspects.items():
    sentiment_scores[aspect] = 0
    count = len(reviews)
    for review in reviews:
        sentiment_scores[aspect] += TextBlob(review).sentiment.polarity
    if count != 0:
        sentiment_scores[aspect] /= count

In [152]:
sentiment_scores

{'price': 0.33556547619047616,
 'brand': 0,
 'design': 0.30064965569561164,
 'packaging': 0.10833333333333334,
 'quality': 0}