In [1]:
# This code imports the warnings module and suppresses the display of warnings generated during script execution
import warnings
warnings.simplefilter(action='ignore', category=Warning)

import pandas as pd

# Load the dataset from a JSON URL into a pandas DataFrame
amazon_one_plus_reviews_db = pd.read_json('https://query.data.world/s/eke656w4udxnc6v57negepzxrddrzw?dws=00000')

# List the columns of the DataFrame to understand its structure
list(amazon_one_plus_reviews_db.columns)

['product',
 'product_company',
 'profile_name',
 'review_title',
 'review_rating',
 'review_text',
 'helpful_count',
 'total_comments',
 'review_country',
 'reviewed_at',
 'url',
 'crawled_at',
 '_id',
 'verified_purchase',
 'color',
 'style_name',
 'size_name',
 'category',
 'sub_category',
 'images']

In [2]:
# Drop columns that are not needed for analysis to simplify the DataFrame
amazon_one_plus_reviews_db.drop([
    'profile_name',
    'total_comments',
    'url',
    'crawled_at',
    '_id',
    'color',
    'style_name',
    'images',
    'size_name'
  ], axis=1, inplace=True)

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,"OnePlus Nord 5G (Gray Onyx, 8GB RAM, 128GB Sto...",OnePlus,*Read before you buy!!*,5.0 out of 5 stars,"\n Yea..pre-ordered on 28 July, got it on 4 A...",721 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
1,"OnePlus Nord 5G (Gray Onyx, 8GB RAM, 128GB Sto...",OnePlus,Near to mid range Perfection,5.0 out of 5 stars,"\n Got it delivered yesterday , used for abou...",436 people found this helpful,India,2020-08-03,Verified Purchase,electronics,mobiles
2,"OnePlus Nord 5G (Gray Onyx, 8GB RAM, 128GB Sto...",OnePlus,Great price!,5.0 out of 5 stars,\n An amazing phone!,322 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
3,"OnePlus Nord 5G (Gray Onyx, 8GB RAM, 128GB Sto...",OnePlus,Beast in OnePlus.,5.0 out of 5 stars,\n Brilliant..,243 people found this helpful,India,2020-08-02,Verified Purchase,electronics,mobiles
4,"OnePlus Nord 5G (Gray Onyx, 8GB RAM, 128GB Sto...",OnePlus,Changed to Nord from 6t,5.0 out of 5 stars,\n I was skeptical about changing from One pl...,109 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,"Redmi Note 9 Pro (Aurora Blue, 4GB RAM, 64GB S...",Redmi,Great but not satisfied for me,2.0 out of 5 stars,\n Quality of phone is great but from my pers...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,"Redmi Note 9 Pro (Aurora Blue, 4GB RAM, 64GB S...",Redmi,Not recommend,2.0 out of 5 stars,\n Not recommend\n,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,"Redmi Note 9 Pro (Aurora Blue, 4GB RAM, 64GB S...",Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0 out of 5 stars,\n Redmi and Amazon is engaged in a worst mar...,3 people found this helpful,India,2020-06-29,na,electronics,mobiles
30610,"Redmi Note 9 Pro (Aurora Blue, 4GB RAM, 64GB S...",Redmi,Display retention issue,2.0 out of 5 stars,"\n I am facing display retention problem, aft...",0,India,2020-04-26,na,electronics,mobiles


In [3]:
# Check for any null values in each column to ensure data quality
for column_name in amazon_one_plus_reviews_db.columns.tolist():
  print(amazon_one_plus_reviews_db[column_name].isnull().any())

False
False
False
False
False
False
False
False
False
False
False


In [4]:
# Clean the 'product' column by removing any text after '(' to standardize product names
for i in range(len(amazon_one_plus_reviews_db['product'])):
  amazon_one_plus_reviews_db['product'][i] = amazon_one_plus_reviews_db['product'][i].split('(')[0].strip()

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0 out of 5 stars,"\n Yea..pre-ordered on 28 July, got it on 4 A...",721 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0 out of 5 stars,"\n Got it delivered yesterday , used for abou...",436 people found this helpful,India,2020-08-03,Verified Purchase,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0 out of 5 stars,\n An amazing phone!,322 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0 out of 5 stars,\n Brilliant..,243 people found this helpful,India,2020-08-02,Verified Purchase,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0 out of 5 stars,\n I was skeptical about changing from One pl...,109 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0 out of 5 stars,\n Quality of phone is great but from my pers...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0 out of 5 stars,\n Not recommend\n,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0 out of 5 stars,\n Redmi and Amazon is engaged in a worst mar...,3 people found this helpful,India,2020-06-29,na,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0 out of 5 stars,"\n I am facing display retention problem, aft...",0,India,2020-04-26,na,electronics,mobiles


In [5]:
# Trim leading and trailing whitespace from 'review_title' column values
for i in range(len(amazon_one_plus_reviews_db['review_title'])):
  amazon_one_plus_reviews_db['review_title'][i] = amazon_one_plus_reviews_db['review_title'][i].strip()

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0 out of 5 stars,"\n Yea..pre-ordered on 28 July, got it on 4 A...",721 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0 out of 5 stars,"\n Got it delivered yesterday , used for abou...",436 people found this helpful,India,2020-08-03,Verified Purchase,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0 out of 5 stars,\n An amazing phone!,322 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0 out of 5 stars,\n Brilliant..,243 people found this helpful,India,2020-08-02,Verified Purchase,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0 out of 5 stars,\n I was skeptical about changing from One pl...,109 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0 out of 5 stars,\n Quality of phone is great but from my pers...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0 out of 5 stars,\n Not recommend\n,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0 out of 5 stars,\n Redmi and Amazon is engaged in a worst mar...,3 people found this helpful,India,2020-06-29,na,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0 out of 5 stars,"\n I am facing display retention problem, aft...",0,India,2020-04-26,na,electronics,mobiles


In [6]:
# Convert 'review_rating' from string to float, keeping only the numeric part before "out"
for i in range(len(amazon_one_plus_reviews_db['review_rating'])):
  amazon_one_plus_reviews_db['review_rating'][i] = float(amazon_one_plus_reviews_db['review_rating'][i].split('out')[0])

# Ensure 'review_rating' column is of float type
amazon_one_plus_reviews_db['review_rating'] = amazon_one_plus_reviews_db['review_rating'].astype(float)

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0,"\n Yea..pre-ordered on 28 July, got it on 4 A...",721 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0,"\n Got it delivered yesterday , used for abou...",436 people found this helpful,India,2020-08-03,Verified Purchase,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0,\n An amazing phone!,322 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0,\n Brilliant..,243 people found this helpful,India,2020-08-02,Verified Purchase,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0,\n I was skeptical about changing from One pl...,109 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0,\n Quality of phone is great but from my pers...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0,\n Not recommend\n,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0,\n Redmi and Amazon is engaged in a worst mar...,3 people found this helpful,India,2020-06-29,na,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0,"\n I am facing display retention problem, aft...",0,India,2020-04-26,na,electronics,mobiles


In [7]:
# Remove newline characters from 'review_text' to clean the text
for i in range(len(amazon_one_plus_reviews_db['review_text'])):
  amazon_one_plus_reviews_db['review_text'][i] = amazon_one_plus_reviews_db['review_text'][i].replace('\n', '')

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0,"Yea..pre-ordered on 28 July, got it on 4 Aug...",721 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0,"Got it delivered yesterday , used for about ...",436 people found this helpful,India,2020-08-03,Verified Purchase,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0,An amazing phone!,322 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0,Brilliant..,243 people found this helpful,India,2020-08-02,Verified Purchase,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0,I was skeptical about changing from One plus...,109 people found this helpful,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0,Quality of phone is great but from my perspe...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0,Not recommend,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0,Redmi and Amazon is engaged in a worst marke...,3 people found this helpful,India,2020-06-29,na,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0,"I am facing display retention problem, after...",0,India,2020-04-26,na,electronics,mobiles


In [8]:
# Install the word2number package, required for converting textual number expressions to numeric form
!pip install word2number

Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py) ... [?25l[?25hdone
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5566 sha256=5c80a8813394da7b6419543123f762ac3a71022b4311ec490cb0a34171200c9e
  Stored in directory: /root/.cache/pip/wheels/84/ff/26/d3cfbd971e96c5aa3737ecfced81628830d7359b55fbb8ca3b
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1


In [9]:
from word2number import w2n

# Clean 'helpful_count' by converting word-based numbers and removing commas
for i in range(len(amazon_one_plus_reviews_db['helpful_count'])):
  new_value = amazon_one_plus_reviews_db['helpful_count'][i].split()[0].replace(',', '')

  try: # First, try to convert directly to int
      new_value = int(new_value)

  except ValueError: # If direct conversion fails, try to convert from words

      try:
          new_value = w2n.word_to_num(new_value)

      except ValueError: # If both conversions fail, assign NA
          new_value = pd.NA

  amazon_one_plus_reviews_db['helpful_count'][i] = new_value

# Ensure 'helpful_count' column is of integer type
amazon_one_plus_reviews_db['helpful_count'] = amazon_one_plus_reviews_db['helpful_count'].astype(int)

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0,"Yea..pre-ordered on 28 July, got it on 4 Aug...",721,India,2020-08-04,Verified Purchase,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0,"Got it delivered yesterday , used for about ...",436,India,2020-08-03,Verified Purchase,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0,An amazing phone!,322,India,2020-08-04,Verified Purchase,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0,Brilliant..,243,India,2020-08-02,Verified Purchase,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0,I was skeptical about changing from One plus...,109,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0,Quality of phone is great but from my perspe...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0,Not recommend,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0,Redmi and Amazon is engaged in a worst marke...,3,India,2020-06-29,na,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0,"I am facing display retention problem, after...",0,India,2020-04-26,na,electronics,mobiles


In [10]:
# Trim whitespace from 'review_country' values
for i in range(len(amazon_one_plus_reviews_db['review_country'])):
  amazon_one_plus_reviews_db['review_country'][i] = amazon_one_plus_reviews_db['review_country'][i].strip()

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0,"Yea..pre-ordered on 28 July, got it on 4 Aug...",721,India,2020-08-04,Verified Purchase,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0,"Got it delivered yesterday , used for about ...",436,India,2020-08-03,Verified Purchase,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0,An amazing phone!,322,India,2020-08-04,Verified Purchase,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0,Brilliant..,243,India,2020-08-02,Verified Purchase,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0,I was skeptical about changing from One plus...,109,India,2020-08-04,Verified Purchase,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0,Quality of phone is great but from my perspe...,0,India,2020-09-03,Verified Purchase,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0,Not recommend,0,India,2020-05-20,Verified Purchase,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0,Redmi and Amazon is engaged in a worst marke...,3,India,2020-06-29,na,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0,"I am facing display retention problem, after...",0,India,2020-04-26,na,electronics,mobiles


In [11]:
# Convert 'verified_purchase' from string to binary numeric values, where 'na' becomes 0 and all other values become 1
for i in range(len(amazon_one_plus_reviews_db['verified_purchase'])):
  current_value = amazon_one_plus_reviews_db['verified_purchase'][i]

  if current_value == 'na':
    amazon_one_plus_reviews_db['verified_purchase'][i] = 0
  else:
    amazon_one_plus_reviews_db['verified_purchase'][i] = 1

# Ensure 'verified_purchase' column is of integer type
amazon_one_plus_reviews_db['verified_purchase'] = amazon_one_plus_reviews_db['verified_purchase'].astype(int)

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_title,review_rating,review_text,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category
0,OnePlus Nord 5G,OnePlus,*Read before you buy!!*,5.0,"Yea..pre-ordered on 28 July, got it on 4 Aug...",721,India,2020-08-04,1,electronics,mobiles
1,OnePlus Nord 5G,OnePlus,Near to mid range Perfection,5.0,"Got it delivered yesterday , used for about ...",436,India,2020-08-03,1,electronics,mobiles
2,OnePlus Nord 5G,OnePlus,Great price!,5.0,An amazing phone!,322,India,2020-08-04,1,electronics,mobiles
3,OnePlus Nord 5G,OnePlus,Beast in OnePlus.,5.0,Brilliant..,243,India,2020-08-02,1,electronics,mobiles
4,OnePlus Nord 5G,OnePlus,Changed to Nord from 6t,5.0,I was skeptical about changing from One plus...,109,India,2020-08-04,1,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,Great but not satisfied for me,2.0,Quality of phone is great but from my perspe...,0,India,2020-09-03,1,electronics,mobiles
30608,Redmi Note 9 Pro,Redmi,Not recommend,2.0,Not recommend,0,India,2020-05-20,1,electronics,mobiles
30609,Redmi Note 9 Pro,Redmi,WORST MARKETING TACTICS OF FLASH SALE OF REDMI...,2.0,Redmi and Amazon is engaged in a worst marke...,3,India,2020-06-29,0,electronics,mobiles
30610,Redmi Note 9 Pro,Redmi,Display retention issue,2.0,"I am facing display retention problem, after...",0,India,2020-04-26,0,electronics,mobiles


In [12]:
# Check data types of all columns for confirmation
amazon_one_plus_reviews_db.dtypes

product                      object
product_company              object
review_title                 object
review_rating               float64
review_text                  object
helpful_count                 int64
review_country               object
reviewed_at          datetime64[ns]
verified_purchase             int64
category                     object
sub_category                 object
dtype: object

In [13]:
# Combine 'review_title' and 'review_text' into a single 'review_description' column, formatted with tags <t> for 'title' and <b> for 'body'
review_descripiton = []
for i in range(len(amazon_one_plus_reviews_db['review_title'])):
  review_title_elem = amazon_one_plus_reviews_db['review_title'][i]
  review_text_elem = amazon_one_plus_reviews_db['review_text'][i]
  new_elem = f'<t>{review_title_elem}</t><b>{review_text_elem}</b>' # t -> title | b -> body
  review_descripiton.append(new_elem)

amazon_one_plus_reviews_db['review_descripiton'] = review_descripiton

# Drop the now redundant 'review_title' and 'review_text' columns
amazon_one_plus_reviews_db.drop(['review_title', 'review_text'], axis=1, inplace=True)

amazon_one_plus_reviews_db

Unnamed: 0,product,product_company,review_rating,helpful_count,review_country,reviewed_at,verified_purchase,category,sub_category,review_descripiton
0,OnePlus Nord 5G,OnePlus,5.0,721,India,2020-08-04,1,electronics,mobiles,<t>*Read before you buy!!*</t><b> Yea..pre-or...
1,OnePlus Nord 5G,OnePlus,5.0,436,India,2020-08-03,1,electronics,mobiles,<t>Near to mid range Perfection</t><b> Got i...
2,OnePlus Nord 5G,OnePlus,5.0,322,India,2020-08-04,1,electronics,mobiles,<t>Great price!</t><b> An amazing phone!</b>
3,OnePlus Nord 5G,OnePlus,5.0,243,India,2020-08-02,1,electronics,mobiles,<t>Beast in OnePlus.</t><b> Brilliant..</b>
4,OnePlus Nord 5G,OnePlus,5.0,109,India,2020-08-04,1,electronics,mobiles,<t>Changed to Nord from 6t</t><b> I was skept...
...,...,...,...,...,...,...,...,...,...,...
30607,Redmi Note 9 Pro,Redmi,2.0,0,India,2020-09-03,1,electronics,mobiles,<t>Great but not satisfied for me</t><b> Qual...
30608,Redmi Note 9 Pro,Redmi,2.0,0,India,2020-05-20,1,electronics,mobiles,<t>Not recommend</t><b> Not recommend</b>
30609,Redmi Note 9 Pro,Redmi,2.0,3,India,2020-06-29,0,electronics,mobiles,<t>WORST MARKETING TACTICS OF FLASH SALE OF RE...
30610,Redmi Note 9 Pro,Redmi,2.0,0,India,2020-04-26,0,electronics,mobiles,<t>Display retention issue</t><b> I am facing...


In [14]:
# Check for any null values again after transformations
for column_name in amazon_one_plus_reviews_db.columns.tolist():
  print(amazon_one_plus_reviews_db[column_name].isnull().any())

False
False
False
False
False
False
False
False
False
False


In [15]:
# Rename columns
amazon_one_plus_reviews_db.columns = ['product_name', 'product_company', 'review_rating', 'review_thumbsup_count', 'review_country', 'review_date', 'is_verfied_purchase', 'product_category', 'product_sub_category', 'review_descripiton']

# Reorder columns
new_order = ['product_name', 'product_company', 'review_descripiton', 'review_rating', 'review_thumbsup_count', 'review_country', 'review_date', 'is_verfied_purchase', 'product_category', 'product_sub_category']
amazon_one_plus_reviews_db = amazon_one_plus_reviews_db[new_order]

# Adding unique IDs
amazon_one_plus_reviews_db.insert(0, 'row_id', range(1, 1 + len(amazon_one_plus_reviews_db)))
amazon_one_plus_reviews_db

Unnamed: 0,row_id,product_name,product_company,review_descripiton,review_rating,review_thumbsup_count,review_country,review_date,is_verfied_purchase,product_category,product_sub_category
0,1,OnePlus Nord 5G,OnePlus,<t>*Read before you buy!!*</t><b> Yea..pre-or...,5.0,721,India,2020-08-04,1,electronics,mobiles
1,2,OnePlus Nord 5G,OnePlus,<t>Near to mid range Perfection</t><b> Got i...,5.0,436,India,2020-08-03,1,electronics,mobiles
2,3,OnePlus Nord 5G,OnePlus,<t>Great price!</t><b> An amazing phone!</b>,5.0,322,India,2020-08-04,1,electronics,mobiles
3,4,OnePlus Nord 5G,OnePlus,<t>Beast in OnePlus.</t><b> Brilliant..</b>,5.0,243,India,2020-08-02,1,electronics,mobiles
4,5,OnePlus Nord 5G,OnePlus,<t>Changed to Nord from 6t</t><b> I was skept...,5.0,109,India,2020-08-04,1,electronics,mobiles
...,...,...,...,...,...,...,...,...,...,...,...
30607,30608,Redmi Note 9 Pro,Redmi,<t>Great but not satisfied for me</t><b> Qual...,2.0,0,India,2020-09-03,1,electronics,mobiles
30608,30609,Redmi Note 9 Pro,Redmi,<t>Not recommend</t><b> Not recommend</b>,2.0,0,India,2020-05-20,1,electronics,mobiles
30609,30610,Redmi Note 9 Pro,Redmi,<t>WORST MARKETING TACTICS OF FLASH SALE OF RE...,2.0,3,India,2020-06-29,0,electronics,mobiles
30610,30611,Redmi Note 9 Pro,Redmi,<t>Display retention issue</t><b> I am facing...,2.0,0,India,2020-04-26,0,electronics,mobiles


`amazon_one_plus_reviews_db` is ready to be loaded.

In [16]:
# Create product dimension
product_dimension = amazon_one_plus_reviews_db[['row_id', 'product_name', 'product_category', 'product_sub_category', 'product_company']]
product_dimension['product_price'] = pd.Series()

# Renaming the row_id column
product_dimension.rename(columns={'row_id': 'product_id'}, inplace=True)
product_dimension

Unnamed: 0,product_id,product_name,product_category,product_sub_category,product_company,product_price
0,1,OnePlus Nord 5G,electronics,mobiles,OnePlus,
1,2,OnePlus Nord 5G,electronics,mobiles,OnePlus,
2,3,OnePlus Nord 5G,electronics,mobiles,OnePlus,
3,4,OnePlus Nord 5G,electronics,mobiles,OnePlus,
4,5,OnePlus Nord 5G,electronics,mobiles,OnePlus,
...,...,...,...,...,...,...
30607,30608,Redmi Note 9 Pro,electronics,mobiles,Redmi,
30608,30609,Redmi Note 9 Pro,electronics,mobiles,Redmi,
30609,30610,Redmi Note 9 Pro,electronics,mobiles,Redmi,
30610,30611,Redmi Note 9 Pro,electronics,mobiles,Redmi,


In [17]:
# Create review dimension
review_dimension = amazon_one_plus_reviews_db[['row_id', 'review_rating', 'review_descripiton', 'review_thumbsup_count', 'review_country']]

# Renaming the row_id column
review_dimension.rename(columns={'row_id': 'review_id'}, inplace=True)
review_dimension

Unnamed: 0,review_id,review_rating,review_descripiton,review_thumbsup_count,review_country
0,1,5.0,<t>*Read before you buy!!*</t><b> Yea..pre-or...,721,India
1,2,5.0,<t>Near to mid range Perfection</t><b> Got i...,436,India
2,3,5.0,<t>Great price!</t><b> An amazing phone!</b>,322,India
3,4,5.0,<t>Beast in OnePlus.</t><b> Brilliant..</b>,243,India
4,5,5.0,<t>Changed to Nord from 6t</t><b> I was skept...,109,India
...,...,...,...,...,...
30607,30608,2.0,<t>Great but not satisfied for me</t><b> Qual...,0,India
30608,30609,2.0,<t>Not recommend</t><b> Not recommend</b>,0,India
30609,30610,2.0,<t>WORST MARKETING TACTICS OF FLASH SALE OF RE...,3,India
30610,30611,2.0,<t>Display retention issue</t><b> I am facing...,0,India


In [18]:
# Format the dates
dates_list = [[date.day, date.month, date.year] for date in amazon_one_plus_reviews_db['review_date'].copy()]

# Create date dimension
date_dimension = pd.DataFrame(dates_list, columns=['day', 'month', 'year'])

# Adding unique IDs
date_dimension.insert(0, 'date_id', range(1, 1 + len(date_dimension)))
date_dimension

Unnamed: 0,date_id,day,month,year
0,1,4,8,2020
1,2,3,8,2020
2,3,4,8,2020
3,4,2,8,2020
4,5,4,8,2020
...,...,...,...,...
30607,30608,3,9,2020
30608,30609,20,5,2020
30609,30610,29,6,2020
30610,30611,26,4,2020


In [19]:
# Export the cleaned DataFrames to a JSON files
from google.colab import files

amazon_one_plus_reviews_db = amazon_one_plus_reviews_db.to_json(orient='records')
product_dimension = product_dimension.to_json(orient='records')
review_dimension = review_dimension.to_json(orient='records')
date_dimension = date_dimension.to_json(orient='records')

with open('amazon_one_plus_reviews_db.json', 'w') as f:
    f.write(amazon_one_plus_reviews_db)

with open('one_plus_product_dimension.json', 'w') as f:
    f.write(product_dimension)

with open('one_plus_review_dimension.json', 'w') as f:
    f.write(review_dimension)

with open('one_plus_date_dimension.json', 'w') as f:
    f.write(date_dimension)

# Downloading the JSON files
files.download('amazon_one_plus_reviews_db.json')
files.download('one_plus_product_dimension.json')
files.download('one_plus_review_dimension.json')
files.download('one_plus_date_dimension.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>