## Project Overview
#### This is project about Customer Churn Prediction model. Basically we use 'Amazon Customer Dataset' as sample data.

In [46]:
import numpy as np
import pandas as pd
import matplotlib as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, TargetEncoder

In [47]:
df = pd.read_csv('amazon_customer_dataset.csv')
df.head(3)

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","Satisfied,Charging is really fast,Value for mo...",Looks durable Charging is fine tooNo complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ199,‚Çπ349,43%,4.0,43994,"Compatible with all Type C enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","A Good Braided Cable for Your Type C Device,Go...",I ordered this cable to connect my phone to An...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ199,"‚Çπ1,899",90%,3.9,7928,„Äê Fast Charger& Data Sync„Äë-With built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","Good speed for earlier versions,Good Product,W...","Not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...


## Data Investigation
#### To create a Customer churn prediction model we need basically customer wise data. But in a primary investigation from df.head(3) identifier columns has data as a list format. We have to separate this data into a single customer data per row.

In [48]:
df.shape

(1465, 16)

In [49]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   product_id           1465 non-null   object
 1   product_name         1465 non-null   object
 2   category             1465 non-null   object
 3   discounted_price     1465 non-null   object
 4   actual_price         1465 non-null   object
 5   discount_percentage  1465 non-null   object
 6   rating               1465 non-null   object
 7   rating_count         1463 non-null   object
 8   about_product        1465 non-null   object
 9   user_id              1465 non-null   object
 10  user_name            1465 non-null   object
 11  review_id            1465 non-null   object
 12  review_title         1465 non-null   object
 13  review_content       1465 non-null   object
 14  img_link             1465 non-null   object
 15  product_link         1465 non-null   object
dtypes: obj

In [50]:
df.isna().sum()

product_id             0
product_name           0
category               0
discounted_price       0
actual_price           0
discount_percentage    0
rating                 0
rating_count           2
about_product          0
user_id                0
user_name              0
review_id              0
review_title           0
review_content         0
img_link               0
product_link           0
dtype: int64

#### There is no null values except 2 into rating_count.

In [51]:

cols_to_split = ['user_id', 'user_name', 'review_id', 'review_title', 'review_content']

for col in cols_to_split:
    df[col] = df[col].astype(str).str.split(',')

# df['__len__'] = df[cols_to_split[0]].apply(len)

# for col in cols_to_split[1:]:
#     df = df[df['__len__'] == df[col].apply(len)]


## Data Spliting Process
#### At first we separate the data from identifier column with comma. Then we tried length wise separation. Actually it works but we lost length mismatched data. Our target is to get a precise model, this kind of data loss can accelerate our model to towards biasness. So we will fixed mismatched data with blank string character. Then We will replace with NaN. 

In [52]:
#check the column length and fill with blank space(padding function)
def pad_row(row):
    max_len = max(len(row[col]) for col in cols_to_split)
    for col in cols_to_split:
        diff = max_len - len(row[col])
        if diff > 0:
            row[col] = row[col] + ['']*diff
    return row

In [53]:
df_padded = df.apply(pad_row, axis=1)
expand_df = df_padded.explode(cols_to_split, ignore_index=True)
expand_df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHMY5CWJMMK5BJRBBSNLYT3ONILA,Adarsh gupta,R2AJM3LFTLZHFO,Charging is really fast,Charging is really fast,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
2,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHCTC6ULH4XB6YHDY6PCH2R772LQ,Sundeep,R6AQJGUP6P86,Value for money,good product.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
3,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AGYHHIERNXKA6P5T7CZLXKVPT7IQ,S.Sayeed Ahmed,R1KD19VHEDV0OR,Product review,Till now satisfied with the quality.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
4,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG4OGOFWXJZTQ2HKYIOCOY3KXF2Q,jaspreet singh,R3C02RMYQMK6FC,Good quality,This is a good product . The charging speed is...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...


In [54]:
expand_df[expand_df['review_id'] == 'R3HXWT0LRP0NMF']

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
637,B07JW1Y6XV,Wayona Nylon Braided 3A Lightning to USB A Syn...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,"[High Compatibility] : iPhone X/XsMax/Xr ,iPho...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LB1-Sy...
1215,B07LGT55SJ,Wayona Usb Nylon Braided Data Sync And Chargin...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,[High Compatibility] : Compatible For iPhone X...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-Syncing-C...
1428,B07JH1C41D,Wayona Nylon Braided (2 Pack) Lightning Fast U...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ649,"‚Çπ1,999",68%,4.2,24269,"[High Compatibility] : iPhone X/XsMax/Xr ,iPho...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/I/412fvb7k2F...,https://www.amazon.in/Wayona-Braided-WN3LG2-Sy...
1638,B07JGDB5M1,Wayona Nylon Braided 2M / 6Ft Fast Charge Usb ...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ449,"‚Çπ1,299",65%,4.2,24269,"[High Compatibility] : Phone X/XsMax/Xr ,Phone...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN6LG1-Sy...
3220,B07JH1CBGW,Wayona Nylon Braided Usb Syncing And Charging ...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ649,"‚Çπ1,999",68%,4.2,24269,"[High Compatibility] : iPhone X/XsMax/Xr ,iPho...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/I/41eHLj-wfG...,https://www.amazon.in/Wayona-Braided-WN3LB2-Sy...
5444,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24270,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/I/51UsScvHQN...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
9706,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...


In [55]:
expand_df.shape

(21968, 16)

#### After separation of data, we can see each row is specifically contains data for each customer. So the row size increased and we can use further process now.

In [56]:
expand_df = expand_df.replace('', np.nan)

info_cols = ['user_id', 'user_name', 'review_id']

expand_df = expand_df.dropna(subset=info_cols, how='all')


In [57]:
expand_df.isna().sum().sum()

np.int64(285)

## Fill data with info from identifier columns
#### As we can see, we dropped only those data where all our identifier columns are missing. 
#### why? Because if there is no information in any column of the identifier column, there will be no matching data to fill the empty cells.

In [58]:
def fill_user_id(df):

    for idx, row in df[df['user_id'].isna()].iterrows():
        #match with user_name
        if pd.notna(row['user_name']):
            match = df[df['user_name'] == row['user_name']]['user_id'].dropna()
            if len(match)>0:
                df.at[idx, 'user_id'] = match.iloc[0]
        
        #match with review_id
        if pd.notna(row['review_id']):
            match = df[df['review_id'] == row['review_id']]['user_id'].dropna()
            if len(match)>0:
                df.at[idx, 'user_id'] = match.iloc[0]
    return df


def fill_user_name(df):

    for idx, row in df[df['user_name'].isna()].iterrows():
        #match with user_id
        if pd.notna(row['user_id']):
            match = df[df['user_id'] == row['user_id']]['user_name'].dropna()
            if len(match)>0:
                df.at[idx, 'user_name'] = match.iloc[0]

        if pd.notna(row['review_id']):
            match = df[df['review_id'] == row['review_id']]['user_name'].dropna()
            if len(match)>0:
                df.at[idx, 'user_name'] = match.iloc[0]
    return df


def fill_review_id(df):

    for idx, row in df[df['review_id'].isna()].iterrows():

        #match with user_id
        if pd.notna(row['user_id']):
            match = df[df['user_id'] == row['user_id']]['review_id'].dropna()
            if len(match)>0:
                df.at[idx, 'review_id'] = match.iloc[0]

        #match with user_name
        if pd.notna(row['user_name']):
            match = df[df['user_name'] == row['user_name']]['review_id'].dropna()
            if len(match)>0:
                df.at[idx, 'review_id'] = match.iloc[0]
    return df       

## Smart Fill
#### Now if we think our previous explanations we recursively use a word 'Identifier'. What does it means? Here is hide our filling methods. We will match and fill in the information in the other identifier columns.
#### You can see above we create the filling fuction.

In [59]:
expand_df = fill_user_id(expand_df)
expand_df = fill_user_name(expand_df)
expand_df = fill_review_id(expand_df)

In [60]:
#null contain rows
expand_df[expand_df.isna().any(axis=1)]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
71,B08Y1TFSP6,pTron Solero TB301 3A Type-C Data and Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ149,"‚Çπ1,000",85%,3.9,24871,Fast Charging & Data Sync: Solero TB301 Type-C...,AHFAAPSY2MJ5HYOU2VQDJ7AQY4NQ,Ajaybabu.O.M,R18MP1KLUE18PC,,,https://m.media-amazon.com/images/I/31wOPjcSxl...,https://www.amazon.in/Solero-TB301-Charging-48...
191,B083342NKJ,MI Braided USB Type-C Cable for Charging Adapt...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ349,‚Çπ399,13%,4.4,18757,1M Long Cable. Usb 2.0 (Type A)|Toughened Join...,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,Deepak,R1ZBD2ZB2ZYEWX,I really liked this one.,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Mi-Braided-USB-Type-C-Ca...
193,B083342NKJ,MI Braided USB Type-C Cable for Charging Adapt...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ349,‚Çπ399,13%,4.4,18757,1M Long Cable. Usb 2.0 (Type A)|Toughened Join...,AGEOQQHGNELZNEUKJAJUA7NTPBLA,Rakesh,R1115HIQP3BKKJ,,Best data cable charging fast,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Mi-Braided-USB-Type-C-Ca...
223,B08WRBG3XW,"boAt Type C A325 Tangle-free, Sturdy Type C Ca...",Computers&Accessories|Accessories&Peripherals|...,‚Çπ199,‚Çπ499,60%,4.1,13045,Type C A 325 Cable Is Designed With A Perfect ...,AGXRGH7DLS3RVFS5KWU4PGR3H3GQ,Gursewak Singh,R3VIKEVJ5DBF5G,Good Quality but less Power Delivery,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/boAt-A325-Tangle-Free-Ch...
369,B094JNXNPV,Ambrane Unbreakable 3 in 1 Fast Charging Braid...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ299,‚Çπ399,25%,4.0,2766,Blazing Charging - All combined 3 in 1 cable s...,AEO5NTPVZBDP7EHO2NOJ3Q6QPN3A,C,R1M0NVGZXK8NGO,Sturdy,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21862,B08ZXZ362Z,akiara - Makes life easy Mini Sewing Machine f...,Home&Kitchen|Kitchen&HomeAppliances|SewingMach...,"‚Çπ1,563","‚Çπ3,098",50%,3.5,2283,PORTABLE MINI SEWING MACHINE with threads and ...,AHX4KZVASMGQQOST4T2RAQUZTLCA,Kirti Gupta,R32BCA8W6W1KIF,Good for beginners or minor repairs,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/akiara-Tailoring-Stitchi...
21867,B08ZXZ362Z,akiara - Makes life easy Mini Sewing Machine f...,Home&Kitchen|Kitchen&HomeAppliances|SewingMach...,"‚Çπ1,563","‚Çπ3,098",50%,3.5,2283,PORTABLE MINI SEWING MACHINE with threads and ...,AHX47N6TUNADPXMMQKVASVBNIHJA,krishna chaudhari,R39210FVK81Z0W,Good product,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/akiara-Tailoring-Stitchi...
21868,B00GHL8VP2,USHA 1212 PTC with Adjustable Thermostat Fan H...,"Home&Kitchen|Heating,Cooling&AirQuality|RoomHe...","‚Çπ3,487.77","‚Çπ4,990",30%,4.1,1127,Meant for Spot Heating|Room Size: Upto 15 sq f...,AECFYIUCHSZXDLACTYPEUSM5DIKA,Manoj Kumar,R1T19FVDX8Z7T2,I like this product,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Usha-1212-PTC-Adjustable...
21877,B0B9JZW1SQ,"4 in 1 Handheld Electric Vegetable Cutter Set,...",Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,‚Çπ498,"‚Çπ1,200",59%,3.2,113,{ 4 in 1 multi-function Electric Vegetable Cut...,AFCTMQKPVJI6Y2JPIGDKRKIAV43A,Ashish,R3N2A5DV7IPG6R,Cutter speed and power is very low,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Handheld-Electric-Vegeta...


In [61]:
#null contain columns
expand_df.isna().sum()

product_id               0
product_name             0
category                 0
discounted_price         0
actual_price             0
discount_percentage      0
rating                   0
rating_count             2
about_product            0
user_id                  3
user_name                1
review_id                3
review_title            25
review_content         233
img_link                 0
product_link             0
dtype: int64

In [62]:
expand_df[expand_df[info_cols].isna().any(axis=1)]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
1063,B0B9XN9S3W,Acer 80 cm (32 inches) N Series HD Ready TV AR...,"Electronics|HomeTheater,TV&Video|Televisions|S...","‚Çπ7,999","‚Çπ14,990",47%,4.3,457,Resolution: HD Ready (1366 x 768) | Refresh Ra...,,T Karthikeyan,,,not that bad. If you want to buy a 32'' TV at...,https://m.media-amazon.com/images/I/51FicDnawa...,https://www.amazon.in/Acer-inches-Ready-AR32NS...
1656,B0981XSZJ7,CROSSVOLT Compatible Dash/Warp Data Sync Fast ...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ299,‚Çπ999,70%,4.3,766,Compatible for oneplus 9 PRO/9/9R/8T/8PRO/7PRO...,,Prakash Waghchaure,,,Product is Suitable for my Oneplus Nord workin...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/CROSSVOLT-Compatible-Cha...
14883,B07T9FV9YP,Redgear Cloak Wired RGB Wired Over Ear Gaming ...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ749,"‚Çπ1,799",58%,4.0,13199,Sound Quality: Redgear Cloak comes equipped wi...,,CM,,,i also use the windows sonic for headphones a...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Redgear-Cloak-Gaming-Hea...
17155,B07MKMFKPG,Bosch Pro 1000W Mixer Grinder MGM8842MIN - Black,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,"‚Çπ6,999","‚Çπ10,590",34%,4.4,11499,Uniquely designed blunt PoundingBlade with thi...,AGOEYCHBYOAN53ZBHUMCS5GUSVTQ,,R3NTM54N8T1YCL,Powerfull mixer grinder,Chana dal paste,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Bosch-TrueMixx-Pro-Grind...


In [63]:
expand_df[info_cols] = expand_df[info_cols].fillna('Unknown')

In [64]:
expand_df.isna().sum()

product_id               0
product_name             0
category                 0
discounted_price         0
actual_price             0
discount_percentage      0
rating                   0
rating_count             2
about_product            0
user_id                  0
user_name                0
review_id                0
review_title            25
review_content         233
img_link                 0
product_link             0
dtype: int64

In [65]:
for idx, row in expand_df[expand_df['rating_count'].isna()].iterrows():
    if pd.notna(row['product_id']):
        match = expand_df[expand_df['product_id'] == row['product_id']]['rating_count'].dropna()
        if len(match)>0:
            expand_df.at[idx, 'rating_count'] = match.iloc[0]

In [66]:
expand_df[expand_df['rating_count'].isna()]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
4058,B0B94JPY2N,Amazon Brand - Solimo 65W Fast Charging Braide...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ199,‚Çπ999,80%,3.0,,USB C to C Cable: This cable has type C connec...,AE7CFHY23VAJT2FI4NZKKP6GS2UQ,Pranav,RUB7U91HVZ30,The cable works but is not 65W as advertised,I have a pd supported car charger and I bought...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Amazon-Brand-Charging-Su...
4547,B0BQRJ3C47,"REDTECH USB-C to Lightning Cable 3.3FT, [Apple...",Computers&Accessories|Accessories&Peripherals|...,‚Çπ249,‚Çπ999,75%,5.0,,üíé[The Fastest Charge] - This iPhone USB C cabl...,AGJC5O5H5BBXWUV7WRIEIOOR3TVQ,Abdul Gafur,RQXD5SAMMPC6L,Awesome Product,Quick delivery.Awesome ProductPacking was good...,https://m.media-amazon.com/images/I/31-q0xhaTA...,https://www.amazon.in/REDTECH-Lightning-Certif...


In [67]:
expand_df[expand_df['product_id'] == 'B0B94JPY2N']


Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
4058,B0B94JPY2N,Amazon Brand - Solimo 65W Fast Charging Braide...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ199,‚Çπ999,80%,3.0,,USB C to C Cable: This cable has type C connec...,AE7CFHY23VAJT2FI4NZKKP6GS2UQ,Pranav,RUB7U91HVZ30,The cable works but is not 65W as advertised,I have a pd supported car charger and I bought...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Amazon-Brand-Charging-Su...


In [68]:
expand_df[expand_df['product_id'] == 'B0BQRJ3C47']

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
4547,B0BQRJ3C47,"REDTECH USB-C to Lightning Cable 3.3FT, [Apple...",Computers&Accessories|Accessories&Peripherals|...,‚Çπ249,‚Çπ999,75%,5.0,,üíé[The Fastest Charge] - This iPhone USB C cabl...,AGJC5O5H5BBXWUV7WRIEIOOR3TVQ,Abdul Gafur,RQXD5SAMMPC6L,Awesome Product,Quick delivery.Awesome ProductPacking was good...,https://m.media-amazon.com/images/I/31-q0xhaTA...,https://www.amazon.in/REDTECH-Lightning-Certif...


In [69]:
expand_df[expand_df.isna().any(axis=1)]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
71,B08Y1TFSP6,pTron Solero TB301 3A Type-C Data and Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ149,"‚Çπ1,000",85%,3.9,24871,Fast Charging & Data Sync: Solero TB301 Type-C...,AHFAAPSY2MJ5HYOU2VQDJ7AQY4NQ,Ajaybabu.O.M,R18MP1KLUE18PC,,,https://m.media-amazon.com/images/I/31wOPjcSxl...,https://www.amazon.in/Solero-TB301-Charging-48...
191,B083342NKJ,MI Braided USB Type-C Cable for Charging Adapt...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ349,‚Çπ399,13%,4.4,18757,1M Long Cable. Usb 2.0 (Type A)|Toughened Join...,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,Deepak,R1ZBD2ZB2ZYEWX,I really liked this one.,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Mi-Braided-USB-Type-C-Ca...
193,B083342NKJ,MI Braided USB Type-C Cable for Charging Adapt...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ349,‚Çπ399,13%,4.4,18757,1M Long Cable. Usb 2.0 (Type A)|Toughened Join...,AGEOQQHGNELZNEUKJAJUA7NTPBLA,Rakesh,R1115HIQP3BKKJ,,Best data cable charging fast,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Mi-Braided-USB-Type-C-Ca...
223,B08WRBG3XW,"boAt Type C A325 Tangle-free, Sturdy Type C Ca...",Computers&Accessories|Accessories&Peripherals|...,‚Çπ199,‚Çπ499,60%,4.1,13045,Type C A 325 Cable Is Designed With A Perfect ...,AGXRGH7DLS3RVFS5KWU4PGR3H3GQ,Gursewak Singh,R3VIKEVJ5DBF5G,Good Quality but less Power Delivery,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/boAt-A325-Tangle-Free-Ch...
369,B094JNXNPV,Ambrane Unbreakable 3 in 1 Fast Charging Braid...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ299,‚Çπ399,25%,4.0,2766,Blazing Charging - All combined 3 in 1 cable s...,AEO5NTPVZBDP7EHO2NOJ3Q6QPN3A,C,R1M0NVGZXK8NGO,Sturdy,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21862,B08ZXZ362Z,akiara - Makes life easy Mini Sewing Machine f...,Home&Kitchen|Kitchen&HomeAppliances|SewingMach...,"‚Çπ1,563","‚Çπ3,098",50%,3.5,2283,PORTABLE MINI SEWING MACHINE with threads and ...,AHX4KZVASMGQQOST4T2RAQUZTLCA,Kirti Gupta,R32BCA8W6W1KIF,Good for beginners or minor repairs,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/akiara-Tailoring-Stitchi...
21867,B08ZXZ362Z,akiara - Makes life easy Mini Sewing Machine f...,Home&Kitchen|Kitchen&HomeAppliances|SewingMach...,"‚Çπ1,563","‚Çπ3,098",50%,3.5,2283,PORTABLE MINI SEWING MACHINE with threads and ...,AHX47N6TUNADPXMMQKVASVBNIHJA,krishna chaudhari,R39210FVK81Z0W,Good product,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/akiara-Tailoring-Stitchi...
21868,B00GHL8VP2,USHA 1212 PTC with Adjustable Thermostat Fan H...,"Home&Kitchen|Heating,Cooling&AirQuality|RoomHe...","‚Çπ3,487.77","‚Çπ4,990",30%,4.1,1127,Meant for Spot Heating|Room Size: Upto 15 sq f...,AECFYIUCHSZXDLACTYPEUSM5DIKA,Manoj Kumar,R1T19FVDX8Z7T2,I like this product,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Usha-1212-PTC-Adjustable...
21877,B0B9JZW1SQ,"4 in 1 Handheld Electric Vegetable Cutter Set,...",Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,‚Çπ498,"‚Çπ1,200",59%,3.2,113,{ 4 in 1 multi-function Electric Vegetable Cut...,AFCTMQKPVJI6Y2JPIGDKRKIAV43A,Ashish,R3N2A5DV7IPG6R,Cutter speed and power is very low,,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Handheld-Electric-Vegeta...


In [70]:
mask = expand_df['rating_count'].isna()
expand_df.loc[mask,'rating_count'] = '1'

In [71]:
expand_df['rating_count'] = [row.strip().replace(',','') for row in expand_df['rating_count'].values]

In [72]:
expand_df['rating_count'] = expand_df['rating_count'].astype(int)

In [73]:
expand_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11515 entries, 0 to 21960
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   product_id           11515 non-null  object
 1   product_name         11515 non-null  object
 2   category             11515 non-null  object
 3   discounted_price     11515 non-null  object
 4   actual_price         11515 non-null  object
 5   discount_percentage  11515 non-null  object
 6   rating               11515 non-null  object
 7   rating_count         11515 non-null  int64 
 8   about_product        11515 non-null  object
 9   user_id              11515 non-null  object
 10  user_name            11515 non-null  object
 11  review_id            11515 non-null  object
 12  review_title         11490 non-null  object
 13  review_content       11282 non-null  object
 14  img_link             11515 non-null  object
 15  product_link         11515 non-null  object
dtypes: int64(

In [74]:
# Filter multiple missing row
both_review_missing = expand_df[expand_df['review_title'].isna() & expand_df['review_content'].isna()]

In [75]:
# Filter both contain rows
both_review_contain = expand_df[expand_df['review_title'].notna() & expand_df['review_content'].notna()]
both_review_contain

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHMY5CWJMMK5BJRBBSNLYT3ONILA,Adarsh gupta,R2AJM3LFTLZHFO,Charging is really fast,Charging is really fast,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
2,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHCTC6ULH4XB6YHDY6PCH2R772LQ,Sundeep,R6AQJGUP6P86,Value for money,good product.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
3,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AGYHHIERNXKA6P5T7CZLXKVPT7IQ,S.Sayeed Ahmed,R1KD19VHEDV0OR,Product review,Till now satisfied with the quality.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
4,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG4OGOFWXJZTQ2HKYIOCOY3KXF2Q,jaspreet singh,R3C02RMYQMK6FC,Good quality,This is a good product . The charging speed is...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21956,B01486F4G6,Borosil Jumbo 1000-Watt Grill Sandwich Maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,"‚Çπ2,863","‚Çπ3,690",22%,4.3,6987,"Brand-Borosil, Specification √¢‚Ç¨‚Äú 23V ~ 5Hz;1 W...",AHXCDNSXAESERITAFELQABFVNLCA,PARDEEP,R2ZC03S4QXOW4Y,Excellent product‚úå,It is a nice product,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...
21957,B01486F4G6,Borosil Jumbo 1000-Watt Grill Sandwich Maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,"‚Çπ2,863","‚Çπ3,690",22%,4.3,6987,"Brand-Borosil, Specification √¢‚Ç¨‚Äú 23V ~ 5Hz;1 W...",AGRZD6CHLCUNOLMMIMIHUCG7PIFA,Anindya Pramanik,R186H8YW34BQD5,A good product for household use,easy to use. The grilling function is also wo...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...
21958,B01486F4G6,Borosil Jumbo 1000-Watt Grill Sandwich Maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,"‚Çπ2,863","‚Çπ3,690",22%,4.3,6987,"Brand-Borosil, Specification √¢‚Ç¨‚Äú 23V ~ 5Hz;1 W...",AFQZVGSOSOJHKFQQMCEI4725QEKQ,Vikas Singh,R10NC3D321N59G,‡§Æ‡•Å‡§ù‡•á ‡§¨‡§ø‡§≤‡•ç‡§ï‡•Å‡§≤ ‡§≠‡•Ä ‡§Æ‡§ú‡§æ ‡§®‡§π‡•Ä‡§Ç ‡§Ü‡§Ø‡§æ ‡§î‡§∞ ‡§µ‡§æ‡§™‡§∏ ‡§ï‡§∞ ‡§¶‡§ø‡§Ø‡§æ‡•§,Very good product,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...
21959,B01486F4G6,Borosil Jumbo 1000-Watt Grill Sandwich Maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,"‚Çπ2,863","‚Çπ3,690",22%,4.3,6987,"Brand-Borosil, Specification √¢‚Ç¨‚Äú 23V ~ 5Hz;1 W...",AEALVGXXIP46OZVXKRUXSDWZJMEA,Harshada Pimple,REKF75G4SOAOX,Best product,This is a pretty powerful sandwich maker,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...


## NLP --> Natural Language Processing
### Text Cleaning, Tokenaization & Sentiment Feature

In [76]:
#valid text row selection
text_df = expand_df[expand_df['review_content'].notna() & expand_df['rating'].notna() & (expand_df['review_content'].str.strip() != "")].copy()

In [77]:
print(text_df.shape)
print(expand_df.shape)

(11281, 16)
(11515, 16)


In [78]:
def rating_to_sentiment(rating):
    if rating>=4:
        return 'positive'
    elif rating<=2:
        return 'negative'
    else:
        return 'neutral'


In [79]:
text_df['rating'].unique()

array(['4.2', '4.0', '3.9', '4.1', '4.3', '4.4', '4.5', '3.7', '3.3',
       '3.6', '3.4', '3.8', '3.5', '4.6', '3.2', '5.0', '4.7', '3.0',
       '2.8', '4', '3.1', '4.8', '2.3', '|', '2', '3', '2.6', '2.9'],
      dtype=object)

In [80]:
expand_df['rating'] = expand_df['rating'].replace("|", np.nan)

In [81]:
expand_df['rating'] = pd.to_numeric(expand_df['rating'], errors='coerce')

In [82]:
expand_df['rating'] = expand_df.groupby('product_id')['rating']\
    .transform(lambda x: x.fillna(x.mean()))

In [83]:
expand_df['rating'].isna().sum()

np.int64(8)

#### Lets drop this negligible rows. Because this are unrecoverable.

In [84]:
expand_df['rating'] = expand_df['rating'].dropna()

In [85]:
text_df = expand_df[expand_df['review_content'].notna() & expand_df['rating'].notna() & (expand_df['review_content'].str.strip() != "")].copy()

In [86]:
print(expand_df.shape)
print(text_df.shape)

(11515, 16)
(11273, 16)


In [87]:
text_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11273 entries, 0 to 21960
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   product_id           11273 non-null  object 
 1   product_name         11273 non-null  object 
 2   category             11273 non-null  object 
 3   discounted_price     11273 non-null  object 
 4   actual_price         11273 non-null  object 
 5   discount_percentage  11273 non-null  object 
 6   rating               11273 non-null  float64
 7   rating_count         11273 non-null  int64  
 8   about_product        11273 non-null  object 
 9   user_id              11273 non-null  object 
 10  user_name            11273 non-null  object 
 11  review_id            11273 non-null  object 
 12  review_title         11256 non-null  object 
 13  review_content       11273 non-null  object 
 14  img_link             11273 non-null  object 
 15  product_link         11273 non-null  obje

In [88]:
#add sentiment column to text_df
text_df['sentiment'] = text_df['rating'].apply(rating_to_sentiment)

In [89]:
text_df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link,sentiment
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,Looks durable Charging is fine tooNo complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive
1,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHMY5CWJMMK5BJRBBSNLYT3ONILA,Adarsh gupta,R2AJM3LFTLZHFO,Charging is really fast,Charging is really fast,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive
2,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHCTC6ULH4XB6YHDY6PCH2R772LQ,Sundeep,R6AQJGUP6P86,Value for money,good product.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive
3,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AGYHHIERNXKA6P5T7CZLXKVPT7IQ,S.Sayeed Ahmed,R1KD19VHEDV0OR,Product review,Till now satisfied with the quality.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive
4,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG4OGOFWXJZTQ2HKYIOCOY3KXF2Q,jaspreet singh,R3C02RMYQMK6FC,Good quality,This is a good product . The charging speed is...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive


## NLP Tokenization pipeline

In [90]:
text_df['review_content'] = text_df['review_content'].str.lower()

In [91]:
text_df['review_content']

0           looks durable charging is fine toono complains
1                                  charging is really fast
2                                            good product.
3                     till now satisfied with the quality.
4        this is a good product . the charging speed is...
                               ...                        
21956                                 it is a nice product
21957     easy to use. the grilling function is also wo...
21958                                    very good product
21959             this is a pretty powerful sandwich maker
21960                 for home use. the product looks good
Name: review_content, Length: 11273, dtype: object

## Normalization necessary functions

In [99]:
import unicodedata
import emoji
import re

def Unicode_normalization(text):
    text = unicodedata.normalize('NFKD', text)
    text = text.encode('ascii', 'ignore').decode('utf-8', 'ignore')

    return text


def emoji_normalization(text):
    return emoji.demojize(text, delimiters=(" ", " "))


def repeated_text_normalize(text):
    return re.sub(r'(.)\1{2.}', r'\1\1', text)


def whitespace_normalization(text):
    return re.sub(r'\s+', ' ', text).strip()



In [100]:
## pipeline function
def normalize_text(text):
    if pd.notna(text):
        return text
    
    text = Unicode_normalization(text)
    text = emoji_normalization(text)
    text = repeated_text_normalize(text)
    text = whitespace_normalization(text)

    return text

In [101]:
## applying to dataset

text_df['normalize_review'] = text_df['review_content'].apply(normalize_text)

In [102]:
text_df.head(3)

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link,sentiment,normalize_review
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,R3HXWT0LRP0NMF,Satisfied,looks durable charging is fine toono complains,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive,looks durable charging is fine toono complains
1,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHMY5CWJMMK5BJRBBSNLYT3ONILA,Adarsh gupta,R2AJM3LFTLZHFO,Charging is really fast,charging is really fast,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive,charging is really fast
2,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,‚Çπ399,"‚Çπ1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,AHCTC6ULH4XB6YHDY6PCH2R772LQ,Sundeep,R6AQJGUP6P86,Value for money,good product.,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,positive,good product.


## Text Cleaning Process

In [103]:
def clean_punctuation(text):
    text = re.sub(r"[^\w\s!?]", "", text)
    return text