In [43]:
import pandas as pd
from surprise import Dataset, Reader, KNNBasic, accuracy, SVD
from surprise.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [44]:
df = pd.read_csv("/Users/darwinye/Desktop/421 Data Mining/amazon.csv")

In [45]:
df.fillna(value="NA", inplace=True)  # Fill missing values with 'NA'
df["discounted_price"] = (
    df["discounted_price"].str.replace("₹", "").str.replace(",", "").astype(float)
)
df["actual_price"] = (
    df["actual_price"].str.replace("₹", "").str.replace(",", "").astype(float)
)

In [46]:
df["rating_count"] = df["rating_count"].replace("NA", pd.NA)

In [47]:
df.isna().sum()

product_id             0
product_name           0
category               0
discounted_price       0
actual_price           0
discount_percentage    0
rating                 0
rating_count           2
about_product          0
user_id                0
user_name              0
review_id              0
review_title           0
review_content         0
img_link               0
product_link           0
dtype: int64

In [48]:
print(df["rating"].unique())

['4.2' '4.0' '3.9' '4.1' '4.3' '4.4' '4.5' '3.7' '3.3' '3.6' '3.4' '3.8'
 '3.5' '4.6' '3.2' '5.0' '4.7' '3.0' '2.8' '4' '3.1' '4.8' '2.3' '|' '2'
 '3' '2.6' '2.9']


In [49]:
df["rating"] = pd.to_numeric(df["rating"], errors="coerce")
df.dropna(inplace=True)

In [50]:
df["rating_count"] = df["rating_count"].str.replace(",", "").astype(int)

In [51]:
df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","Satisfied,Charging is really fast,Value for mo...",Looks durable Charging is fine tooNo complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,199.0,349.0,43%,4.0,43994,"Compatible with all Type C enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","A Good Braided Cable for Your Type C Device,Go...",I ordered this cable to connect my phone to An...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,199.0,1899.0,90%,3.9,7928,【 Fast Charger& Data Sync】-With built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","Good speed for earlier versions,Good Product,W...","Not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...
3,B08HDJ86NZ,boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...,Computers&Accessories|Accessories&Peripherals|...,329.0,699.0,53%,4.2,94363,The boAt Deuce USB 300 2 in 1 cable is compati...,"AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,AG5HTSFRRE6NL3M5S...","Omkar dhale,JD,HEMALATHA,Ajwadh a.,amar singh ...","R3EEUZKKK9J36I,R3HJVYCLYOY554,REDECAZ7AMPQC,R1...","Good product,Good one,Nice,Really nice product...","Good product,long wire,Charges good,Nice,I bou...",https://m.media-amazon.com/images/I/41V5FtEWPk...,https://www.amazon.in/Deuce-300-Resistant-Tang...
4,B08CF3B7N1,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,Computers&Accessories|Accessories&Peripherals|...,154.0,399.0,61%,4.2,16905,[CHARGE & SYNC FUNCTION]- This cable comes wit...,"AE3Q6KSUK5P75D5HFYHCRAOLODSA,AFUGIFH5ZAFXRDSZH...","rahuls6099,Swasat Borah,Ajay Wadke,Pranali,RVK...","R1BP4L2HH9TFUP,R16PVJEXKV6QZS,R2UPDB81N66T4P,R...","As good as original,Decent,Good one for second...","Bought this instead of original apple, does th...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Portronics-Konnect-POR-1...


In [52]:
df["product_name"] = df["product_name"].str.lower()

In [53]:
split_result = df["category"].str.split("|", expand=True)
split_result

Unnamed: 0,0,1,2,3,4,5,6
0,Computers&Accessories,Accessories&Peripherals,Cables&Accessories,Cables,USBCables,,
1,Computers&Accessories,Accessories&Peripherals,Cables&Accessories,Cables,USBCables,,
2,Computers&Accessories,Accessories&Peripherals,Cables&Accessories,Cables,USBCables,,
3,Computers&Accessories,Accessories&Peripherals,Cables&Accessories,Cables,USBCables,,
4,Computers&Accessories,Accessories&Peripherals,Cables&Accessories,Cables,USBCables,,
...,...,...,...,...,...,...,...
1460,Home&Kitchen,Kitchen&HomeAppliances,WaterPurifiers&Accessories,WaterPurifierAccessories,,,
1461,Home&Kitchen,Kitchen&HomeAppliances,SmallKitchenAppliances,Rice&PastaCookers,,,
1462,Home&Kitchen,"Heating,Cooling&AirQuality",RoomHeaters,HeatConvectors,,,
1463,Home&Kitchen,"Heating,Cooling&AirQuality",Fans,ExhaustFans,,,


In [54]:
unique_categories = set()

for col in split_result.columns:
    unique_categories.update(split_result[col].dropna().unique())

print(unique_categories)

{'CableConnectionProtectors', 'MusicalInstruments', 'RoomHeaters', 'Caddies', 'VacuumSealers', 'Paints', 'CoolingPads', 'WoodenPencils', 'RCACables', 'HardDriveAccessories', 'AutomobileAccessories', 'OfficeElectronics', 'Cradles', 'CameraPrivacyCovers', 'ScreenProtectors', 'Headphones', 'HeatConvectors', 'Fans', 'Vacuum,Cleaning&Ironing', 'AirPurifiers&Ionizers', 'PCSpeakers', 'Flashes&SelfieLights', 'On-Ear', 'Health&PersonalCare', 'Adapters&Multi-Outlets', 'Smartphones', 'CarAccessories', 'ExternalSolidStateDrives', 'OTGAdapters', 'Inks,Toners&Cartridges', 'BasicCases', 'Macro&RinglightFlashes', 'InductionCooktop', 'RechargeableBatteries', 'Printers,Inks&Accessories', 'Wet-DryVacuums', 'Kettles&HotWaterDispensers', 'Bags&Sleeves', 'CeilingFans', 'TripodLegs', 'Projectors', 'Photo&VideoAccessories', 'FountainPens', 'KitchenTools', 'EggBoilers', 'DigitalBathroomScales', 'Mills&Grinders', 'Sewing&EmbroideryMachines', 'Tripods&Monopods', 'OfficePaperProducts', 'Juicers', 'PhotoStudio&Lig

In [55]:
split_categories = df["category"].str.split("|", expand=True)

# Step 2: Stack those columns to get a single column of categories
stacked_categories = split_categories.stack()

# Reset the index to align with the new stacked categories
stacked_categories = stacked_categories.reset_index(drop=True)

# Rename the column to 'category'
stacked_categories.columns = ["category"]

# Display the cleaned categories
print(stacked_categories)

0         Computers&Accessories
1       Accessories&Peripherals
2            Cables&Accessories
3                        Cables
4                     USBCables
                 ...           
6286                ExhaustFans
6287               Home&Kitchen
6288     Kitchen&HomeAppliances
6289     SmallKitchenAppliances
6290             SandwichMakers
Length: 6291, dtype: object


In [56]:
df["discount_percentage"] = df["discount_percentage"].str.rstrip("%").astype(float)

In [57]:
df["about_product"] = df["about_product"].str.replace("[^\w\s]", "").str.lower()
df["review_title"] = df["review_title"].str.replace("[^\w\s]", "").str.lower()
df["review_content"] = df["review_content"].str.replace("[^\w\s]", "").str.lower()

In [58]:
df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B098NS6PVG,ambrane unbreakable 60w / 3a fast charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,199.0,349.0,43.0,4.0,43994,"compatible with all type c enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","a good braided cable for your type c device,go...",i ordered this cable to connect my phone to an...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
2,B096MSW6CT,sounce fast phone charging cable & data sync u...,Computers&Accessories|Accessories&Peripherals|...,199.0,1899.0,90.0,3.9,7928,【 fast charger& data sync】-with built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","good speed for earlier versions,good product,w...","not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...
3,B08HDJ86NZ,boat deuce usb 300 2 in 1 type-c & micro usb s...,Computers&Accessories|Accessories&Peripherals|...,329.0,699.0,53.0,4.2,94363,the boat deuce usb 300 2 in 1 cable is compati...,"AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,AG5HTSFRRE6NL3M5S...","Omkar dhale,JD,HEMALATHA,Ajwadh a.,amar singh ...","R3EEUZKKK9J36I,R3HJVYCLYOY554,REDECAZ7AMPQC,R1...","good product,good one,nice,really nice product...","good product,long wire,charges good,nice,i bou...",https://m.media-amazon.com/images/I/41V5FtEWPk...,https://www.amazon.in/Deuce-300-Resistant-Tang...
4,B08CF3B7N1,portronics konnect l 1.2m fast charging 3a 8 p...,Computers&Accessories|Accessories&Peripherals|...,154.0,399.0,61.0,4.2,16905,[charge & sync function]- this cable comes wit...,"AE3Q6KSUK5P75D5HFYHCRAOLODSA,AFUGIFH5ZAFXRDSZH...","rahuls6099,Swasat Borah,Ajay Wadke,Pranali,RVK...","R1BP4L2HH9TFUP,R16PVJEXKV6QZS,R2UPDB81N66T4P,R...","as good as original,decent,good one for second...","bought this instead of original apple, does th...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Portronics-Konnect-POR-1...


In [59]:
first_row = df.iloc[0]

# Count the number of different user IDs
user_ids = first_row["user_id"].split(",")
num_unique_user_ids = len(set(user_ids))

# Count the number of different user names
user_names = first_row["user_name"].split(",")
num_unique_user_names = len(set(user_names))

print("Number of different user IDs:", num_unique_user_ids)
print("Number of different user names:", num_unique_user_names)

Number of different user IDs: 8
Number of different user names: 8


In [60]:
def split_users(row):
    user_ids = row["user_id"].split(",")
    user_names = row["user_name"].split(",")
    rows = []
    for uid, uname in zip(user_ids, user_names):
        row_copy = row.copy()
        row_copy["user_id"] = uid
        row_copy["user_name"] = uname
        rows.append(row_copy)
    return pd.DataFrame(rows)


# Apply the function to each row and concatenate the results
df_new = pd.concat([split_users(row) for _, row in df.iterrows()], ignore_index=True)

# Display the DataFrame
df_new

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AHMY5CWJMMK5BJRBBSNLYT3ONILA,Adarsh gupta,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
2,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AHCTC6ULH4XB6YHDY6PCH2R772LQ,Sundeep,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
3,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AGYHHIERNXKA6P5T7CZLXKVPT7IQ,S.Sayeed Ahmed,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
4,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AG4OGOFWXJZTQ2HKYIOCOY3KXF2Q,jaspreet singh,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11488,B01486F4G6,borosil jumbo 1000-watt grill sandwich maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,2863.0,3690.0,22.0,4.3,6987,"brand-borosil, specification â€“ 23v ~ 5hz;1 w...",AHXCDNSXAESERITAFELQABFVNLCA,PARDEEP,"R20RBRZ0WEUJT9,ROKIFK9R2ISSE,R30EEG2FNJSN5I,R2...","works perfect,ok good product,nice product. re...",it does it job perfectly..only issue is temp c...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...
11489,B01486F4G6,borosil jumbo 1000-watt grill sandwich maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,2863.0,3690.0,22.0,4.3,6987,"brand-borosil, specification â€“ 23v ~ 5hz;1 w...",AGRZD6CHLCUNOLMMIMIHUCG7PIFA,Anindya Pramanik,"R20RBRZ0WEUJT9,ROKIFK9R2ISSE,R30EEG2FNJSN5I,R2...","works perfect,ok good product,nice product. re...",it does it job perfectly..only issue is temp c...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...
11490,B01486F4G6,borosil jumbo 1000-watt grill sandwich maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,2863.0,3690.0,22.0,4.3,6987,"brand-borosil, specification â€“ 23v ~ 5hz;1 w...",AFQZVGSOSOJHKFQQMCEI4725QEKQ,Vikas Singh,"R20RBRZ0WEUJT9,ROKIFK9R2ISSE,R30EEG2FNJSN5I,R2...","works perfect,ok good product,nice product. re...",it does it job perfectly..only issue is temp c...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...
11491,B01486F4G6,borosil jumbo 1000-watt grill sandwich maker (...,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,2863.0,3690.0,22.0,4.3,6987,"brand-borosil, specification â€“ 23v ~ 5hz;1 w...",AEALVGXXIP46OZVXKRUXSDWZJMEA,Harshada Pimple,"R20RBRZ0WEUJT9,ROKIFK9R2ISSE,R30EEG2FNJSN5I,R2...","works perfect,ok good product,nice product. re...",it does it job perfectly..only issue is temp c...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Borosil-Jumbo-1000-Watt-...


In [61]:
df_new[df_new["user_name"] == "Manav"]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
336,B07JW1Y6XV,wayona nylon braided 3a lightning to usb a syn...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,"[high compatibility] : iphone x/xsmax/xr ,ipho...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LB1-Sy...
640,B07LGT55SJ,wayona usb nylon braided data sync and chargin...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,[high compatibility] : compatible for iphone x...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-Syncing-C...
712,B07JH1C41D,wayona nylon braided (2 pack) lightning fast u...,Computers&Accessories|Accessories&Peripherals|...,649.0,1999.0,68.0,4.2,24269,"[high compatibility] : iphone x/xsmax/xr ,ipho...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/I/412fvb7k2F...,https://www.amazon.in/Wayona-Braided-WN3LG2-Sy...
848,B07JGDB5M1,wayona nylon braided 2m / 6ft fast charge usb ...,Computers&Accessories|Accessories&Peripherals|...,449.0,1299.0,65.0,4.2,24269,"[high compatibility] : phone x/xsmax/xr ,phone...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN6LG1-Sy...
1738,B07JH1CBGW,wayona nylon braided usb syncing and charging ...,Computers&Accessories|Accessories&Peripherals|...,649.0,1999.0,68.0,4.2,24269,"[high compatibility] : iphone x/xsmax/xr ,ipho...",AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/I/41eHLj-wfG...,https://www.amazon.in/Wayona-Braided-WN3LB2-Sy...
2880,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24270,high compatibility : compatible with iphone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/I/51UsScvHQN...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
4793,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
6393,B09MZ6WZ6V,inovera world map extended anti slip rubber ga...,Computers&Accessories|Accessories&Peripherals|...,499.0,999.0,50.0,4.4,1030,large size design makes it provide plenty of r...,AFO3ZXLJKCVDKFHGSXAFA6AYDE5A,Manav,"R116YMD72TSY5Z,R258CFU2YKTK58,R24DFHVPXSIU8W,R...","nice design.,worth the buy,pretty good,nice pr...",nice design and print. but hard to differentia...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/INOVERA-Extended-Rubber-...
6535,B01DGVKBC6,"fedus cat6 ethernet cable, 10 meter high speed...",Computers&Accessories|Accessories&Peripherals|...,287.0,499.0,42.0,4.4,8076,✔️high-performance internet cable is cat6 rate...,AGAZYH7WC2DAX4C5Q2IJ3QNNTXKQ,Manav,"R3J8OMTJB5P038,R1ZFZHJQD4WTQL,R3U6Q310IX6DDS,R...","very good performance,it's been more than a mo...",really it is solid and effeciant. net signal i...,https://m.media-amazon.com/images/I/51jNo4QNTN...,https://www.amazon.in/Technotech-Ethernet-Netw...


In [62]:
print(df_new["category"].unique())

['Computers&Accessories|Accessories&Peripherals|Cables&Accessories|Cables|USBCables'
 'Computers&Accessories|NetworkingDevices|NetworkAdapters|WirelessUSBAdapters'
 'Electronics|HomeTheater,TV&Video|Accessories|Cables|HDMICables'
 'Electronics|HomeTheater,TV&Video|Televisions|SmartTelevisions'
 'Electronics|HomeTheater,TV&Video|Accessories|RemoteControls'
 'Electronics|HomeTheater,TV&Video|Televisions|StandardTelevisions'
 'Electronics|HomeTheater,TV&Video|Accessories|TVMounts,Stands&Turntables|TVWall&CeilingMounts'
 'Electronics|HomeTheater,TV&Video|Accessories|Cables|RCACables'
 'Electronics|HomeAudio|Accessories|SpeakerAccessories|Mounts'
 'Electronics|HomeTheater,TV&Video|Accessories|Cables|OpticalCables'
 'Electronics|HomeTheater,TV&Video|Projectors'
 'Electronics|HomeAudio|Accessories|Adapters'
 'Electronics|HomeTheater,TV&Video|SatelliteEquipment|SatelliteReceivers'
 'Computers&Accessories|Accessories&Peripherals|Cables&Accessories|Cables|DVICables'
 'Electronics|HomeTheater,TV&

In [63]:
def extract_first_last(category):
    categories = category.split("|")
    first_item = categories[0]
    last_item = categories[-1]
    return first_item, last_item


# Apply the function to create new columns
df_new[["First_category", "Last_category"]] = df_new["category"].apply(
    lambda x: pd.Series(extract_first_last(x))
)
df_new.drop("category", axis=1, inplace=True)

In [64]:
df_new.head()

Unnamed: 0,product_id,product_name,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link,First_category,Last_category
0,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AG3D6O4STAQKAY2UVGEUV46KN35Q,Manav,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,Computers&Accessories,USBCables
1,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AHMY5CWJMMK5BJRBBSNLYT3ONILA,Adarsh gupta,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,Computers&Accessories,USBCables
2,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AHCTC6ULH4XB6YHDY6PCH2R772LQ,Sundeep,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,Computers&Accessories,USBCables
3,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AGYHHIERNXKA6P5T7CZLXKVPT7IQ,S.Sayeed Ahmed,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,Computers&Accessories,USBCables
4,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,AG4OGOFWXJZTQ2HKYIOCOY3KXF2Q,jaspreet singh,"R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,Computers&Accessories,USBCables


In [65]:
df_new.drop(
    columns=["product_name", "user_name", "img_link", "product_link"], inplace=True
)

In [70]:
df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
0,B07JW9H4J1,wayona nylon braided usb to lightning fast cha...,Computers&Accessories|Accessories&Peripherals|...,399.0,1099.0,64.0,4.2,24269,high compatibility : compatible with iphone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","satisfied,charging is really fast,value for mo...",looks durable charging is fine toono complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B098NS6PVG,ambrane unbreakable 60w / 3a fast charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,199.0,349.0,43.0,4.0,43994,"compatible with all type c enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","a good braided cable for your type c device,go...",i ordered this cable to connect my phone to an...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
2,B096MSW6CT,sounce fast phone charging cable & data sync u...,Computers&Accessories|Accessories&Peripherals|...,199.0,1899.0,90.0,3.9,7928,【 fast charger& data sync】-with built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","good speed for earlier versions,good product,w...","not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...
3,B08HDJ86NZ,boat deuce usb 300 2 in 1 type-c & micro usb s...,Computers&Accessories|Accessories&Peripherals|...,329.0,699.0,53.0,4.2,94363,the boat deuce usb 300 2 in 1 cable is compati...,"AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,AG5HTSFRRE6NL3M5S...","Omkar dhale,JD,HEMALATHA,Ajwadh a.,amar singh ...","R3EEUZKKK9J36I,R3HJVYCLYOY554,REDECAZ7AMPQC,R1...","good product,good one,nice,really nice product...","good product,long wire,charges good,nice,i bou...",https://m.media-amazon.com/images/I/41V5FtEWPk...,https://www.amazon.in/Deuce-300-Resistant-Tang...
4,B08CF3B7N1,portronics konnect l 1.2m fast charging 3a 8 p...,Computers&Accessories|Accessories&Peripherals|...,154.0,399.0,61.0,4.2,16905,[charge & sync function]- this cable comes wit...,"AE3Q6KSUK5P75D5HFYHCRAOLODSA,AFUGIFH5ZAFXRDSZH...","rahuls6099,Swasat Borah,Ajay Wadke,Pranali,RVK...","R1BP4L2HH9TFUP,R16PVJEXKV6QZS,R2UPDB81N66T4P,R...","as good as original,decent,good one for second...","bought this instead of original apple, does th...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Portronics-Konnect-POR-1...


In [66]:
frequency = df.groupby("user_id").size().reset_index(name="frequency")

# Monetary: Calculate the total spend for each customer
# We will sum up the 'actual_price' for each customer to get the Monetary value.
monetary = df.groupby("user_id")["actual_price"].sum().reset_index(name="monetary")

# Merge the Frequency and Monetary data frames to create an RFM data frame
rfm_df = pd.merge(frequency, monetary, on="user_id")

# Display the resulting RFM DataFrame
print(rfm_df)

                                                user_id  frequency  monetary
0     AE22Y3KIS7SE6LI3HE2VS6WWPU4Q,AHWEYO2IJ5I5GDWZA...          1     699.0
1     AE23RS3W7GZO7LHYKJU6KSKVM4MQ,AEQUNEY6GQOTEGUMS...          1    3210.0
2                          AE242TR3GQ6TYC6W4SJ5UYYKBTYQ          1     799.0
3     AE27UOZENYSWCQVQRRUQIV2ZM7VA,AGMYSLV6NNOAYES25...          4    6596.0
4     AE2JTMRKTUOIVIZWS2WDGTMNTU4Q,AF4QXCB32VC2DVE7O...          2    3998.0
...                                                 ...        ...       ...
1186  AHZFKWGDBRQKNMNQ4ZPL52OZBRKA,AGBEFVJFOQIRF7C7K...          1    2800.0
1187  AHZJHJWFZLYD64GVP4PXVI2F4LXA,AEUCRZPOISXKHXMCZ...          1   24999.0
1188  AHZNSNBVKQR4OGJAQHE4DCDA4YHA,AFBW6COTZXGHQMWVD...          2   19998.0
1189  AHZWJCVEIEI76H2VGMUSN5D735IQ,AH2DFUHFTG4CKQFVG...          3    5997.0
1190  AHZWXUWE3RGLDH4JJUK3HT3VMBJA,AFWUWJMEO4IQEMHKM...          1    9999.0

[1191 rows x 3 columns]


In [67]:
# Assign ranks from 1 to N for Frequency and Monetary values
# Higher values get higher ranks
rfm_df["F_Rank"] = rfm_df["frequency"].rank(method="first", ascending=False)
rfm_df["M_Rank"] = rfm_df["monetary"].rank(method="first", ascending=False)

# Divide the ranks into segments
rfm_df["F_Score"] = pd.qcut(
    rfm_df["F_Rank"], q=4, labels=f_labels, duplicates="drop"
).astype(int)
rfm_df["M_Score"] = pd.qcut(
    rfm_df["M_Rank"], q=4, labels=m_labels, duplicates="drop"
).astype(int)

# Create the RFM Segment and RFM Score as before
rfm_df["RFM_Segment"] = rfm_df["F_Score"].astype(str) + rfm_df["M_Score"].astype(str)
rfm_df["RFM_Score"] = rfm_df[["F_Score", "M_Score"]].sum(axis=1)

# Create general segments based on the total score
rfm_df["General_Segment"] = pd.qcut(
    rfm_df["RFM_Score"],
    q=[0, 0.1, 0.3, 1.0],
    labels=["Gold", "Silver", "Bronze"],
    duplicates="drop",
)

# Print the head of the RFM DataFrame to check the results
print(rfm_df.head())

                                             user_id  frequency  monetary  \
0  AE22Y3KIS7SE6LI3HE2VS6WWPU4Q,AHWEYO2IJ5I5GDWZA...          1     699.0   
1  AE23RS3W7GZO7LHYKJU6KSKVM4MQ,AEQUNEY6GQOTEGUMS...          1    3210.0   
2                       AE242TR3GQ6TYC6W4SJ5UYYKBTYQ          1     799.0   
3  AE27UOZENYSWCQVQRRUQIV2ZM7VA,AGMYSLV6NNOAYES25...          4    6596.0   
4  AE2JTMRKTUOIVIZWS2WDGTMNTU4Q,AF4QXCB32VC2DVE7O...          2    3998.0   

   F_Rank  M_Rank  F_Score  M_Score RFM_Segment  RFM_Score General_Segment  
0   145.0   972.0        1        4          14          5          Bronze  
1   146.0   382.0        1        2          12          3            Gold  
2   147.0   937.0        1        4          14          5          Bronze  
3    15.0   201.0        1        1          11          2            Gold  
4    70.0   314.0        1        2          12          3            Gold  


In [72]:

# Analyze the characteristics of each segment
segment_analysis = rfm_df.groupby('General_Segment').agg({
    'frequency': ['mean', 'min', 'max'],
    'monetary': ['mean', 'min', 'max'],
    'user_id': 'count'  # Count of users in each segment
}).reset_index()

# Rename columns for readability
segment_analysis.columns = ['General_Segment', 'Average_Frequency', 'Min_Frequency', 'Max_Frequency', 
                            'Average_Monetary', 'Min_Monetary', 'Max_Monetary', 'Customer_Count']

# Output the segment analysis to see what each segment looks like
print(segment_analysis)


  General_Segment  Average_Frequency  Min_Frequency  Max_Frequency  \
0            Gold           1.932271              1             10   
1          Silver           1.157303              1              3   
2          Bronze           1.011811              1              2   

   Average_Monetary  Min_Monetary  Max_Monetary  Customer_Count  
0      19805.146056        1995.0      241600.0             251  
1       6964.857640         999.0       75990.0             178  
2       2311.782100          39.0       59900.0             762  


  segment_analysis = rfm_df.groupby('General_Segment').agg({


Gold Segment

•	Average Frequency: Customers in this segment make purchases more often than those in other segments, with an average frequency of about 1.93. This suggests a high level of engagement with your products or services.
•	Monetary Values: They also spend significantly more, with an average monetary value of 19,805, which is much higher than the other segments. The range of spending is from 1,995 to 241,600, indicating a diverse group of high spenders that may include both consistently high-value purchasers as well as those making occasional large purchases.
•	Customer Count: The Gold segment is substantial in size, with 251 customers. Considering their high frequency and monetary value, these customers are likely the most valuable to your business and should be the primary focus for retention and loyalty programs.

Silver Segment

•	Average Frequency: This segment shops less frequently, with an average frequency of about 1.16. This indicates occasional purchasing patterns.
•	Monetary Values: Their spending is moderate, with an average monetary value of 6,964.85, ranging from 999 to 75,990. This suggests a mix of mid-tier customers who have the potential to be nurtured into more frequent and higher-value purchasers.
•	Customer Count: There are 178 customers in the Silver segment. This group may include newer customers or those with specific needs. Tailored marketing campaigns could encourage them to increase their frequency and spending.

Bronze Segment

•	Average Frequency: Customers in this segment have the lowest average frequency of about 1.01, implying that they typically make single or very few purchases.
•	Monetary Values: Their average monetary value is the lowest at 2,311.78, with spending ranging from 39 to 59,900. Despite the low average spend, the wide range suggests some Bronze customers occasionally make significant purchases.
•	Customer Count: This is the largest segment with 762 customers, but given their low engagement and spending, the focus should be on re-engaging them through special offers or by understanding their low activity through surveys and feedback.

Implications for Marketing Strategy

•	For the Gold segment, your strategy should focus on exclusivity and recognition. Since they are frequent and high-value customers, personalized services, exclusive offers, and premium loyalty programs are effective. They are likely to respond well to upselling and cross-selling strategies.
•	The Silver segment requires strategies that encourage them to purchase more frequently and potentially spend more per transaction. This could include offering them bundle deals, limited-time offers, or loyalty points that increase in value with frequency of purchase.
•	For the Bronze segment, the aim should be to understand their needs and reasons behind the infrequent and low-value purchases. Since this segment is quite large, there is a significant opportunity to move them up to higher segments. Initial engagement strategies such as first-time buyer offers or feedback surveys can help in increasing their activity.
