In [1]:
import pandas as pd
import re
import numpy as np

In [2]:
def inr_to_usd(price_inr, exchange_rate=83.98):
    """
    Convert a price from Indian Rupees (INR) to US Dollars (USD).

    Parameters
    ----------
    price_inr : str, int, float or None
        Amount in rupees. Text input may include the currency symbol ₹,
        thousands-separator commas and surrounding whitespace.
    exchange_rate : float, optional
        Number of rupees per one US dollar. Defaults to 83.98, the exchange
        rate as of August 8, 2024.

    Returns
    -------
    float or None
        The USD amount rounded to 2 decimal places, or None when
        ``price_inr`` is None or contains no digits at all (blank text).

    Raises
    ------
    ValueError
        If the cleaned text is not a valid number.
    """
    # The missing-value check has to run before parsing: str(None) is the
    # text 'None', which float() rejects with a ValueError.
    if price_inr is None:
        return None

    cleaned = str(price_inr).replace('₹', '').replace(',', '').strip()
    if not cleaned:
        # Blank input (or a lone currency symbol) carries no price.
        return None

    return round(float(cleaned) / exchange_rate, 2)

def process_img_url(url):
    """
    Strip the 'W/<segment>/images/' portion out of an image URL.

    The whole matched run is removed: the leading 'W/', the single path
    segment that follows it, and the trailing 'images/'. Falsy input
    (None, empty string) is handed back untouched.
    """
    if not url:
        return url
    cleaned_url = re.sub(r'(W/[^/]+/images/)', '', url)
    return cleaned_url

In [3]:
# Load the raw export, then build the processed frame in one chain so each
# step works on a fresh object rather than re-binding `df` repeatedly.
raw_df = pd.read_csv('amazon_raw.csv', delimiter=',', quotechar='"')

# 'category' is a '|'-separated path; only its first two levels are kept.
# `.str[i]` yields NaN when a level is missing, so short paths do not break
# the assignment (those rows are then removed by dropna() below).
category_parts = raw_df['category'].str.split('|', n=2)

df = (
    raw_df
    .rename(columns={
        'product_name': 'name',
        'discounted_price': 'price',
        'rating_count': 'ratingNumber',
        'img_link': 'img',
    })
    .assign(
        FirstCategory=category_parts.str[0],
        SecondCategory=category_parts.str[1],
    )
    [['name', 'price', 'rating', 'ratingNumber', 'img', 'FirstCategory', 'SecondCategory']]
    .dropna()
    .drop_duplicates(subset='name')
)

# Keep only products whose name is at most 500 characters. The explicit
# copy makes `df` an independent frame, so the column assignments below do
# not raise SettingWithCopyWarning on a filtered slice.
df = df[df['name'].str.len() <= 500].copy()

df['price'] = df['price'].apply(inr_to_usd)
df['img'] = df['img'].apply(process_img_url)

# Fixed seed so the synthetic delivery flag is reproducible across runs.
np.random.seed(0)
df['GetItByTomorrow'] = np.random.choice([True, False], size=len(df))

df.to_csv('amazon_processed.csv', index=False)

In [4]:
# Display the processed DataFrame; the rendered output elides the middle rows.
df

Unnamed: 0,name,price,rating,ratingNumber,img,FirstCategory,SecondCategory,GetItByTomorrow
0,Wayona Nylon Braided USB to Lightning Fast Cha...,4.75,4.2,24269,https://m.media-amazon.com/images/I/51UsScvHQN...,Computers&Accessories,Accessories&Peripherals,True
1,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,2.37,4.0,43994,https://m.media-amazon.com/images/I/31zOsqQOAO...,Computers&Accessories,Accessories&Peripherals,False
2,Sounce Fast Phone Charging Cable & Data Sync U...,2.37,3.9,7928,https://m.media-amazon.com/images/I/31IvNJZnmd...,Computers&Accessories,Accessories&Peripherals,False
3,boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...,3.92,4.2,94363,https://m.media-amazon.com/images/I/41V5FtEWPk...,Computers&Accessories,Accessories&Peripherals,True
4,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,1.83,4.2,16905,https://m.media-amazon.com/images/I/31VzNhhqif...,Computers&Accessories,Accessories&Peripherals,False
...,...,...,...,...,...,...,...,...
1460,Noir Aqua - 5pcs PP Spun Filter + 1 Spanner | ...,4.51,4,1090,https://m.media-amazon.com/images/I/41fDdRtjfx...,Home&Kitchen,Kitchen&HomeAppliances,True
1461,Prestige Delight PRWO Electric Rice Cooker (1 ...,27.15,4.1,4118,https://m.media-amazon.com/images/I/41gzDxk4+k...,Home&Kitchen,Kitchen&HomeAppliances,True
1462,Bajaj Majesty RX10 2000 Watts Heat Convector R...,26.42,3.6,468,https://m.media-amazon.com/images/I/41qmt2a159...,Home&Kitchen,"Heating,Cooling&AirQuality",False
1463,Havells Ventil Air DSP 230mm Exhaust Fan (Pist...,16.66,4,8031,https://m.media-amazon.com/images/I/51pNg1Zy4+...,Home&Kitchen,"Heating,Cooling&AirQuality",False
