In [39]:
import pandas as pd
import numpy as np
import re

In [137]:
df = pd.read_csv(r"D:\VS Python\venv-selenium\JIOMART\jiomart_tvs.csv", encoding='utf-8')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 729 entries, 0 to 728
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   name              729 non-null    object
 1   price_discounted  714 non-null    object
 2   price_original    710 non-null    object
 3   offer_percent     708 non-null    object
 4   bank_offer        200 non-null    object
 5   limited_time      98 non-null     object
 6   position          729 non-null    int64 
dtypes: int64(1), object(6)
memory usage: 40.0+ KB


In [138]:
df['bank_offer'] = df['bank_offer'].fillna('No Bank Offer')
df['bank_offer']

0      Flat 5% Instant Discount On HDFC, ICICI, AXIS,...
1      Flat 5% Instant Discount On HDFC, ICICI, AXIS,...
2                                    Limited Offer Price
3      Flat 5% Instant Discount On HDFC, ICICI, AXIS,...
4                                          No Bank Offer
                             ...                        
724                                        No Bank Offer
725                                        No Bank Offer
726       Instant Discount On HDFC, ICICI, AXIS, SBI CC*
727                                        No Bank Offer
728                                        No Bank Offer
Name: bank_offer, Length: 729, dtype: object

In [139]:
# Extract details from 'name'
def extract_details(name):
    brand = name.split()[0]
    
    size_cm_match = re.search(r'(\d+(\.\d+)?)\s*cm', name)
    size_inch_match = re.search(r'\((\d+)\s*(inch|inches)?\)', name, re.IGNORECASE)
    resolution_match = re.search(r'\b(HD Ready|FHD|Full HD|UHD|4K)\b', name, re.IGNORECASE)
    
    size_cm = float(size_cm_match.group(1)) if size_cm_match else None
    size_inch = int(size_inch_match.group(1)) if size_inch_match else None
    resolution = resolution_match.group(1).upper() if resolution_match else None

    if not resolution and size_inch == 32:
        resolution = "HD READY"
    elif not resolution:
        resolution = "Not Mentioned"

    if 'TV' in name.upper():
        category = 'Television'
    elif 'PROJECTOR' in name.upper():
        category = 'Projector'
    else:
        category = 'Others'

    return pd.Series({
        'brand': brand,
        'size_cm': size_cm,
        'size_inch': size_inch,
        'resolution': resolution,
        'category': category
    })

# ✅ Use concat instead of join to avoid index mismatch
extracted_df = df['name'].apply(extract_details)
df = pd.concat([df, extracted_df], axis=1)
df

Unnamed: 0,name,price_discounted,price_original,offer_percent,bank_offer,limited_time,position,brand,size_cm,size_inch,resolution,category
0,"Redmi 81.28 cm (32 inch) Smart Fire TV, Black,...","₹10,490.00","₹24,999.00",58% OFF,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal,1,Redmi,81.28,32.0,HD READY,Television
1,"Foxsky 80 cm (32 inch) HD Ready LED Smart TV, ...","₹6,999.00","₹20,999.00",66% OFF,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal,2,Foxsky,80.00,32.0,HD READY,Television
2,Samsung wondertainment 80 cm (32 Inch) HD Read...,"₹14,490.00","₹18,990.00",23% OFF,Limited Offer Price,Limited Time Deal,3,Samsung,80.00,32.0,HD READY,Television
3,Samsung 108 cm (43 Inch) 4K Ultra HD Smart TV ...,"₹28,990.00","₹44,900.00",35% OFF,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal,4,Samsung,108.00,43.0,4K,Television
4,Hespa Mini Projector LED Home Theater with Rem...,"₹1,299.00","₹2,999.00",56% OFF,No Bank Offer,,5,Hespa,,,Not Mentioned,Projector
...,...,...,...,...,...,...,...,...,...,...,...,...
724,NVY 127 cm (50 Inch) UHD Smart Frameless LED T...,"₹49,900.00",,,No Bank Offer,,740,NVY,127.00,50.0,UHD,Television
725,"Itel 80 cm (32 inch) HD Ready Smart LED TV, G ...","₹10,999.00","₹22,990.00",52% OFF,No Bank Offer,,741,Itel,80.00,32.0,HD READY,Television
726,Kodak 126 cm (50 inch) Ultra HD (4K) LED Smart...,"₹27,498.00","₹33,999.00",19% OFF,"Instant Discount On HDFC, ICICI, AXIS, SBI CC*",,742,Kodak,126.00,50.0,4K,Television
727,ANKER Nebula Apollo Home Projector,"₹37,999.00","₹39,999.00",5% OFF,No Bank Offer,,743,ANKER,,,Not Mentioned,Projector


In [141]:
df['price_discounted(INR)'] = df['price_discounted'].str.replace('₹', '', regex=False).str.replace(',', '').astype(float)
df['price_original(INR)'] = df['price_original'].str.replace('₹', '', regex=False).str.replace(',', '').astype(float)



# Rename 'offer_percent' to 'discount(%)' and remove 'OFF'
df = df.rename(columns={'offer_percent': 'discount(%)'})
#df['discount(%)'] = df['discount(%)'].str.replace('% OFF', '', regex=False).astype(int)

In [142]:
df['discount(%)'] = (
    df['discount(%)']
    .str.replace('% OFF', '', regex=False)
    .fillna(0)
    .astype(int)
)

In [144]:
df = df.rename(columns={
    'name': 'Name of the Product',
    'category': 'Product Type',
    'brand': 'Brand Name',
    'price_discounted(INR)': 'Selling Price (INR)',
    'price_original(INR)': 'Original Price (INR)',
    'size_inch': 'Size (Inch)',
    'size_cm': 'Size (cm)',
    'resolution': 'Resolution',
    'discount(%)': 'Discount (%)',
    'bank_offer': 'Bank Offer',
    'limited_time': 'Limited Time Deal'
})
# Reorder columns
df = df[['Name of the Product', 'Brand Name', 'Product Type', 'Size (Inch)', 'Size (cm)', 
         'Resolution', 'Selling Price (INR)', 'Original Price (INR)', 
         'Discount (%)', 'Bank Offer', 'Limited Time Deal']]

In [145]:
df = df[['Name of the Product', 'Brand Name', 'Product Type', 'Selling Price (INR)','Original Price (INR)','Size (Inch)', 'Size (cm)', 
         'Resolution','Discount (%)', 'Bank Offer', 'Limited Time Deal']]

In [146]:
df

Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,"Redmi 81.28 cm (32 inch) Smart Fire TV, Black,...",Redmi,Television,10490.0,24999.0,32.0,81.28,HD READY,58,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
1,"Foxsky 80 cm (32 inch) HD Ready LED Smart TV, ...",Foxsky,Television,6999.0,20999.0,32.0,80.00,HD READY,66,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
2,Samsung wondertainment 80 cm (32 Inch) HD Read...,Samsung,Television,14490.0,18990.0,32.0,80.00,HD READY,23,Limited Offer Price,Limited Time Deal
3,Samsung 108 cm (43 Inch) 4K Ultra HD Smart TV ...,Samsung,Television,28990.0,44900.0,43.0,108.00,4K,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
4,Hespa Mini Projector LED Home Theater with Rem...,Hespa,Projector,1299.0,2999.0,,,Not Mentioned,56,No Bank Offer,
...,...,...,...,...,...,...,...,...,...,...,...
724,NVY 127 cm (50 Inch) UHD Smart Frameless LED T...,NVY,Television,49900.0,,50.0,127.00,UHD,0,No Bank Offer,
725,"Itel 80 cm (32 inch) HD Ready Smart LED TV, G ...",Itel,Television,10999.0,22990.0,32.0,80.00,HD READY,52,No Bank Offer,
726,Kodak 126 cm (50 inch) Ultra HD (4K) LED Smart...,Kodak,Television,27498.0,33999.0,50.0,126.00,4K,19,"Instant Discount On HDFC, ICICI, AXIS, SBI CC*",
727,ANKER Nebula Apollo Home Projector,ANKER,Projector,37999.0,39999.0,,,Not Mentioned,5,No Bank Offer,


In [147]:
df.to_csv("jiomart_tvs2.csv", index=False)

In [148]:
df.info()
# Display the first few rows of the DataFrame
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 729 entries, 0 to 728
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   729 non-null    object 
 1   Brand Name            729 non-null    object 
 2   Product Type          729 non-null    object 
 3   Selling Price (INR)   714 non-null    float64
 4   Original Price (INR)  710 non-null    float64
 5   Size (Inch)           503 non-null    float64
 6   Size (cm)             517 non-null    float64
 7   Resolution            729 non-null    object 
 8   Discount (%)          729 non-null    int64  
 9   Bank Offer            729 non-null    object 
 10  Limited Time Deal     98 non-null     object 
dtypes: float64(4), int64(1), object(6)
memory usage: 62.8+ KB


Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,"Redmi 81.28 cm (32 inch) Smart Fire TV, Black,...",Redmi,Television,10490.0,24999.0,32.0,81.28,HD READY,58,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
1,"Foxsky 80 cm (32 inch) HD Ready LED Smart TV, ...",Foxsky,Television,6999.0,20999.0,32.0,80.0,HD READY,66,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
2,Samsung wondertainment 80 cm (32 Inch) HD Read...,Samsung,Television,14490.0,18990.0,32.0,80.0,HD READY,23,Limited Offer Price,Limited Time Deal
3,Samsung 108 cm (43 Inch) 4K Ultra HD Smart TV ...,Samsung,Television,28990.0,44900.0,43.0,108.0,4K,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
4,Hespa Mini Projector LED Home Theater with Rem...,Hespa,Projector,1299.0,2999.0,,,Not Mentioned,56,No Bank Offer,


In [149]:
df['Limited Time Deal'] = df['Limited Time Deal'].fillna('No Limited Time Deal')
df['Original Price (INR)'] = df['Original Price (INR)'].fillna(df['Selling Price (INR)'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 729 entries, 0 to 728
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   729 non-null    object 
 1   Brand Name            729 non-null    object 
 2   Product Type          729 non-null    object 
 3   Selling Price (INR)   714 non-null    float64
 4   Original Price (INR)  714 non-null    float64
 5   Size (Inch)           503 non-null    float64
 6   Size (cm)             517 non-null    float64
 7   Resolution            729 non-null    object 
 8   Discount (%)          729 non-null    int64  
 9   Bank Offer            729 non-null    object 
 10  Limited Time Deal     729 non-null    object 
dtypes: float64(4), int64(1), object(6)
memory usage: 62.8+ KB


In [150]:
df[df.isna().any(axis=1)]


Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
4,Hespa Mini Projector LED Home Theater with Rem...,Hespa,Projector,1299.0,2999.0,,,Not Mentioned,56,No Bank Offer,No Limited Time Deal
14,Hespa Mini Projector LED Home Theater with Rem...,Hespa,Projector,1299.0,2999.0,,,Not Mentioned,56,No Bank Offer,No Limited Time Deal
36,Hespa Portable Mini Projector LED Home Theater...,Hespa,Projector,1299.0,2999.0,,,Not Mentioned,56,No Bank Offer,No Limited Time Deal
39,Hespa YG-300 HD Smart Mini Movie Projector LED...,Hespa,Projector,1299.0,2999.0,,,Not Mentioned,56,No Bank Offer,No Limited Time Deal
40,Hespa Full HD Mini Projector LED Home Theater ...,Hespa,Projector,1399.0,2999.0,,,FULL HD,53,No Bank Offer,No Limited Time Deal
...,...,...,...,...,...,...,...,...,...,...,...
719,"SFZ Mini Smart Projector for Home, Native 720p...",SFZ,Projector,,,,,4K,0,No Bank Offer,No Limited Time Deal
720,SFZ 1280x1080P 2K LCD LED 180 Rotation Portabl...,SFZ,Projector,,,,,Not Mentioned,0,No Bank Offer,No Limited Time Deal
721,"SFZ Portable Mini Projector, 4K 1080P Full HD ...",SFZ,Projector,,,,,4K,0,No Bank Offer,No Limited Time Deal
722,"SFZ 4K & 1080p Support, 270 Degree Rotatable P...",SFZ,Projector,,,,,4K,0,No Bank Offer,No Limited Time Deal


In [151]:
df_tvs = df[df['Product Type'] == 'Television']
df_tvs.info()
df_tvs.head()

<class 'pandas.core.frame.DataFrame'>
Index: 553 entries, 0 to 728
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   553 non-null    object 
 1   Brand Name            553 non-null    object 
 2   Product Type          553 non-null    object 
 3   Selling Price (INR)   553 non-null    float64
 4   Original Price (INR)  553 non-null    float64
 5   Size (Inch)           501 non-null    float64
 6   Size (cm)             514 non-null    float64
 7   Resolution            553 non-null    object 
 8   Discount (%)          553 non-null    int64  
 9   Bank Offer            553 non-null    object 
 10  Limited Time Deal     553 non-null    object 
dtypes: float64(4), int64(1), object(6)
memory usage: 51.8+ KB


Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,"Redmi 81.28 cm (32 inch) Smart Fire TV, Black,...",Redmi,Television,10490.0,24999.0,32.0,81.28,HD READY,58,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
1,"Foxsky 80 cm (32 inch) HD Ready LED Smart TV, ...",Foxsky,Television,6999.0,20999.0,32.0,80.0,HD READY,66,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
2,Samsung wondertainment 80 cm (32 Inch) HD Read...,Samsung,Television,14490.0,18990.0,32.0,80.0,HD READY,23,Limited Offer Price,Limited Time Deal
3,Samsung 108 cm (43 Inch) 4K Ultra HD Smart TV ...,Samsung,Television,28990.0,44900.0,43.0,108.0,4K,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
5,LG 139.7 cm (55 inch) Ultra HD (4K) LED Smart ...,LG,Television,41990.0,86990.0,55.0,139.7,4K,51,Limited Offer Price,Limited Time Deal


In [153]:
df_projectors = df[df['Product Type'] == 'Projector']

df_projectors = df_projectors[['Name of the Product', 'Brand Name', 'Selling Price (INR)', 'Original Price (INR)', 'Resolution', 'Discount (%)', 'Bank Offer', 'Limited Time Deal']]
df_projectors.bfill(inplace=True)

df_projectors.info()
df_projectors.groupby('Brand Name').size().sort_values(ascending=False)


<class 'pandas.core.frame.DataFrame'>
Index: 165 entries, 4 to 727
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   165 non-null    object 
 1   Brand Name            165 non-null    object 
 2   Selling Price (INR)   165 non-null    float64
 3   Original Price (INR)  165 non-null    float64
 4   Resolution            165 non-null    object 
 5   Discount (%)          165 non-null    int64  
 6   Bank Offer            165 non-null    object 
 7   Limited Time Deal     165 non-null    object 
dtypes: float64(2), int64(1), object(5)
memory usage: 11.6+ KB


Brand Name
mPix           24
Hespa          20
eKus           18
ZuZu,          14
IBS            13
Tart            9
Omex            7
Tonzo           6
SFZ             5
TONZO           4
METRONAUT       3
YOTON           3
Trigent         3
XElectron       3
Lazervision     3
Ekus            3
Total           2
KSBOY           2
PIXPAQ          2
BenQ            2
VilenRay        2
ANKER           1
CLEGO           1
Balliatic       1
Ausha           1
AUSHA           1
Formovie        1
Prime           1
Pixpaq          1
E               1
RFV1            1
WZATCO          1
The             1
Torexo          1
Wanbo           1
X               1
amiciVision     1
Zuzu            1
dtype: int64

In [155]:
df_projectors["Brand Name"].unique()

array(['Hespa', 'eKus', 'Omex', 'mPix', 'Pixpaq', 'PIXPAQ', 'TONZO',
       'Tonzo', 'XElectron', 'WZATCO', 'VilenRay', 'Formovie', 'KSBOY',
       'IBS', 'AUSHA', 'Ekus', 'Prime', 'amiciVision', 'Trigent', 'YOTON',
       'METRONAUT', 'Total', 'ZuZu,', 'RFV1', 'Lazervision', 'CLEGO',
       'Torexo', 'Balliatic', 'X', 'Wanbo', 'BenQ', 'Ausha', 'E', 'Tart',
       'Zuzu', 'The', 'SFZ', 'ANKER'], dtype=object)

In [156]:
df_projectors["Brand Name"].replace("E", "E Gate" , inplace=True)
df_projectors["Brand Name"].replace("The", "The Intact Boon" , inplace=True)
df_projectors["Brand Name"].replace("X", "X Electron" , inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_projectors["Brand Name"].replace("E", "E Gate" , inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_projectors["Brand Name"].replace("The", "The Intact Boon" , inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the i

In [157]:
df_projectors["Brand Name"].unique()

array(['Hespa', 'eKus', 'Omex', 'mPix', 'Pixpaq', 'PIXPAQ', 'TONZO',
       'Tonzo', 'XElectron', 'WZATCO', 'VilenRay', 'Formovie', 'KSBOY',
       'IBS', 'AUSHA', 'Ekus', 'Prime', 'amiciVision', 'Trigent', 'YOTON',
       'METRONAUT', 'Total', 'ZuZu,', 'RFV1', 'Lazervision', 'CLEGO',
       'Torexo', 'Balliatic', 'X Electron', 'Wanbo', 'BenQ', 'Ausha',
       'E Gate', 'Tart', 'Zuzu', 'The Intact Boon', 'SFZ', 'ANKER'],
      dtype=object)

In [158]:
df_projectors.to_csv("jiomart_projectors.csv", index=False)

In [216]:

df_tvs_nan = df_tvs[df_tvs.isna().any(axis=1) == True].reset_index(drop=False)

df_tvs_nan.iloc[1:2, df_tvs_nan.columns.get_loc('Product Type')] = 'Projector'
df_tvs_nan.iloc[23:32, df_tvs_nan.columns.get_loc('Product Type')] = 'Projector'
df_tvs_nan.iloc[42:46, df_tvs_nan.columns.get_loc('Product Type')] = 'Projector'

df_p2 = df_tvs_nan[df_tvs_nan["Product Type"] == "Projector"]
df_p2
df_tvs_nan


Unnamed: 0,index,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,68,"PIXPAQ OG Pro (JAPAN), Extreme Bright 1400 ANS...",PIXPAQ,Television,22999.0,26999.0,,,Not Mentioned,14,No Bank Offer,No Limited Time Deal
1,115,Willen 600 Lumens 1080 P Mini Portable Mini Pr...,Willen,Projector,2450.0,4999.0,,,Not Mentioned,50,No Bank Offer,No Limited Time Deal
2,163,BPL 80 cm 32 (inch) HD Linux Smart TV with Dol...,BPL,Television,11999.0,18500.0,,80.0,Not Mentioned,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
3,164,BPL 80 cm 32 (inch) HD Linux Smart TV with Dol...,BPL,Television,11999.0,18500.0,,80.0,Not Mentioned,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
4,240,"Haier 139.7 cm QLED Google Smart TV, Grey, 55Q6",Haier,Television,55990.0,99990.0,,139.7,Not Mentioned,44,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
5,241,"Haier 165 cm 4K UHD Google Smart TV, Black, M65UG",Haier,Television,64990.0,110990.0,,165.0,4K,41,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
6,242,"Haier 109 cm QLED Google Smart TV, Grey, 43Q6",Haier,Television,38590.0,69990.0,,109.0,Not Mentioned,44,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
7,243,"Haier 109 cm QLED Google Smart TV, Grey, 65Q6",Haier,Television,78989.0,135990.0,,109.0,Not Mentioned,41,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
8,292,TCL 75P635 75 inch (189 cm) 4K Ultra HD LED An...,TCL,Television,60990.0,254990.0,,189.0,4K,76,No Bank Offer,No Limited Time Deal
9,293,"Samsung 55 Neo QLED Smart LED TV, 55QN95C",Samsung,Television,139988.0,244900.0,,,Not Mentioned,42,No Bank Offer,No Limited Time Deal


In [None]:
# 1. Get NaN rows from df_tvs and retain their original index
df_tvs_nan = df_tvs[df_tvs.isna().any(axis=1)].copy()
df_tvs_nan = df_tvs_nan.reset_index()  # Keep original index in a column for later removal

# 2. Replace specific rows as Projectors
df_tvs_nan.loc[1:1, 'Product Type'] = 'Projector'
df_tvs_nan.loc[23:31, 'Product Type'] = 'Projector'
df_tvs_nan.loc[42:45, 'Product Type'] = 'Projector'

# 3. Get cleaned projector and TV rows
df_projectors = df_tvs_nan[df_tvs_nan["Product Type"] == "Projector"]
df_clean_tvs = df_tvs_nan[df_tvs_nan["Product Type"] == "Television"]

# 4. Drop original NaN rows from df_tvs using their old index
df_tvs_deduped = df_tvs.drop(index=df_tvs_nan['index'])

# 5. Drop the index column from cleaned nan data
df_tvs_nan.drop(columns=['index'], inplace=True)

# 6. Concatenate cleaned nan rows back to the deduplicated TV dataset
df_final = pd.concat([df_tvs_deduped, df_tvs_nan], ignore_index=True)

# 7. Optional: Check info
#f_tvs_deduped.info()
df_tvs_nan


NameError: name 'd' is not defined

In [219]:
df_projectors1 = df_projectors.copy()


# Combine two DataFrames by stacking rows
combined_df = pd.concat([df_projectors1, df_p2], ignore_index=True)
combined_df = combined_df[['Name of the Product', 'Brand Name','Product Type','Selling Price (INR)', 'Original Price (INR)', 'Resolution', 'Discount (%)', 'Bank Offer', 'Limited Time Deal']]
combined_df["Resolution"].replace("Not Mentioned", "HD READY/FULL HD" , inplace=True)
combined_df['Product Type'] == "Projector"

combined_df['Product Type'] = combined_df['Product Type'].fillna('Projector')


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  combined_df["Resolution"].replace("Not Mentioned", "HD READY/FULL HD" , inplace=True)


In [220]:
combined_df["Resolution"].unique()

array(['HD READY/FULL HD', '4K', 'FULL HD'], dtype=object)

In [221]:
combined_df.to_csv("jiomart_projectors_combined1.csv", index=False)

In [222]:
combined_df

Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,Willen 600 Lumens 1080 P Mini Portable Mini Pr...,Willen,Projector,2450.0,4999.0,HD READY/FULL HD,50,No Bank Offer,No Limited Time Deal
1,Trigent Andriod 5G WiFi Projector Home 4K Supp...,Trigent,Projector,7599.0,19999.0,4K,62,No Bank Offer,No Limited Time Deal
2,YOTON Andriod 5G WiFi Projector Home 4K Suppor...,YOTON,Projector,7599.0,15999.0,4K,52,No Bank Offer,No Limited Time Deal
3,Trigent Andriod 5G WiFi Projector Home 4K Supp...,Trigent,Projector,6649.0,14999.0,4K,55,No Bank Offer,No Limited Time Deal
4,Total Andriod 5G WiFi Projector Home 4K Suppor...,Total,Projector,6649.0,14999.0,4K,55,No Bank Offer,No Limited Time Deal
5,METRONAUT Andriod 5G WiFi Projector Home 4K Su...,METRONAUT,Projector,7599.0,19999.0,4K,62,No Bank Offer,No Limited Time Deal
6,Total Andriod 5G WiFi Projector Home 4K Suppor...,Total,Projector,7599.0,19999.0,4K,62,No Bank Offer,No Limited Time Deal
7,Total Mini ANDRIOD Projector for Home 4K Suppo...,Total,Projector,7599.0,19999.0,4K,62,No Bank Offer,No Limited Time Deal
8,Omex Upgrade Version Advance YouTube DLNA TV W...,Omex,Projector,4990.0,14900.0,HD READY/FULL HD,66,No Bank Offer,No Limited Time Deal
9,Torexo Sales T10 Full HD 1920*1080P Android Pr...,Torexo,Projector,12299.0,21999.0,FULL HD,44,No Bank Offer,No Limited Time Deal


In [201]:
df_tvs_nan["Product Type"].unique()
df_only_tvs_nan = df_tvs_nan[df_tvs_nan["Product Type"] == "Television"]
df_only_tvs_nan

Unnamed: 0,index,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,68,"PIXPAQ OG Pro (JAPAN), Extreme Bright 1400 ANS...",PIXPAQ,Television,22999.0,26999.0,,,Not Mentioned,14,No Bank Offer,No Limited Time Deal
2,163,BPL 80 cm 32 (inch) HD Linux Smart TV with Dol...,BPL,Television,11999.0,18500.0,,80.0,Not Mentioned,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
3,164,BPL 80 cm 32 (inch) HD Linux Smart TV with Dol...,BPL,Television,11999.0,18500.0,,80.0,Not Mentioned,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
4,240,"Haier 139.7 cm QLED Google Smart TV, Grey, 55Q6",Haier,Television,55990.0,99990.0,,139.7,Not Mentioned,44,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
5,241,"Haier 165 cm 4K UHD Google Smart TV, Black, M65UG",Haier,Television,64990.0,110990.0,,165.0,4K,41,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
6,242,"Haier 109 cm QLED Google Smart TV, Grey, 43Q6",Haier,Television,38590.0,69990.0,,109.0,Not Mentioned,44,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
7,243,"Haier 109 cm QLED Google Smart TV, Grey, 65Q6",Haier,Television,78989.0,135990.0,,109.0,Not Mentioned,41,"Flat 12.5% Instant Discount On HDFC, ICICI, AX...",No Limited Time Deal
8,292,TCL 75P635 75 inch (189 cm) 4K Ultra HD LED An...,TCL,Television,60990.0,254990.0,,189.0,4K,76,No Bank Offer,No Limited Time Deal
9,293,"Samsung 55 Neo QLED Smart LED TV, 55QN95C",Samsung,Television,139988.0,244900.0,,,Not Mentioned,42,No Bank Offer,No Limited Time Deal
10,294,"Samsung 50 Neo QLED Smart LED TV, 50QN90C",Samsung,Television,84989.0,164900.0,,,Not Mentioned,48,No Bank Offer,No Limited Time Deal


In [202]:
df_only_tvs_nan.to_csv("jiomart_tvs_nan.csv", index=False)

In [215]:
df3 = pd.read_csv("jiomart_tvs_nan1.csv")
df2 = pd.concat([df_tvs, df3], ignore_index=True)
df2.drop(columns=['index'], inplace=True, errors='ignore')
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 589 entries, 0 to 588
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   589 non-null    object 
 1   Brand Name            589 non-null    object 
 2   Product Type          589 non-null    object 
 3   Selling Price (INR)   589 non-null    float64
 4   Original Price (INR)  589 non-null    float64
 5   Size (Inch)           537 non-null    float64
 6   Size (cm)             550 non-null    float64
 7   Resolution            589 non-null    object 
 8   Discount (%)          589 non-null    int64  
 9   Bank Offer            589 non-null    object 
 10  Limited Time Deal     589 non-null    object 
dtypes: float64(4), int64(1), object(6)
memory usage: 50.7+ KB


In [206]:
df4 = pd.read_csv("jiomart_projectors_combined1.csv")
df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179 entries, 0 to 178
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   179 non-null    object 
 1   Brand Name            179 non-null    object 
 2   Product Type          179 non-null    object 
 3   Selling Price (INR)   179 non-null    float64
 4   Original Price (INR)  179 non-null    float64
 5   Resolution            179 non-null    object 
 6   Discount (%)          179 non-null    int64  
 7   Bank Offer            179 non-null    object 
 8   Limited Time Deal     179 non-null    object 
dtypes: float64(2), int64(1), object(6)
memory usage: 12.7+ KB


In [207]:

df2["Selling Price (INR)"] = df2["Selling Price (INR)"].astype(int)
df2["Original Price (INR)"] = df2["Original Price (INR)"].astype(int)
df4["Selling Price (INR)"] = df4["Selling Price (INR)"].astype(int)
df4["Original Price (INR)"] = df4["Original Price (INR)"].astype(int)   


In [208]:
df4
df2.to_csv("jiomart_tvs_combined.csv", index=False)
df4.to_csv("jiomart_projectors_combined.csv", index=False)


In [209]:
df5 = pd.concat([df2, df4], ignore_index=True)


In [213]:
df5

Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,"Redmi 81.28 cm (32 inch) Smart Fire TV, Black,...",Redmi,Television,10490,24999,32.0,81.28,HD READY,58,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
1,"Foxsky 80 cm (32 inch) HD Ready LED Smart TV, ...",Foxsky,Television,6999,20999,32.0,80.00,HD READY,66,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
2,Samsung wondertainment 80 cm (32 Inch) HD Read...,Samsung,Television,14490,18990,32.0,80.00,HD READY,23,Limited Offer Price,Limited Time Deal
3,Samsung 108 cm (43 Inch) 4K Ultra HD Smart TV ...,Samsung,Television,28990,44900,43.0,108.00,4K,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
4,LG 139.7 cm (55 inch) Ultra HD (4K) LED Smart ...,LG,Television,41990,86990,55.0,139.70,4K,51,Limited Offer Price,Limited Time Deal
...,...,...,...,...,...,...,...,...,...,...,...
763,Torexo Sales T10 Full HD 1920*1080P Android Pr...,Torexo,Projector,12299,21999,,,FULL HD,44,No Bank Offer,No Limited Time Deal
764,YOTON Andriod 5G WiFi Projector Home 4K Suppor...,YOTON,Projector,7599,19999,,,4K,62,No Bank Offer,No Limited Time Deal
765,"ZuZu, move for more better. 3D Full HD LED Pro...","ZuZu,",Projector,14500,29999,,,FULL HD,51,No Bank Offer,No Limited Time Deal
766,IBS Mini ANDRIOD Projector for Home 4K Support...,IBS,Projector,7599,19999,,,4K,62,No Bank Offer,No Limited Time Deal


In [212]:
df5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   768 non-null    object 
 1   Brand Name            768 non-null    object 
 2   Product Type          768 non-null    object 
 3   Selling Price (INR)   768 non-null    int64  
 4   Original Price (INR)  768 non-null    int64  
 5   Size (Inch)           537 non-null    float64
 6   Size (cm)             550 non-null    float64
 7   Resolution            768 non-null    object 
 8   Discount (%)          768 non-null    int64  
 9   Bank Offer            768 non-null    object 
 10  Limited Time Deal     768 non-null    object 
dtypes: float64(2), int64(3), object(6)
memory usage: 66.1+ KB


In [96]:
df5.to_csv("jiomart_tvs_projectors_combined.csv", index=False)

In [98]:
df5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   768 non-null    object 
 1   Brand Name            768 non-null    object 
 2   Product Type          768 non-null    object 
 3   Selling Price (INR)   768 non-null    int64  
 4   Original Price (INR)  768 non-null    int64  
 5   Size (Inch)           537 non-null    float64
 6   Size (cm)             550 non-null    float64
 7   Resolution            768 non-null    object 
 8   Discount (%)          768 non-null    int64  
 9   Bank Offer            768 non-null    object 
 10  Limited Time Deal     768 non-null    object 
dtypes: float64(2), int64(3), object(6)
memory usage: 66.1+ KB


In [100]:
df5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   768 non-null    object 
 1   Brand Name            768 non-null    object 
 2   Product Type          768 non-null    object 
 3   Selling Price (INR)   768 non-null    int64  
 4   Original Price (INR)  768 non-null    int64  
 5   Size (Inch)           537 non-null    float64
 6   Size (cm)             550 non-null    float64
 7   Resolution            768 non-null    object 
 8   Discount (%)          768 non-null    int64  
 9   Bank Offer            768 non-null    object 
 10  Limited Time Deal     768 non-null    object 
dtypes: float64(2), int64(3), object(6)
memory usage: 66.1+ KB


In [227]:
# Step 1: Get the original rows with NaNs in df_tvs (keep their index)
df_tvs_nan = df_tvs[df_tvs.isna().any(axis=1)].reset_index(drop=False)  # 'index' holds original row index

# Step 2: Read your manually cleaned file (with corrected sizes and product types)
df3 = pd.read_csv("jiomart_tvs_nan1.csv")

# Step 3: Drop the old versions of those rows from df_tvs using their original index
df_tvs_cleaned = df_tvs.drop(index=df_tvs_nan['index'])

# Step 4: Concatenate the cleaned rows back in
df_final = pd.concat([df_tvs_cleaned, df3], ignore_index=True)

# Step 5: Clean up any leftover 'index' column just in case
df_final.drop(columns=['index'], inplace=True, errors='ignore')

# Step 6: Optional – verify
df_final.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 537 entries, 0 to 536
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   537 non-null    object 
 1   Brand Name            537 non-null    object 
 2   Product Type          537 non-null    object 
 3   Selling Price (INR)   537 non-null    float64
 4   Original Price (INR)  537 non-null    float64
 5   Size (Inch)           537 non-null    float64
 6   Size (cm)             537 non-null    float64
 7   Resolution            537 non-null    object 
 8   Discount (%)          537 non-null    int64  
 9   Bank Offer            537 non-null    object 
 10  Limited Time Deal     537 non-null    object 
dtypes: float64(4), int64(1), object(6)
memory usage: 46.3+ KB


In [229]:
df4

Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,Hespa Mini Projector LED Home Theater with Rem...,Hespa,Projector,1299,2999,HD READY/FULL HD,56,No Bank Offer,No Limited Time Deal
1,Hespa Mini Projector LED Home Theater with Rem...,Hespa,Projector,1299,2999,HD READY/FULL HD,56,No Bank Offer,No Limited Time Deal
2,Hespa Portable Mini Projector LED Home Theater...,Hespa,Projector,1299,2999,HD READY/FULL HD,56,No Bank Offer,No Limited Time Deal
3,Hespa YG-300 HD Smart Mini Movie Projector LED...,Hespa,Projector,1299,2999,HD READY/FULL HD,56,No Bank Offer,No Limited Time Deal
4,Hespa Full HD Mini Projector LED Home Theater ...,Hespa,Projector,1399,2999,FULL HD,53,No Bank Offer,No Limited Time Deal
...,...,...,...,...,...,...,...,...,...
174,Torexo Sales T10 Full HD 1920*1080P Android Pr...,Torexo,Projector,12299,21999,FULL HD,44,No Bank Offer,No Limited Time Deal
175,YOTON Andriod 5G WiFi Projector Home 4K Suppor...,YOTON,Projector,7599,19999,4K,62,No Bank Offer,No Limited Time Deal
176,"ZuZu, move for more better. 3D Full HD LED Pro...","ZuZu,",Projector,14500,29999,FULL HD,51,No Bank Offer,No Limited Time Deal
177,IBS Mini ANDRIOD Projector for Home 4K Support...,IBS,Projector,7599,19999,4K,62,No Bank Offer,No Limited Time Deal


In [230]:
df_final

Unnamed: 0,Name of the Product,Brand Name,Product Type,Selling Price (INR),Original Price (INR),Size (Inch),Size (cm),Resolution,Discount (%),Bank Offer,Limited Time Deal
0,"Redmi 81.28 cm (32 inch) Smart Fire TV, Black,...",Redmi,Television,10490.0,24999.0,32.0,81.28,HD READY,58,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
1,"Foxsky 80 cm (32 inch) HD Ready LED Smart TV, ...",Foxsky,Television,6999.0,20999.0,32.0,80.00,HD READY,66,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
2,Samsung wondertainment 80 cm (32 Inch) HD Read...,Samsung,Television,14490.0,18990.0,32.0,80.00,HD READY,23,Limited Offer Price,Limited Time Deal
3,Samsung 108 cm (43 Inch) 4K Ultra HD Smart TV ...,Samsung,Television,28990.0,44900.0,43.0,108.00,4K,35,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",Limited Time Deal
4,LG 139.7 cm (55 inch) Ultra HD (4K) LED Smart ...,LG,Television,41990.0,86990.0,55.0,139.70,4K,51,Limited Offer Price,Limited Time Deal
...,...,...,...,...,...,...,...,...,...,...,...
532,"Bush 32 inch (80 cm) HD Smart LED TV, Black, J...",Bush,Television,7999.0,17999.0,32.0,80.00,HD READY,55,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
533,"Bush 24 inch (60 cm) HD Normal LED TV, Black, ...",Bush,Television,6499.0,13999.0,24.0,60.00,HD READY,53,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
534,"Bush 50 inch (127 cm) 4K UHD Smart LED TV, Bla...",Bush,Television,25999.0,40999.0,50.0,127.00,4K,36,"Flat 5% Instant Discount On HDFC, ICICI, AXIS,...",No Limited Time Deal
535,"TCL 75 4K UHD Smart Google TV, 75P635 PRO",TCL,Television,79990.0,256990.0,75.0,189.00,4K,68,No Bank Offer,No Limited Time Deal


In [231]:
df_cleaned_total_tvs_projectors = pd.concat([df_final, df4], ignore_index=True)
df_cleaned_total_tvs_projectors.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 716 entries, 0 to 715
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name of the Product   716 non-null    object 
 1   Brand Name            716 non-null    object 
 2   Product Type          716 non-null    object 
 3   Selling Price (INR)   716 non-null    float64
 4   Original Price (INR)  716 non-null    float64
 5   Size (Inch)           537 non-null    float64
 6   Size (cm)             537 non-null    float64
 7   Resolution            716 non-null    object 
 8   Discount (%)          716 non-null    int64  
 9   Bank Offer            716 non-null    object 
 10  Limited Time Deal     716 non-null    object 
dtypes: float64(4), int64(1), object(6)
memory usage: 61.7+ KB


In [232]:
df_cleaned_total_tvs_projectors.to_csv("jiomart_tvs_projectors_combined_final.csv", index=False)    