## Laptops Data.

In [4]:
# Import libraries
import pandas as pd
import re

In [4]:
# Data loading
laptops_df = pd.read_csv("./data/laptops.csv")
laptops_df.head()

Unnamed: 0,Name,Price,Reviews,Ratings
0,"HP Refurbished EliteBook 640GB HDD, 8GB RAM, ...","KSh 20,300",3.7 out of 5(13),3.7 out of 5
1,"HP Refurbished EliteBook 820 Core I5, 8GB RAM ...","KSh 16,499",5 out of 5(1),5 out of 5
2,Lenovo ThinkPad T490 Touchscreen Core I5 -8th ...,"KSh 27,999",4 out of 5(9),4 out of 5
3,HP Refurbished EliteBook 840 G3 Intel Core I5 ...,"KSh 23,500",4 out of 5(1),4 out of 5
4,HP GAMING-LAPTOP -HP ELITEBOOK 755 AMD RYZEN 7...,"KSh 35,000",4.4 out of 5(9),4.4 out of 5


In [16]:
# Fridges data
df = pd.read_csv("./fridges.csv")
df.head(5)

Unnamed: 0,Name,Price,Reviews,Ratings
0,Ramtons RF/335 - 85L Single Door Refrigerator ...,"KSh 15,999",4.1 out of 5(66),4.1 out of 5
1,Hisense 94 Liters Single Door Fridge REF094DR ...,"KSh 16,799",4.3 out of 5(89),4.3 out of 5
2,"Ramtons RF/203, 2 Door Direct Cool Fridge, 128...","KSh 28,099",4.2 out of 5(69),4.2 out of 5
3,Roch RFR-120S-I Single Door Refrigerator - 90 ...,"KSh 16,299",4.2 out of 5(478),4.2 out of 5
4,Nunix 138L Double Door Fridge Energy Efficient...,"KSh 27,500",3.9 out of 5(149),3.9 out of 5


In [17]:
# Extract brand name and model.
def extract_brand_and_model(name):
    match = re.match(r"([A-Za-z]+(?: [A-Za-z]+)*)(?:\s[RF|REF|FM|DF|D|]{2,4}[\d]+)?", name)
    if match:
        return match.group(1).strip()
    return ''

# Extract size in litres
def extract_size(name):
    match = re.search(r'(\d+)\s*Liters?', name)
    if match:
        return int(match.group(1))
    return None

# Extract number of doors
def extract_doors(name):
    match = re.search(r'(\d+)\s*Door', name)
    if match:
        return int(match.group(1))
    return None

# Extract color
def extract_color(name):
    color_keywords = ['Silver', 'White', 'Black', 'Grey', 'Red', 'Blue', 'Green', 'Beige', 'Stainless', 'Chrome']
    for color in color_keywords:
        if color.lower() in name.lower():
            return color
    return None

# Extract warranty
def extract_warranty(name):
    match = re.search(r'(\d+)\s*YRs?\s*WRTY', name)
    if match:
        return int(match.group(1))
    return None

# Extract the price from the 'Price' column
def extract_price(price):
    match = re.search(r'KSh\s*(\d+([,]\d{3})*)', price)
    if match:
        return float(match.group(1).replace(',', ''))
    return None

# Extract reviews 
def extract_reviews(reviews):
    match = re.search(r'\((\d+)\)', reviews)
    if match:
        return int(match.group(1))
    return None

# Extract ratings (the number before "out of 5")
def extract_ratings(ratings):
    match = re.search(r'(\d+\.\d+)', ratings)
    if match:
        return float(match.group(1))
    return None

# Apply the extraction functions to the DataFrame
df['Brand'] = df['Name'].apply(extract_brand_and_model)
df['Size(Liters)'] = df['Name'].apply(extract_size)
df['Doors'] = df['Name'].apply(extract_doors)
df['Color'] = df['Name'].apply(extract_color)
df['Warranty(Years)'] = df['Name'].apply(extract_warranty)
df['Price'] = df['Price'].apply(extract_price)
df['Reviews'] = df['Reviews'].apply(extract_reviews)
df['Ratings'] = df['Ratings'].apply(extract_ratings)

# Save the modified DataFrame to a new CSV file
df.to_csv('fridges_clean.csv', index=False)

print("Data extraction and CSV creation completed successfully!")

Data extraction and CSV creation completed successfully!


In [18]:
clean_df = pd.read_csv("./fridges_clean.csv")
clean_df.head(100)

Unnamed: 0,Name,Price,Reviews,Ratings,Brand,Size(Liters),Doors,Color,Warranty(Years)
0,Ramtons RF/335 - 85L Single Door Refrigerator ...,15999.0,66,4.1,Ramtons RF,,,Silver,1.0
1,Hisense 94 Liters Single Door Fridge REF094DR ...,16799.0,89,4.3,Hisense,94.0,,,2.0
2,"Ramtons RF/203, 2 Door Direct Cool Fridge, 128...",28099.0,69,4.2,Ramtons RF,,2.0,Silver,1.0
3,Roch RFR-120S-I Single Door Refrigerator - 90 ...,16299.0,478,4.2,Roch RFR,,,Silver,
4,Nunix 138L Double Door Fridge Energy Efficient...,27500.0,149,3.9,Nunix,,,,1.0
...,...,...,...,...,...,...,...,...,...
95,Roch RFR-150DT-I Top-Mounted Defrost Fridge - ...,27699.0,39,4.1,Roch RFR,,,,1.0
96,Ramtons RF/174 - 2 Door Direct Cool Fridge - 1...,29500.0,58,4.3,Ramtons RF,,2.0,White,1.0
97,Nunix 138L Double Door Fridge Energy Efficient...,29993.0,5,3.4,Nunix,,,,
98,Hisense BLACK 176 Liters FRIDGE Single Door,33800.0,1,,Hisense BLACK,176.0,,Black,


In [10]:
df.isnull().sum()

Name                 0
Price                0
Reviews              0
Ratings            232
Brand                0
Size(Liters)       899
Doors              870
Color              435
Warranty(Years)    203
dtype: int64