## 1. Importing libraries

In [206]:
import pandas as pd
import numpy as np
import re

## 2. Exploratory Data Analysis

I have two categories of cookers dataset:
- Cooktops
- Standing Cooker

### Cooktops

In [207]:
# load the csv data
cooktops_df = pd.read_csv(r'..\data\scraped\kilimall_cooktops.csv')

# preview the df
cooktops_df.head()


Unnamed: 0,cooktop_name,cooktop_reviews,cooktop_price,product_link
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,(297),"KSh 1,469",https://www.kilimall.co.ke/listing/1000287239-...
1,【HOT！!】AILYONS/LYONS GS014-4 Glass Top Infrare...,(738),"KSh 2,699",https://www.kilimall.co.ke/listing/3054927-hot...
2,【HOT！】AILYONS/LYONS GS017 Gas Cooker Double Bu...,(708),"KSh 1,799",https://www.kilimall.co.ke/listing/2518153-hot...
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,(108),"KSh 2,099",https://www.kilimall.co.ke/listing/1000285967-...
4,AILYONS GS014-1/GS005A-1 Glass Top Infrared Do...,(1144),"KSh 2,699",https://www.kilimall.co.ke/listing/2306552-ail...


In [208]:
# check df shape
cooktops_df.shape

(4715, 4)

The 'cooktop_name' column contains emojis, the code below performs cleaning usin the Regular Expresion python module (RE).

- The regex pattern [^\w\s,.-] matches anything that is not a word character (\w), whitespace (\s), comma (,), period (.), or hyphen (-). This helps remove emojis and special characters while keeping alphanumeric characters and common punctuation.

In [209]:

# Function to remove emojis and special characters
def remove_emojis(text):
    return re.sub(r'[^\w\s,.-]', '', text)

# Apply the function to 'cooktop_name'
cooktops_df['clean_cooktop_name'] = cooktops_df['cooktop_name'].apply(remove_emojis)

# Save the 'clean_cooktop_name' column to a CSV file
#cooktops_df[['clean_cooktop_name']].to_csv(r'data\no_emojis_cooktop_name_column.csv', index=False)

cooktops_df.head()


Unnamed: 0,cooktop_name,cooktop_reviews,cooktop_price,product_link,clean_cooktop_name
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,(297),"KSh 1,469",https://www.kilimall.co.ke/listing/1000287239-...,IPCONE 7102 Double Gas Cooker Auto lgnition D...
1,【HOT！!】AILYONS/LYONS GS014-4 Glass Top Infrare...,(738),"KSh 2,699",https://www.kilimall.co.ke/listing/3054927-hot...,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...
2,【HOT！】AILYONS/LYONS GS017 Gas Cooker Double Bu...,(708),"KSh 1,799",https://www.kilimall.co.ke/listing/2518153-hot...,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,(108),"KSh 2,099",https://www.kilimall.co.ke/listing/1000285967-...,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...
4,AILYONS GS014-1/GS005A-1 Glass Top Infrared Do...,(1144),"KSh 2,699",https://www.kilimall.co.ke/listing/2306552-ail...,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...


- In the above statistics, most cooktops on the website has 0 reviews, 4408 out of the total 4896 products.
- In pricing, the most frequent price was Ksh 2,999 that was repeated 118 times across the whole product list.

Check for duplicates

In [210]:
# check for duplicates across all columns
cooktop_duplicate_rows = cooktops_df[cooktops_df.duplicated()]
#cooktop_duplicate_rows

print(f'Number of duplicate rows: {cooktop_duplicate_rows.shape[0]}')


Number of duplicate rows: 18


In [211]:
# handling duplicates
cooktop_no_dupes_df = cooktops_df.drop_duplicates()

In [212]:
# recheck duplicates
cooktop_no_dupes_df.duplicated()


0       False
1       False
2       False
3       False
4       False
        ...  
4710    False
4711    False
4712    False
4713    False
4714    False
Length: 4697, dtype: bool

- The duplicates have been successfully handled, with the returned sum of duplicates as 0.

In [213]:
#check the shape to confirm number of rows after removing duplicates
cooktop_no_dupes_df.shape

(4697, 5)

Create a list of available brands and types to help with creating additional columns in the df

In [214]:
# list of available brands
cooktop_brands = ['generic', 'nunix', 'ailyons', 'eurochef', 'rashnik', 'sokany', 'ramtons', 'eurochef', 'mara', 'premier', 'sweet home', 'edison', 'sayona', 'roch', 'silvercrest', 'hisense', 'ipcone', 'kitchen37', 'toseeu', 'amaze', 'microsoft lumia', 'fashion king', 'mika', 'rebune', 'annov', 'euroken', 'hotpoint', 'jamesport', 'jtc', 'jikokoa', 'lenovo', 'sterling', 'u7', 'vitron', 'fenghua', '& other fairies', 'ahitar', 'bosch', 'gt sonic', 'rebune', 'thl', 'vention', 'weiqin', 'kilimall', 'armco', 'aucma', 'alldocube', 'amazon', 'androidly', 'Starlux', 'Lyons', 'Edenberg', 'boko', 'jiko okoa', 'xiaomi', 'euro chef', 'jiko koa', 'von', 'ampia', 'intex', 'veigapro', 'silver crest', 'amaize', 'jamespot', 'ilyons', 'ramtoms', 'ohms', 'velton', 'jx', 'sc']
cooktop_types = ['gas', 'electric', 'electric and gas', 'not specified']

print('Available number of cooktop brands from kilimall websites: ', len(cooktop_brands))
print(f'There are {len(cooktop_types)} different types of cooktops ')


Available number of cooktop brands from kilimall websites:  70
There are 4 different types of cooktops 


In [215]:
# extract brand and type from "clean_cooktop_name" using above lists

# 1. functions to match brands
def match_brand(cooktop_name):
    for brand in cooktop_brands:
        if brand.lower() in cooktop_name.lower():
            return brand
    return 'unknown'


# 2. function to match cooktop type
def match_type(cooktop_name):
    for cooktop_type in cooktop_types:
        if cooktop_type.lower() in cooktop_name.lower():
            return cooktop_type
    return 'unknown'

In [216]:
# Apply the functions to the DataFrame
cooktop_no_dupes_df = cooktop_no_dupes_df.copy()

cooktop_no_dupes_df.loc[:, 'brand'] = cooktop_no_dupes_df['clean_cooktop_name'].apply(match_brand)
cooktop_no_dupes_df.loc[:, 'cooktop_type'] = cooktop_no_dupes_df['clean_cooktop_name'].apply(match_type)
cooktop_no_dupes_df.head()

Unnamed: 0,cooktop_name,cooktop_reviews,cooktop_price,product_link,clean_cooktop_name,brand,cooktop_type
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,(297),"KSh 1,469",https://www.kilimall.co.ke/listing/1000287239-...,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas
1,【HOT！!】AILYONS/LYONS GS014-4 Glass Top Infrare...,(738),"KSh 2,699",https://www.kilimall.co.ke/listing/3054927-hot...,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas
2,【HOT！】AILYONS/LYONS GS017 Gas Cooker Double Bu...,(708),"KSh 1,799",https://www.kilimall.co.ke/listing/2518153-hot...,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,(108),"KSh 2,099",https://www.kilimall.co.ke/listing/1000285967-...,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas
4,AILYONS GS014-1/GS005A-1 Glass Top Infrared Do...,(1144),"KSh 2,699",https://www.kilimall.co.ke/listing/2306552-ail...,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas


- The 'SettingWithCopyWarning' occurs in pandas when you try to modify a DataFrame that is a "view" of another DataFrame, rather than a copy. This can lead to unexpected results because changes made might not apply to the original DataFrame as intended. The warning is corrected by using '.loc' or creating a copy of the 'view' dataframe

In [217]:
# shape
cooktop_no_dupes_df.shape

(4697, 7)

In [218]:
# exploring 'cooktop_no_dupes'

cooktop_unique_brands = cooktop_no_dupes_df["brand"].unique()
print(cooktop_unique_brands)

#number of unique counts
cooktop_unique_brands_counts = cooktop_no_dupes_df["brand"].value_counts()
print("Frequency of available brands:\n", cooktop_unique_brands_counts)




['ipcone' 'ailyons' 'nunix' 'eurochef' 'generic' 'Starlux' 'Lyons'
 'unknown' 'sokany' 'rebune' 'jikokoa' 'jamespot' 'amaze' 'ramtons'
 'rashnik' 'roch' 'jiko koa' 'intex' 'velton' 'Edenberg' 'xiaomi'
 'jiko okoa' 'boko' 'premier' 'sayona' 'annov' 'jx' 'veigapro' 'ampia'
 'sc' 'euro chef' 'ohms' 'von' 'silver crest' 'mika' 'silvercrest'
 'amaize' 'ramtoms' 'hotpoint' 'hisense' 'sterling']
Frequency of available brands:
 brand
unknown         1224
nunix           1207
eurochef         460
ailyons          345
rashnik          252
sokany           210
jikokoa          125
amaze            121
generic           98
ipcone            97
ramtons           76
velton            75
veigapro          58
roch              57
Starlux           49
jiko koa          39
premier           27
rebune            25
silver crest      24
boko              19
jiko okoa         14
Lyons             12
sc                12
ampia             11
jamespot           8
silvercrest        8
sayona             8
von

In [219]:
# exploring 'cooktop_no_dupes' unknown brands
unknown_cooktop_brands = cooktop_no_dupes_df[cooktop_no_dupes_df['brand'] == 'unknown']
unknown_cooktop_brands.head()

Unnamed: 0,cooktop_name,cooktop_reviews,cooktop_price,product_link,clean_cooktop_name,brand,cooktop_type
31,Table Top Double Burner Gas Stove Cooker + Pip...,(237),"KSh 2,600",https://www.kilimall.co.ke/listing/2728809-tab...,Table Top Double Burner Gas Stove Cooker Pipe...,unknown,gas
33,G-003B TABLE COOKER STAINLESS STEEL TRIPPLE (3...,(1),"KSh 2,899",https://www.kilimall.co.ke/listing/1000317995-...,G-003B TABLE COOKER STAINLESS STEEL TRIPPLE 3 ...,unknown,gas
50,Tampered Glass Energy saving Table top Gas Coo...,(16),"KSh 3,384",https://www.kilimall.co.ke/listing/2334202-tam...,Tampered Glass Energy saving Table top Gas Coo...,unknown,gas
66,OFFER OFFER Gas Burner With Grill For 3kg for ...,(5),"KSh 1,100",https://www.kilimall.co.ke/listing/1000225156-...,OFFER OFFER Gas Burner With Grill For 3kg for ...,unknown,gas
67,DOUBLE Electric Cooker /Spiral Coil Hotplate ...,(0),"KSh 1,700",https://www.kilimall.co.ke/listing/1000416612-...,DOUBLE Electric Cooker Spiral Coil Hotplate T...,unknown,electric


In [220]:
# remove brackets from product_reviews
cooktop_no_dupes_df['cooktop_reviews'] = cooktop_no_dupes_df['cooktop_reviews'].apply(lambda x: x.strip('()'))
cooktop_no_dupes_df.head()

Unnamed: 0,cooktop_name,cooktop_reviews,cooktop_price,product_link,clean_cooktop_name,brand,cooktop_type
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,297,"KSh 1,469",https://www.kilimall.co.ke/listing/1000287239-...,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas
1,【HOT！!】AILYONS/LYONS GS014-4 Glass Top Infrare...,738,"KSh 2,699",https://www.kilimall.co.ke/listing/3054927-hot...,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas
2,【HOT！】AILYONS/LYONS GS017 Gas Cooker Double Bu...,708,"KSh 1,799",https://www.kilimall.co.ke/listing/2518153-hot...,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,108,"KSh 2,099",https://www.kilimall.co.ke/listing/1000285967-...,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas
4,AILYONS GS014-1/GS005A-1 Glass Top Infrared Do...,1144,"KSh 2,699",https://www.kilimall.co.ke/listing/2306552-ail...,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas


In [221]:
# cleaning the cooktop_price column
cooktop_no_dupes_df['cooktop_price'] = cooktop_no_dupes_df['cooktop_price'].apply(lambda x: int(x.replace('KSh', '').replace(',', '').strip()))
cooktop_no_dupes_df.head()

Unnamed: 0,cooktop_name,cooktop_reviews,cooktop_price,product_link,clean_cooktop_name,brand,cooktop_type
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,297,1469,https://www.kilimall.co.ke/listing/1000287239-...,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas
1,【HOT！!】AILYONS/LYONS GS014-4 Glass Top Infrare...,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas
2,【HOT！】AILYONS/LYONS GS017 Gas Cooker Double Bu...,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,108,2099,https://www.kilimall.co.ke/listing/1000285967-...,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas
4,AILYONS GS014-1/GS005A-1 Glass Top Infrared Do...,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas


In [222]:
# drop cooktop_name
cooktop_no_dupes_df.drop(columns=['cooktop_name'], inplace=True)
cooktop_no_dupes_df.head()

Unnamed: 0,cooktop_reviews,cooktop_price,product_link,clean_cooktop_name,brand,cooktop_type
0,297,1469,https://www.kilimall.co.ke/listing/1000287239-...,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas
1,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas
2,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas
3,108,2099,https://www.kilimall.co.ke/listing/1000285967-...,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas
4,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas


In [223]:
# rename the columns
cooktop_no_dupes_df = cooktop_no_dupes_df.rename(columns={'clean_cooktop_name': 'name', 'cooktop_reviews': 'reviews', 'cooktop_price': 'price','cooktop_type': 'type', 'product_link': 'url'})

# restructure the dataframe to have the columns in a more logical order
cooktop_no_dupes_df = cooktop_no_dupes_df[['name', 'brand', 'type', 'reviews', 'price', 'url']]

cooktop_no_dupes_df.head()

Unnamed: 0,name,brand,type,reviews,price,url
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas,297,1469,https://www.kilimall.co.ke/listing/1000287239-...
1,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...
2,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas,108,2099,https://www.kilimall.co.ke/listing/1000285967-...
4,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...


In [224]:
# add a category column
cooktop_no_dupes_df['category'] = 'cooktop'
cooktop_no_dupes_df.head()

Unnamed: 0,name,brand,type,reviews,price,url,category
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas,297,1469,https://www.kilimall.co.ke/listing/1000287239-...,cooktop
1,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...,cooktop
2,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...,cooktop
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas,108,2099,https://www.kilimall.co.ke/listing/1000285967-...,cooktop
4,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...,cooktop


In [225]:
cooktop_no_dupes_df.shape

(4697, 7)

In [226]:
print(cooktop_no_dupes_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 4697 entries, 0 to 4714
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   name      4697 non-null   object
 1   brand     4697 non-null   object
 2   type      4697 non-null   object
 3   reviews   4697 non-null   object
 4   price     4697 non-null   int64 
 5   url       4697 non-null   object
 6   category  4697 non-null   object
dtypes: int64(1), object(6)
memory usage: 293.6+ KB
None


In [227]:
# save the cleaned data to a csv file
#cooktop_no_dupes_df.to_csv(r'data\cleaned_cooktop_data.csv', index=False)

### Standing Cooker

In [228]:
# load the csv data
standingcooker_df = pd.read_csv(r'..\data\scraped\kilimall_standingcooker.csv')

# preview the df
standingcooker_df.head()


Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link
0,Volsmart 4 Burners VGS-580 Free Standing Gas C...,(251),"KSh 18,499",https://www.kilimall.co.ke/listing/2953794-vol...
1,(Special Offer)Nunix KZ-560-3G1E Free Stand...,(230),"KSh 23,899",https://www.kilimall.co.ke/listing/2317709-spe...
2,【Special Offer】Volsmart 4 Burners VGS-580 Free...,(7),"KSh 18,599",https://www.kilimall.co.ke/listing/1000392642-...
3,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,(159),"KSh 19,799",https://www.kilimall.co.ke/listing/2298602-nun...
4,Nunix 60×60 3+1 Free Standing Cooker with Elec...,(3),"KSh 26,599",https://www.kilimall.co.ke/listing/1001010600-...


In [229]:
# check df shape
standingcooker_df.shape

(1130, 4)

In [230]:
# Function to remove emojis and special characters
def remove_emojis(text):
    return re.sub(r'[^\w\s,.-]', '', text)

# Apply the function to 'standing_cooker_name'
standingcooker_df['clean_standingcooker_name'] = standingcooker_df['standing_cooker_name'].apply(remove_emojis)

standingcooker_df.head()

Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name
0,Volsmart 4 Burners VGS-580 Free Standing Gas C...,(251),"KSh 18,499",https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...
1,(Special Offer)Nunix KZ-560-3G1E Free Stand...,(230),"KSh 23,899",https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...
2,【Special Offer】Volsmart 4 Burners VGS-580 Free...,(7),"KSh 18,599",https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...
3,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,(159),"KSh 19,799",https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...
4,Nunix 60×60 3+1 Free Standing Cooker with Elec...,(3),"KSh 26,599",https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...


Check for duplicates

In [231]:
# check for duplicates across all columns
standingcooker_duplicate_rows = standingcooker_df[standingcooker_df.duplicated()]


print(f'Number of duplicate rows: {standingcooker_duplicate_rows.shape[0]}')

Number of duplicate rows: 1


In [232]:
# handling duplicates
standingcooker_no_dupes_df = standingcooker_df.drop_duplicates()

# recheck duplicates
standingcooker_no_dupes_df[standingcooker_no_dupes_df.duplicated()]

Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name


- The duplicates have been successfully handled, with the returned sum of duplicates as 0.

In [233]:
#check the shape to confirm number of rows after removing duplicates
standingcooker_no_dupes_df.shape

(1129, 5)

Create a list of available brands and types to help with creating additional columns in the df

In [234]:
# list of available brands
standingcooker_brands = ['generic', 'nunix', 'mika', 'hotpoint', 'eurochef', 'ramtons', 'premier', 'volsmart', 'sayona', 'haier', 'hisense', 'roch', 'bruhm', 'euroken', 'ailyons', 'amaze', 'icecool', 'exzel', 'lg', 'rebune', 'sarah', 'jiko okoa', 'von', 'bjs', 'sarahtech', 'rashnik', 'vision', 'sarah tech', 'globalstar', 'unitech', 'tlac', 'global tech', 'meko', 'beko', 'bosch', 'nunnix', 'starlux', 'armco', 'solstar', 'silver crest', 'jikokoa', 'eroucheif', 'primier', 'icook']
standingcooker_types = ['3 gas+1 electric', '4 gas', '2 gas+2 electric']
oven_capacities = ['40-60 l', 'without oven', '30-40 l', '10-20 l']

print('Available number of standing cooker brands from kilimall website: ', len(standingcooker_brands))
print(f'There are {len(standingcooker_types)} different types of cooktops ')
print(f'There are {len(oven_capacities)} different oven capacities')

Available number of standing cooker brands from kilimall website:  44
There are 3 different types of cooktops 
There are 4 different oven capacities


In [235]:
# extract brand, type and oven capacity from "clean_standingcooker_name" using above lists

# 1. function to match brands
def match_standingcooker_brand(standing_cooker_name):
    for brand in standingcooker_brands:
        if brand.lower() in standing_cooker_name.lower():
            return brand
    return 'unknown'


# 2. function to match standing cooker type
def match_standingcooker_type(standing_cooker_name):
    for standingcooker_type in standingcooker_types:
        if standingcooker_type.lower() in standing_cooker_name.lower():
            return standingcooker_type
    return 'unknown'

# 3. function to match oven capacity
def match_capacity(standing_cooker_name):
    for capacity in oven_capacities:
        if capacity.lower() in standing_cooker_name.lower():
            return capacity
    return 'unknown'

In [236]:
# apply the functions to the DataFrame

standingcooker_no_dupes_df = standingcooker_no_dupes_df.copy()

# modify the DataFrame
standingcooker_no_dupes_df['brand'] = standingcooker_no_dupes_df['clean_standingcooker_name'].apply(match_standingcooker_brand)
standingcooker_no_dupes_df['standing_cooker_type'] = standingcooker_no_dupes_df['clean_standingcooker_name'].apply(match_standingcooker_type)
standingcooker_no_dupes_df['oven_capacity'] = standingcooker_no_dupes_df['clean_standingcooker_name'].apply(match_capacity)


In [237]:
standingcooker_no_dupes_df.head()

Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name,brand,standing_cooker_type,oven_capacity
0,Volsmart 4 Burners VGS-580 Free Standing Gas C...,(251),"KSh 18,499",https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown
1,(Special Offer)Nunix KZ-560-3G1E Free Stand...,(230),"KSh 23,899",https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown
2,【Special Offer】Volsmart 4 Burners VGS-580 Free...,(7),"KSh 18,599",https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown
3,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,(159),"KSh 19,799",https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown
4,Nunix 60×60 3+1 Free Standing Cooker with Elec...,(3),"KSh 26,599",https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown


In [238]:

standingcooker_unique_brands = standingcooker_no_dupes_df["brand"].unique()
print(standingcooker_unique_brands)

#number of unique counts
standingcooker_unique_brands_counts = standingcooker_no_dupes_df["brand"].value_counts()
print("Frequency of available brands:\n", standingcooker_unique_brands_counts)

['volsmart' 'nunix' 'mika' 'eurochef' 'bjs' 'von' 'euroken' 'premier'
 'sarah' 'unknown' 'sayona' 'ramtons' 'jiko okoa' 'hisense' 'bruhm'
 'ailyons' 'globalstar' 'rashnik' 'haier' 'hotpoint' 'vision' 'roch'
 'unitech' 'tlac' 'rebune' 'starlux' 'global tech' 'amaze' 'beko' 'bosch'
 'armco' 'generic' 'exzel' 'meko' 'solstar' 'silver crest' 'eroucheif'
 'jikokoa']
Frequency of available brands:
 brand
nunix           269
mika            146
eurochef         86
unknown          73
volsmart         72
von              69
ramtons          60
bjs              53
premier          47
sayona           33
amaze            30
sarah            29
roch             21
haier            16
hisense          14
ailyons          12
euroken          10
globalstar        9
generic           9
solstar           9
meko              8
beko              7
rashnik           6
unitech           6
bruhm             6
hotpoint          5
exzel             4
vision            4
starlux           4
armco             

In [239]:
# exploring 'standingcooker_no_dupes' unknown brands
unknown_standingcooker_brands = standingcooker_no_dupes_df[standingcooker_no_dupes_df['brand'] == 'unknown']
unknown_standingcooker_brands.head()

Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name,brand,standing_cooker_type,oven_capacity
38,EURO55-3+1-E Free Standing 3 Gas Cooker+1 Hotp...,(1),"KSh 24,500",https://www.kilimall.co.ke/listing/1000114056-...,EURO55-31-E Free Standing 3 Gas Cooker1 Hotpla...,unknown,unknown,unknown
48,Free Standing 4 Gas Burner With Gas Oven Cooke...,(19),"KSh 19,994",https://www.kilimall.co.ke/listing/2284584-fre...,Free Standing 4 Gas Burner With Gas Oven Cooke...,unknown,4 gas,unknown
76,WE ARE SELLING AT WHOLASE PRICES OF KSH 4150 A...,(0),"KSh 4,150",https://www.kilimall.co.ke/listing/2806161-we-...,WE ARE SELLING AT WHOLASE PRICES OF KSH 4150 A...,unknown,unknown,unknown
94,"EK-B002C hardened Glass top Gas Stove, 2 Burne...",(0),"KSh 3,299",https://www.kilimall.co.ke/listing/3073563-ekb...,"EK-B002C hardened Glass top Gas Stove, 2 Burne...",unknown,unknown,unknown
97,Pac Gas Regulator 6Kg Gas Cylinder Plus Free G...,(0),KSh 890,https://www.kilimall.co.ke/listing/2958726-pac...,Pac Gas Regulator 6Kg Gas Cylinder Plus Free G...,unknown,unknown,unknown


In [240]:
# remove brackets from standing_cooker_reviews
standingcooker_no_dupes_df['standing_cooker_reviews'] = standingcooker_no_dupes_df['standing_cooker_reviews'].apply(lambda x: x.strip('()'))
standingcooker_no_dupes_df.head()

Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name,brand,standing_cooker_type,oven_capacity
0,Volsmart 4 Burners VGS-580 Free Standing Gas C...,251,"KSh 18,499",https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown
1,(Special Offer)Nunix KZ-560-3G1E Free Stand...,230,"KSh 23,899",https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown
2,【Special Offer】Volsmart 4 Burners VGS-580 Free...,7,"KSh 18,599",https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown
3,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,159,"KSh 19,799",https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown
4,Nunix 60×60 3+1 Free Standing Cooker with Elec...,3,"KSh 26,599",https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown


In [241]:
# cleaning the standing_cooker_price column
standingcooker_no_dupes_df['standing_cooker_price'] = standingcooker_no_dupes_df['standing_cooker_price'].apply(lambda x: int(x.replace('KSh', '').replace(',', '').strip()))
standingcooker_no_dupes_df.head()

Unnamed: 0,standing_cooker_name,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name,brand,standing_cooker_type,oven_capacity
0,Volsmart 4 Burners VGS-580 Free Standing Gas C...,251,18499,https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown
1,(Special Offer)Nunix KZ-560-3G1E Free Stand...,230,23899,https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown
2,【Special Offer】Volsmart 4 Burners VGS-580 Free...,7,18599,https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown
3,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,159,19799,https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown
4,Nunix 60×60 3+1 Free Standing Cooker with Elec...,3,26599,https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown


In [242]:
# drop standing_cooker_name
standingcooker_no_dupes_df.drop(columns=['standing_cooker_name'], inplace=True)
standingcooker_no_dupes_df.head()

Unnamed: 0,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name,brand,standing_cooker_type,oven_capacity
0,251,18499,https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown
1,230,23899,https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown
2,7,18599,https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown
3,159,19799,https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown
4,3,26599,https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown


In [243]:
# add category column
standingcooker_no_dupes_df['category'] = 'standing cooker'

standingcooker_no_dupes_df.head()

Unnamed: 0,standing_cooker_reviews,standing_cooker_price,product_link,clean_standingcooker_name,brand,standing_cooker_type,oven_capacity,category
0,251,18499,https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown,standing cooker
1,230,23899,https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown,standing cooker
2,7,18599,https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown,standing cooker
3,159,19799,https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown,standing cooker
4,3,26599,https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown,standing cooker


In [244]:
# renaming columns
standingcooker_no_dupes_df = standingcooker_no_dupes_df.rename(columns={'standing_cooker_reviews':'reviews', 'standing_cooker_price':'price', 'product_link':'url', 'clean_standingcooker_name':'name', 'standing_cooker_type':'type', 'oven_capacity':'capacity'})
standingcooker_no_dupes_df.head()

Unnamed: 0,reviews,price,url,name,brand,type,capacity,category
0,251,18499,https://www.kilimall.co.ke/listing/2953794-vol...,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown,standing cooker
1,230,23899,https://www.kilimall.co.ke/listing/2317709-spe...,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown,standing cooker
2,7,18599,https://www.kilimall.co.ke/listing/1000392642-...,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown,standing cooker
3,159,19799,https://www.kilimall.co.ke/listing/2298602-nun...,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown,standing cooker
4,3,26599,https://www.kilimall.co.ke/listing/1001010600-...,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown,standing cooker


In [245]:
# structure the df columns
standingcooker_no_dupes_df = standingcooker_no_dupes_df[['name', 'brand', 'type', 'capacity', 'reviews', 'price', 'url', 'category']]
standingcooker_no_dupes_df.head()

Unnamed: 0,name,brand,type,capacity,reviews,price,url,category
0,Volsmart 4 Burners VGS-580 Free Standing Gas C...,volsmart,4 gas,unknown,251,18499,https://www.kilimall.co.ke/listing/2953794-vol...,standing cooker
1,Special OfferNunix KZ-560-3G1E Free Standin...,nunix,unknown,unknown,230,23899,https://www.kilimall.co.ke/listing/2317709-spe...,standing cooker
2,Special OfferVolsmart 4 Burners VGS-580 Free S...,volsmart,4 gas,unknown,7,18599,https://www.kilimall.co.ke/listing/1000392642-...,standing cooker
3,Nunix KZ-560-GO Free Standing 4 Gas Burners Ov...,nunix,4 gas,unknown,159,19799,https://www.kilimall.co.ke/listing/2298602-nun...,standing cooker
4,Nunix 6060 31 Free Standing Cooker with Electr...,nunix,unknown,unknown,3,26599,https://www.kilimall.co.ke/listing/1001010600-...,standing cooker


In [246]:
standingcooker_no_dupes_df.shape

(1129, 8)

### Combining cooktops and standing cookers into one dataframe

In [247]:

# Add the missing capcity column to cooktop df with null values
cooktop_no_dupes_df.insert(3, 'capacity', np.nan)
cooktop_no_dupes_df.head()

Unnamed: 0,name,brand,type,capacity,reviews,price,url,category
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas,,297,1469,https://www.kilimall.co.ke/listing/1000287239-...,cooktop
1,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas,,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...,cooktop
2,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas,,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...,cooktop
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas,,108,2099,https://www.kilimall.co.ke/listing/1000285967-...,cooktop
4,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas,,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...,cooktop


In [248]:
# merge the two df 
cookers = pd.concat([cooktop_no_dupes_df, standingcooker_no_dupes_df], ignore_index=True)
cookers.head()

Unnamed: 0,name,brand,type,capacity,reviews,price,url,category
0,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas,,297,1469,https://www.kilimall.co.ke/listing/1000287239-...,cooktop
1,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas,,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...,cooktop
2,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas,,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...,cooktop
3,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas,,108,2099,https://www.kilimall.co.ke/listing/1000285967-...,cooktop
4,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas,,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...,cooktop


In [249]:
cookers.shape

(5826, 8)

In [250]:
# add id and source columns
# define a function that add id and source column
def id_source (df):
    df.insert(0, 'id', cookers.index)
    df.insert(1, 'source', 'kilimall')

    return df

# pass the df into the function
id_source(cookers)

#preview the df
cookers.head()


Unnamed: 0,id,source,name,brand,type,capacity,reviews,price,url,category
0,0,kilimall,IPCONE 7102 Double Gas Cooker Auto lgnition D...,ipcone,gas,,297,1469,https://www.kilimall.co.ke/listing/1000287239-...,cooktop
1,1,kilimall,HOTAILYONSLYONS GS014-4 Glass Top Infrared Dou...,ailyons,gas,,738,2699,https://www.kilimall.co.ke/listing/3054927-hot...,cooktop
2,2,kilimall,HOTAILYONSLYONS GS017 Gas Cooker Double Burner...,ailyons,gas,,708,1799,https://www.kilimall.co.ke/listing/2518153-hot...,cooktop
3,3,kilimall,IPCONE 2 Gas Cooker 02 Glass Top Double Burne...,ipcone,gas,,108,2099,https://www.kilimall.co.ke/listing/1000285967-...,cooktop
4,4,kilimall,AILYONS GS014-1GS005A-1 Glass Top Infrared Dou...,ailyons,gas,,1144,2699,https://www.kilimall.co.ke/listing/2306552-ail...,cooktop


In [251]:
cookers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5826 entries, 0 to 5825
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        5826 non-null   int64 
 1   source    5826 non-null   object
 2   name      5826 non-null   object
 3   brand     5826 non-null   object
 4   type      5826 non-null   object
 5   capacity  1129 non-null   object
 6   reviews   5826 non-null   object
 7   price     5826 non-null   int64 
 8   url       5826 non-null   object
 9   category  5826 non-null   object
dtypes: int64(2), object(8)
memory usage: 455.3+ KB


In [252]:
# save the cleaned data to a csv file
cookers.to_csv(r'..\data\clean\kilimall_cookers.csv', index=False)