In [40]:
# Import relevant libraries
import pandas as pd
import time
import re

# Import selenium libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

In [50]:
# Record when scraping begins so the total run time can be reported at the end
start = time.time()

# Launch Chrome through Selenium (ensure the ChromeDriver is in PATH)
driver = webdriver.Chrome()

# Category slugs, one per brand; each is substituted into the smartphones
# category URL when that brand is scraped
phone_type = [
    "samsung",
    "iphone",
    "infinix-phones-in-kenya",
    "google",
    "itel",
    "nokia",
    "oppo",
    "oneplus",
    "realme",
    "tecno",
    "vivo",
    "xiaomi",
]

# Shared accumulator: one dict per scraped product, across all brands
all_product_data = []


def extract_info(text):
    """Parse a product summary's visible text into a flat attribute dict.

    Lines containing a colon are split once on the first colon into a
    ``key: value`` pair. Lines without a colon are matched against a few
    known markers ("Reviews", "IN STOCK", "Warranty", a region prefix, or a
    "KSh" price); anything else is ignored.

    Args:
        text: Multi-line text of the product summary element.

    Returns:
        dict mapping attribute names to their stripped string values. The
        first "KSh" line is stored as 'Price 1'; every later "KSh" line is
        stored as 'Price 2' (so a third price overwrites the second).
    """
    product_info = {}
    for line in text.strip().split('\n'):
        if ':' in line:
            # Split only once so values that contain colons stay intact
            key, value = line.split(':', 1)
            product_info[key.strip()] = value.strip()
        elif 'Reviews' in line:
            product_info['Reviews'] = line.strip()
        elif 'IN STOCK' in line:
            product_info['Status'] = line.strip()
        elif 'Warranty' in line:
            product_info['Warranty Info'] = line.strip()
        elif line.startswith("East Africa") or line.startswith("UAE"):
            product_info['Region'] = line.strip()
        elif line.startswith("KSh"):
            price_key = 'Price 2' if 'Price 1' in product_info else 'Price 1'
            product_info[price_key] = line.strip()
    return product_info


def scrape_phones_data(phone_type):
    """Scrape every product on one brand's listing page into ``all_product_data``.

    Loads the category page for ``phone_type``, clicks each "product-wrapper"
    element in turn, reads the product title and summary text from the detail
    page, and appends the parsed dict (plus 'Product Name') to the
    module-level ``all_product_data`` list. Uses the module-level ``driver``.

    Only the single listing URL built below is loaded; no further listing
    pages are requested.

    Args:
        phone_type: Category slug substituted into the smartphones URL.

    Returns:
        None. Results are appended to ``all_product_data``; failures are
        printed and the affected product (or category) is skipped.
    """
    # Base url
    url = f"https://www.phoneplacekenya.com/product-category/smartphones/{phone_type}/"
    grid_locator = (By.CLASS_NAME, "product-wrapper")

    driver.get(url)

    # Wait for the listing to load. A category with no products must not
    # abort the whole run, so a timeout here just skips this category.
    try:
        phones_grid = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located(grid_locator)
        )
    except TimeoutException:
        print(f"No products found for: {phone_type}")
        return

    # Iterate by index: the elements go stale after each navigation, so they
    # are re-located on every pass instead of being reused.
    for i in range(len(phones_grid)):
        try:
            phones_grid = WebDriverWait(driver, 20).until(
                EC.presence_of_all_elements_located(grid_locator)
            )

            # Open the product's detail page
            phones_grid[i].click()

            # Wait for the product details to load (single element)
            phone_info = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, "product-images-summary"))
            )

            # Product information
            product_name = phone_info.find_element(By.CSS_SELECTOR, ".product_title.entry-title").text
            data = phone_info.find_element(By.CSS_SELECTOR, ".summary.entry-summary").text

            # Extract the information and append to the list
            product_data = extract_info(data)
            product_data['Product Name'] = product_name
            all_product_data.append(product_data)

            time.sleep(2)

            # Go back to the listing page
            driver.back()

            time.sleep(2)

        except Exception as e:
            print(f"Error occurred on product {i+1}: {e}")
            # The failure may have left the browser on some other page, which
            # would make every later product fail (or scrape the wrong items).
            # Reload the listing so the next index is resolved against it.
            try:
                driver.get(url)
            except Exception as reload_error:
                print(f"Could not reload listing for {phone_type}: {reload_error}")
                break

# Iterate over each search term. The browser is closed in `finally` so that
# an unexpected error while scraping does not leave a Chrome process running.
try:
    for phone in phone_type:
        print(f"Scraping jobs for: {phone}")
        scrape_phones_data(phone)
        print(f"Finished scraping for: {phone}")
finally:
    # Close the driver
    driver.quit()

# Create a DataFrame with all products and persist the raw scrape
df = pd.DataFrame(all_product_data)
df.to_csv('../data/scraped_phones.csv', index=False)

# End scraping; elapsed time is reported in whole minutes (floor division)
end = time.time()
print(f"Products scraping time: {(end - start) // 60}")

Scraping jobs for: samsung
Finished scraping for: samsung
Scraping jobs for: iphone
Finished scraping for: iphone
Scraping jobs for: infinix-phones-in-kenya
Error occurred on product 1: Message: 
Stacktrace:
#0 0x575f3196fc5a <unknown>
#1 0x575f31652e2c <unknown>
#2 0x575f3169f661 <unknown>
#3 0x575f3169f751 <unknown>
#4 0x575f316e3f64 <unknown>
#5 0x575f316c25ed <unknown>
#6 0x575f316e1303 <unknown>
#7 0x575f316c2363 <unknown>
#8 0x575f31692247 <unknown>
#9 0x575f31692b9e <unknown>
#10 0x575f3193622b <unknown>
#11 0x575f3193a2d1 <unknown>
#12 0x575f31921ade <unknown>
#13 0x575f3193ae32 <unknown>
#14 0x575f3190677f <unknown>
#15 0x575f3195f618 <unknown>
#16 0x575f3195f7f0 <unknown>
#17 0x575f3196ed8c <unknown>
#18 0x7db5d7294ac3 <unknown>

Error occurred on product 2: Message: 
Stacktrace:
#0 0x575f3196fc5a <unknown>
#1 0x575f31652e2c <unknown>
#2 0x575f3169f661 <unknown>
#3 0x575f3169f751 <unknown>
#4 0x575f316e3f64 <unknown>
#5 0x575f316c25ed <unknown>
#6 0x575f316e1303 <unknown>
#7 

Unnamed: 0,Reviews,Status,RAM,Internal Storage,Battery,Main camera,Front camera,Display,Cover display,Processor,...,Water resistant,Straps,Sport Modes,Watch Faces,NFC,CPU,Google TV,Chromecast,Compatibility,Water Proof
0,0 Reviews,IN STOCK,12 GB,"256GB, 512GB","4000mAh, 25W",50 MP + 12 MP,10 MP,"6.7 inch, AMOLED, 120Hz","3.9 inch, AMOLED",Qualcomm Snapdragon 8 Gen 3,...,,,,,,,,,,
1,0 Reviews,IN STOCK,12GB,"256GB, 512GB, 1TB","4,4000mAh",50MP + 10MP + 12MP,4MP,7.6 inches Super AMOLED display (unfolded); 6....,,Qualcomm SM8550-AC Snapdragon 8 Gen 3 (4 nm),...,,,,,,,,,,
2,0 Reviews,IN STOCK,8GB,,"5000 mAh, non-removable, 45W",,,,,,...,,,,,,,,,,
3,,IN STOCK,8GB,256GB,"5,000 mAh",50MP + 8MP + 5MP,13MP,"6.6 inches, 120 Hz",,Exynos 1380 SoC,...,,,,,,,,,,
4,0 Reviews,IN STOCK,8GB,"128 GB, 256GB","5,000 mAh",50 MP + 12MP + 5 MP,32MP,"6 inches, 120 Hz",,Exynos 1480 SoC,...,,,,,,,,,,
5,0 Reviews,IN STOCK,8GB,256GB,"5,000 mAh, 25W",50MP + 8MP + 2MP,13 MP,6.5 inch,,Octa-core Exynos 1280 (5 nm),...,,,,,,,,,,
6,0 Reviews,IN STOCK,6,,"6,000mAh, 25W",,,6.5-inch Super AMOLED,,MediaTek Helio G99 SoC,...,,,,,,,,,,
7,0 Reviews,IN STOCK,4/6GB,,"6000mAh, 25W",,,"6 inches, Super AMOLED",,"Octa-core, Mediatek Dimensity 6100+ (6 nm)",...,,,,,,,,,,
8,0 Reviews,IN STOCK,8GB,128GB / 256GB / 512GB/1TB,"4,000 mAh",50MP + 10MP + 12MP,12 MP (wide),"6.2 inch + Dynamic LTPO AMOLED 2X, 120Hz, HDR1...",,Qualcomm Snapdragon 8 Gen 3 (4nm),...,,,,,,,,,,
9,0 Reviews,IN STOCK,12GB,256 / 512GB,4900mAh,50 MP + 10 MP + 12 MP,12 MP,"6.7 inch, AMOLED, 120Hz",,Snapdragon 8 Gen 3 / Exynos 2400,...,,,,,,,,,,


In [52]:
# Display the DataFrame's dimensions and column summary
print(df.shape)
print("\n *************************************")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
df.info()

# Display the first 10 rows of the DataFrame
df.head(10)

(209, 119)

 *************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209 entries, 0 to 208
Columns: 119 entries, Reviews to Water Proof
dtypes: object(119)
memory usage: 194.4+ KB
None


Unnamed: 0,Reviews,Status,RAM,Internal Storage,Battery,Main camera,Front camera,Display,Cover display,Processor,...,Water resistant,Straps,Sport Modes,Watch Faces,NFC,CPU,Google TV,Chromecast,Compatibility,Water Proof
0,0 Reviews,IN STOCK,12 GB,"256GB, 512GB","4000mAh, 25W",50 MP + 12 MP,10 MP,"6.7 inch, AMOLED, 120Hz","3.9 inch, AMOLED",Qualcomm Snapdragon 8 Gen 3,...,,,,,,,,,,
1,0 Reviews,IN STOCK,12GB,"256GB, 512GB, 1TB","4,4000mAh",50MP + 10MP + 12MP,4MP,7.6 inches Super AMOLED display (unfolded); 6....,,Qualcomm SM8550-AC Snapdragon 8 Gen 3 (4 nm),...,,,,,,,,,,
2,0 Reviews,IN STOCK,8GB,,"5000 mAh, non-removable, 45W",,,,,,...,,,,,,,,,,
3,,IN STOCK,8GB,256GB,"5,000 mAh",50MP + 8MP + 5MP,13MP,"6.6 inches, 120 Hz",,Exynos 1380 SoC,...,,,,,,,,,,
4,0 Reviews,IN STOCK,8GB,"128 GB, 256GB","5,000 mAh",50 MP + 12MP + 5 MP,32MP,"6 inches, 120 Hz",,Exynos 1480 SoC,...,,,,,,,,,,
5,0 Reviews,IN STOCK,8GB,256GB,"5,000 mAh, 25W",50MP + 8MP + 2MP,13 MP,6.5 inch,,Octa-core Exynos 1280 (5 nm),...,,,,,,,,,,
6,0 Reviews,IN STOCK,6,,"6,000mAh, 25W",,,6.5-inch Super AMOLED,,MediaTek Helio G99 SoC,...,,,,,,,,,,
7,0 Reviews,IN STOCK,4/6GB,,"6000mAh, 25W",,,"6 inches, Super AMOLED",,"Octa-core, Mediatek Dimensity 6100+ (6 nm)",...,,,,,,,,,,
8,0 Reviews,IN STOCK,8GB,128GB / 256GB / 512GB/1TB,"4,000 mAh",50MP + 10MP + 12MP,12 MP (wide),"6.2 inch + Dynamic LTPO AMOLED 2X, 120Hz, HDR1...",,Qualcomm Snapdragon 8 Gen 3 (4nm),...,,,,,,,,,,
9,0 Reviews,IN STOCK,12GB,256 / 512GB,4900mAh,50 MP + 10 MP + 12 MP,12 MP,"6.7 inch, AMOLED, 120Hz",,Snapdragon 8 Gen 3 / Exynos 2400,...,,,,,,,,,,


In [51]:
# Number of scraped products per brand, most frequent first
df.loc[:, 'Brands'].value_counts()

Brands
Samsung    40
Apple      30
OnePlus    17
Tecno      17
Xiaomi     17
Google     15
Infinix    14
Oppo        6
Name: count, dtype: int64

In [58]:
# Preview the first five rows, restricted to the first 20 columns
df.head().iloc[:, :20]

Unnamed: 0,Reviews,Status,RAM,Internal Storage,Battery,Main camera,Front camera,Display,Cover display,Processor,Connectivity,Colors,OS,Warranty Info,Region,Price 1,Price 2,SKU,Brands,Product Name
0,0 Reviews,IN STOCK,12 GB,"256GB, 512GB","4000mAh, 25W",50 MP + 12 MP,10 MP,"6.7 inch, AMOLED, 120Hz","3.9 inch, AMOLED",Qualcomm Snapdragon 8 Gen 3,"Dual SIM, 3G, 4G, 5G, VoLTE, Wi-Fi","Yellow, Silver Shadow, Mint, Blue, Black, Whit...","Android 14, One UI 6.1.1",Warranty,UAE / Dubai,"KSh 108,000","KSh 103,000",,Samsung,Samsung Galaxy Z Flip 6
1,0 Reviews,IN STOCK,12GB,"256GB, 512GB, 1TB","4,4000mAh",50MP + 10MP + 12MP,4MP,7.6 inches Super AMOLED display (unfolded); 6....,,Qualcomm SM8550-AC Snapdragon 8 Gen 3 (4 nm),"Dual SIM, 3G, 4G, 5G, Wi-Fi","Navy, Silver Shadow, Pink, Black, White",,Warranty,UAE / Dubai,"KSh 162,500","KSh 205,000",,Samsung,Samsung Galaxy Z Fold 6
2,0 Reviews,IN STOCK,8GB,,"5000 mAh, non-removable, 45W",,,,,,5G,,"Android 14, One UI 6.1",,,"KSh 55,000","KSh 45,000",,Samsung,Samsung Galaxy M55
3,,IN STOCK,8GB,256GB,"5,000 mAh",50MP + 8MP + 5MP,13MP,"6.6 inches, 120 Hz",,Exynos 1380 SoC,"Dual SIM, 2G, 3G, 4G, 5G, Wi-Fi","Iceblue, Lilac, Navy","Android 14, One UI 6.1,",Warranty,East Africa,"KSh 32,000","KSh 40,500",SM-A356ELBMAFB,Samsung,Samsung Galaxy A35 5G
4,0 Reviews,IN STOCK,8GB,"128 GB, 256GB","5,000 mAh",50 MP + 12MP + 5 MP,32MP,"6 inches, 120 Hz",,Exynos 1480 SoC,"Dual SIM, 2G, 3G, 4G, Wi-Fi","Iceblue, Lilac, Navy, Lemon","Android 14, One UI 6.1,",Warranty,East Africa,"KSh 42,000","KSh 46,500",SM-A556ELBVAFB,Samsung,Samsung Galaxy A55 5G


In [60]:
# Step 1: restrict the frame to its first 20 columns (positions 0-19)
df_limited = df.iloc[:, :20]

# Step 2: bring the last two of those columns to the front.
# Position 19 (the 20th column) becomes the first column and position 18
# (the 19th column) becomes the second; the other 18 keep their order.
columns = df_limited.columns.tolist()
column_20, column_18 = columns[19], columns[18]
columns = [column_20, column_18, *columns[:18]]

# Apply the new column order
df_reordered = df_limited[columns]

# Display the updated DataFrame
df_reordered.head()

Unnamed: 0,Product Name,Brands,Reviews,Status,RAM,Internal Storage,Battery,Main camera,Front camera,Display,Cover display,Processor,Connectivity,Colors,OS,Warranty Info,Region,Price 1,Price 2,SKU
0,Samsung Galaxy Z Flip 6,Samsung,0 Reviews,IN STOCK,12 GB,"256GB, 512GB","4000mAh, 25W",50 MP + 12 MP,10 MP,"6.7 inch, AMOLED, 120Hz","3.9 inch, AMOLED",Qualcomm Snapdragon 8 Gen 3,"Dual SIM, 3G, 4G, 5G, VoLTE, Wi-Fi","Yellow, Silver Shadow, Mint, Blue, Black, Whit...","Android 14, One UI 6.1.1",Warranty,UAE / Dubai,"KSh 108,000","KSh 103,000",
1,Samsung Galaxy Z Fold 6,Samsung,0 Reviews,IN STOCK,12GB,"256GB, 512GB, 1TB","4,4000mAh",50MP + 10MP + 12MP,4MP,7.6 inches Super AMOLED display (unfolded); 6....,,Qualcomm SM8550-AC Snapdragon 8 Gen 3 (4 nm),"Dual SIM, 3G, 4G, 5G, Wi-Fi","Navy, Silver Shadow, Pink, Black, White",,Warranty,UAE / Dubai,"KSh 162,500","KSh 205,000",
2,Samsung Galaxy M55,Samsung,0 Reviews,IN STOCK,8GB,,"5000 mAh, non-removable, 45W",,,,,,5G,,"Android 14, One UI 6.1",,,"KSh 55,000","KSh 45,000",
3,Samsung Galaxy A35 5G,Samsung,,IN STOCK,8GB,256GB,"5,000 mAh",50MP + 8MP + 5MP,13MP,"6.6 inches, 120 Hz",,Exynos 1380 SoC,"Dual SIM, 2G, 3G, 4G, 5G, Wi-Fi","Iceblue, Lilac, Navy","Android 14, One UI 6.1,",Warranty,East Africa,"KSh 32,000","KSh 40,500",SM-A356ELBMAFB
4,Samsung Galaxy A55 5G,Samsung,0 Reviews,IN STOCK,8GB,"128 GB, 256GB","5,000 mAh",50 MP + 12MP + 5 MP,32MP,"6 inches, 120 Hz",,Exynos 1480 SoC,"Dual SIM, 2G, 3G, 4G, Wi-Fi","Iceblue, Lilac, Navy, Lemon","Android 14, One UI 6.1,",Warranty,East Africa,"KSh 42,000","KSh 46,500",SM-A556ELBVAFB


In [61]:
# Save the trimmed/reordered table. index=False matches the raw export and
# keeps the row index from being written as an extra unnamed first column.
df_reordered.to_csv('../data/cleaned_phones.csv', index=False)