In [85]:
import csv
import datetime
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [86]:
# Scrape one Kilimall category page and append a row per product listing to a
# local CSV file, then reload the whole file and print it.
#
# Re-running this cell appends the page's listings again; the CSV is an
# ever-growing log, not a snapshot.

# Base endpoint; the query string is supplied separately via `params` so that
# requests percent-encodes it. Written inline, the '&' inside the category name
# "TV,Audio&Video" terminates the ctgName value and starts a bogus "Video"
# parameter.
url = 'https://www.kilimall.co.ke/search-result'
query_params = {'id': '2069', 'form': 'category', 'ctgName': 'TV,Audio&Video'}

# Without a timeout requests waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

# Destination CSV and its column names.
file_path = 'KilimallSmartTVDataSet.csv'
CSV_HEADER = ['product', 'price', 'review', 'tag']

# One entry per CSV column, in CSV_HEADER order:
# (tag name, CSS class, placeholder used when the element is missing).
LISTING_FIELDS = [
    ('p', 'product-title', 'No title'),
    ('div', 'product-price', 'N/A'),
    ('span', 'reviews', 'No reviews'),
    ('div', 'logistics-tag', 'No logistics info'),
]


def _text_or_default(parent, tag_name, css_class, default):
    """Return the stripped text of the first matching descendant of `parent`.

    Falls back to `default` when no `tag_name` element with class `css_class`
    is found.
    """
    node = parent.find(tag_name, class_=css_class)
    return node.get_text(strip=True) if node is not None else default


try:
    # Fetch the page; raise_for_status turns 4xx/5xx responses into exceptions.
    response = requests.get(url, params=query_params, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # find() returns None when the container is absent; fail with a clear
    # message instead of an AttributeError on None.
    listings_container = soup.find('div', class_='listings')
    if listings_container is None:
        raise ValueError(
            "no <div class='listings'> container found in the response "
            f"from {response.url}"
        )
    listings = listings_container.find_all('div', class_='listing-item')

    # The header is written only when the file is new or empty.
    # NOTE(review): a pre-existing, non-empty file that lacks a header row is
    # left as-is, so read_csv below will treat its first data row as the
    # column names.
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0

    with open(file_path, 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)

        if needs_header:
            writer.writerow(CSV_HEADER)

        # One row per listing, columns in CSV_HEADER order.
        writer.writerows(
            [
                _text_or_default(item, tag_name, css_class, default)
                for tag_name, css_class, default in LISTING_FIELDS
            ]
            for item in listings
        )

    # Time stamp to know when the data was collected
    today = datetime.date.today()
    print(f"Data collected on: {today}")

    # Reload everything accumulated so far (including earlier runs) and show it.
    df = pd.read_csv(file_path)
    print(df)

except (requests.RequestException, OSError, ValueError, csv.Error) as e:
    # Expected failure modes: network/HTTP errors, file-system errors, a page
    # without the listings container, and CSV write/parse errors (pandas'
    # parser errors derive from ValueError). Anything else is a programming
    # error and is allowed to surface with a full traceback.
    print(f"An error occurred: {e}")

Data collected on: 2024-08-31
    Bluetooth Speaker Portable Outdoor Stereo System LED Light Shinning with USB FM AUX TF Card  \
0    Double Base Wireless Bluetooth Speaker Bulb LE...                                            
1    Vitron 32 inch Frameless Television HTC 3218 L...                                            
2    Vitron 2.1CH Multimedia Bluetooth Woofer Speak...                                            
3    TAGWOOD LS-421F  2.1CH 6000W PMPO WOOFER Home ...                                            
4    Vitron 32 Inch Frameless Smart TV HD Netflix T...                                            
..                                                 ...                                            
247  Vitron 2.1CH Multimedia Bluetooth Woofer Home ...                                            
248  VITRON 43" INCH BLUETOOTH ENABLED HTC4388FS FR...                                            
249  Vitron Digital Full HD DVD Player/VCD,CD,PLAY ...                         