In [15]:
from random import randint
from time import sleep
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [16]:
# Function to extract Product Title
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<h1>`` whose class attribute is ``-fs20 -pts -pbxs`` and
    returns its text with surrounding whitespace removed.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped title text, or an empty string when any step of the
        lookup raises ``AttributeError`` (e.g. ``find`` yielded ``None``).
    """
    try:
        # Tag lookup, text extraction and trimming in one chained expression.
        return soup.find("h1", attrs={"class": "-fs20 -pts -pbxs"}).text.strip()
    except AttributeError:
        return ""


# Function to Extract Original Product Price
def get_price(soup):
    """Return the original (pre-discount) price text from a product page.

    Reads the ``.string`` of the ``<span>`` whose class attribute is
    ``-tal -gy5 -lthr -fs16`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped price text, or an empty string when the lookup raises
        ``AttributeError`` (missing span, or a span whose ``.string`` is
        ``None``).
    """
    price_attrs = {'class': '-tal -gy5 -lthr -fs16'}

    try:
        price_tag = soup.find("span", attrs=price_attrs)
        raw_price = price_tag.string
        cleaned_price = raw_price.strip()
    except AttributeError:
        return ""

    return cleaned_price


# Function to Extract Deal Price
def get_deal_price(soup):
    """Return the discounted (deal) price text from a product page.

    Reads the ``.string`` of the ``<span>`` whose class attribute is
    ``-b -ltr -tal -fs24`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped deal price, or an empty string when the lookup raises
        ``AttributeError``.
    """
    # Start from the fallback and overwrite it only if extraction succeeds.
    deal_price = ""

    try:
        deal_tag = soup.find("span", attrs={'class': '-b -ltr -tal -fs24'})
        deal_price = deal_tag.string.strip()
    except AttributeError:
        pass

    return deal_price


# Function to Extract Deal Percentage
def get_deal_percentage(soup):
    """Return the discount badge text (e.g. ``"10%"``) from a product page.

    Reads the ``.string`` of the ``<span>`` whose class attribute is
    ``bdg _dsct _dyn -mls`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped badge text, or an empty string when the lookup raises
        ``AttributeError``.
    """
    try:
        badge = soup.find("span", attrs={'class': 'bdg _dsct _dyn -mls'})
        badge_label = badge.string
    except AttributeError:
        return ""

    try:
        return badge_label.strip()
    except AttributeError:
        # ``.string`` was not a string-like value (e.g. None).
        return ""


# Function to extract Availability Status
def get_availability(soup):
    """Return the availability text shown on a product page.

    Reads the ``.string`` of the ``<p>`` whose class attribute is
    ``-df -i-ctr -fs12 -pbs -gy5`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped availability text, or the literal ``"Not Available"``
        when the lookup raises ``AttributeError``. Note that this fallback
        is also what is returned when the element simply was not matched.
    """
    try:
        status_tag = soup.find("p", attrs={'class': '-df -i-ctr -fs12 -pbs -gy5'})
        return status_tag.string.strip()
    except AttributeError:
        return "Not Available"


# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text (e.g. ``"4.2 out of 5"``).

    Takes the text of the ``<div>`` whose class attribute is
    ``stars _s _al`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped rating text, or an empty string when the lookup raises
        ``AttributeError``.
    """
    rating = ""

    try:
        stars_tag = soup.find("div", attrs={'class': 'stars _s _al'})
        rating = stars_tag.text.strip()
    except AttributeError:
        # Keep the empty-string default.
        pass

    return rating


def get_urls(url="https://www.jumia.co.ke/smartphones/", pages=50, headers=None,
             delay_range=(2, 51), timeout=30):
    """Collect product anchor tags from a paginated catalogue listing.

    Requests listing pages ``1..pages`` of ``url`` (as ``?page=N``), parses
    each one and gathers every ``<a class="core">`` found inside the listing
    container ``<div class="-paxs row _no-g _4cl-3cm-shs">``.

    Called with no arguments it crawls the same 50 smartphone listing pages
    as before.

    Args:
        url: Base URL of the catalogue listing.
        pages: Number of listing pages to fetch, starting at page 1.
            Zero or a negative value fetches nothing.
        headers: HTTP headers to send. Defaults to a placeholder with an
            empty ``User-Agent`` - add your user agent.
        delay_range: Inclusive ``(low, high)`` bounds, in whole seconds, of
            the random pause inserted between consecutive page requests.
        timeout: Seconds to wait for each response; without it a stalled
            connection would block the crawl indefinitely.

    Returns:
        A list of anchor Tag objects (not URL strings); callers read the
        ``href`` attribute themselves.

    Raises:
        requests.RequestException: If a listing page request fails or
            times out.
    """
    if headers is None:
        # add your user agent
        headers = {'User-Agent': '', 'Accept-Language': 'en-US, en;q=0.5'}

    links_all = []

    for page in range(1, pages + 1):
        # The page number goes in the query string; the former
        # '#catalog-listing' fragment is never sent to the server.
        webpage = requests.get(url, params={'page': page}, headers=headers,
                               timeout=timeout)

        # Soup Object containing all data
        soup = BeautifulSoup(webpage.content, "html.parser")

        # The container is absent on error pages, bot-check pages and pages
        # past the end of the catalogue. Skip those instead of crashing and
        # losing every link gathered so far.
        listing = soup.find("div", {'class': '-paxs row _no-g _4cl-3cm-shs'})
        if listing is not None:
            links_all.extend(listing.find_all("a", attrs={'class': 'core'}))

        # Pause between requests only; nothing follows the last page.
        if page < pages:
            sleep(randint(*delay_range))

    return links_all


In [17]:
if __name__ == '__main__':
    # Scrape every product linked from the catalogue listing and save the
    # extracted fields to CSV. `jumia_df` is left in the namespace for the
    # cells that follow.

    # add your user agent
    HEADERS = ({'User-Agent': '', 'Accept-Language': 'en-US, en;q=0.5'})

    # Site root against which product hrefs are resolved.
    BASE_URL = "https://www.jumia.co.ke"

    # Seconds to wait for a product page; without a timeout a single stalled
    # connection would hang the whole run.
    REQUEST_TIMEOUT = 30

    # Anchor Tag objects gathered from the listing pages
    links = get_urls()

    # Keep only the anchors that actually carry an href
    links_list = [link.get('href') for link in links if link.get('href') is not None]

    d = {"title":[], "price":[], "deal_price":[], "discount":[], "rating":[], "availability":[]}

    # Loop for extracting product details from each link
    for link in links_list:
        # urljoin resolves relative hrefs against the site root and leaves
        # already-absolute hrefs untouched, unlike plain concatenation.
        product_url = urljoin(BASE_URL, link)

        try:
            new_webpage = requests.get(product_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable product must not discard everything scraped so
            # far; report it and move on.
            print("Skipping {}: {}".format(product_url, exc))
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # One value per column for this product; the getters fall back to a
        # default string when their element is not found.
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['deal_price'].append(get_deal_price(new_soup))
        d['discount'].append(get_deal_percentage(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['availability'].append(get_availability(new_soup))

    jumia_df = pd.DataFrame.from_dict(d)

    # NOTE(review): the 'h=' prefix in the file name looks like a typo for
    # 'jumia_data.csv'. Kept unchanged in case other cells read this exact
    # path - confirm and rename.
    jumia_df.to_csv('h=jumia_data.csv', header=True, index=False)


In [18]:
# Show the scraped product table as this cell's output.
jumia_df

Unnamed: 0,title,price,deal_price,discount,rating,availability
0,Alcatel 3L 4gb Ram 64gb 48mp Camera 8mp Selfie...,"KSh 10,499","KSh 9,399",10%,4.2 out of 5,Not Available
1,"Tecno Spark 8C, 6.6"", 64GB + 2GB RAM (Dual SIM...","KSh 16,347","KSh 12,099",26%,5 out of 5,In stock
2,"Samsung Galaxy A13, 6.6"", 64GB + 4GB RAM (Dual...","KSh 24,521","KSh 19,699",20%,4.5 out of 5,Not Available
3,Alcatel 3L 4gb Ram 64gb 48mp Camera 8mp Selfie...,"KSh 10,999","KSh 9,499",14%,5 out of 5,Not Available
4,"Samsung Galaxy S23, 6.1"", 256GB + 8GB RAM (Dua...","KSh 139,999","KSh 129,999",7%,0 out of 5,Not Available
...,...,...,...,...,...,...
1995,"Tecno Pop 6 PRO,Display 6.6'', 32GB + 2GB RAM,...","KSh 16,000","KSh 10,999",31%,0 out of 5,Not Available
1996,"Oppo Reno 7 5G, 6.43"", 256GB + 8GB, Dual SIM, ...","KSh 69,999","KSh 55,999",20%,0 out of 5,Not Available
1997,"Oppo Reno 8 5G,6.43"",256GB+8GB,Dual SIM,4500mA...","KSh 89,998","KSh 59,100",34%,0 out of 5,Not Available
1998,"Sony Xperia 10 Plus 6.5"" 64GB ROM 4GB RAM Sma...","KSh 36,990","KSh 27,990",24%,0 out of 5,Not Available
