## Web Scaping (Jumia)

In [2]:
import pandas as pd
from bs4 import BeautifulSoup 
import requests
import re

In [3]:
products = []
for i in range(1,51):
    url = f"https://www.jumia.com.ng/mobile-phones/?page={i}#catalog-listing"
    page  = requests.get(url)
    page.encoding = "UTF-8"
    soup = BeautifulSoup(page.text, 'html.parser')
    main_div = soup.find('main', class_ = "has-b2top -pts")
    articles = main_div.find_all('article', class_ = 'prd _fb col c-prd')
    
    for article in articles:
        #Get the names
        name = article.find('h3', class_ = 'name').text.strip()
         
        # Split the name and color
        name_and_color = name.split('-')
        # Extract the color
        if len(name_and_color) > 1:
            color = name_and_color[-1].strip()
        elif 'Grey' in name:
                color = 'Grey'
        elif 'Black' in name:
            color = 'Black'
        else:
            color = 'n/a'
    
        #Get the prices (new and old)
        price = article.find('div' , class_ = 'prc').text.strip()
        price_match = re.search(r'^\₦?\s*\d+(,\d{3})*', price)
        if price_match:
            price_text = price_match.group()
            price_text = price_text.replace('₦', '').strip()
            price = int(price_text.replace(',', ''))
        else:
            price = 'N/A'
            
        old_price_element = article.find('div', class_='old')
        if old_price_element:
            old_price = old_price_element.text.strip()
            old_price_match = re.search(r'^\₦?\s*\d+(,\d{3})*', old_price)
            if old_price_match:
                old_price_text = old_price_match.group()
                old_price_text = old_price_text.replace('₦', '').strip()
                old_price = int(old_price_text.replace(',', ''))
            else:
                old_price = 'N/A'
        else:
            old_price = 'N/A'

    
        #Getting the dicount percentage
        discount_element = article.find('div', class_ = 'bdg _dsct _sm')
        if discount_element:
            discount = discount_element.text.strip()
        else:
            discount = 'N/A'
    
        #Getting the rating
        rating_element = article.find('div', class_ = 'stars _s')
        if rating_element:
            rating = rating_element.text.strip()
        else:
            rating = 'N/A'
    
        #Getting the store
        store = article.find('div', class_='bdg _mall _xs')
        if store:
            store_name = store.text.strip()
        else:
            store_name = 'Not from official store'
    
        #Getting the type of shipping
        shipping_element = article.find('svg', class_ = 'ic xprss')
        if shipping_element:
            shipping_type = shipping_element.attrs['aria-label']
        else:
            shipping_type = 'Normal Shipping'

        #Shipped from?
        shipped_from_element = article.find('div', class_ = 'bdg _glb _xs')
        if shipped_from_element:
            shipped_from = shipped_from_element.text.strip()
        else:
            shipped_from = "Not shipped from abroad"
        
        products.append([name, color, price, old_price, discount, rating, store_name, shipping_type, shipped_from])

In [4]:
products

[['itel 20000mAh Fast Charging, Bright Torch STAR 200Fast Power Bank',
  'n/a',
  15990,
  19790,
  '19%',
  '4 out of 5',
  'Not from official store',
  'Normal Shipping',
  'Not shipped from abroad'],
 ['Nokia 110 Africa Edition Dual SIM Wireless FM, Torch, Camera Phone',
  'n/a',
  13871,
  18000,
  '23%',
  '4.1 out of 5',
  'Not from official store',
  'Express Shipping',
  'Not shipped from abroad'],
 ['itel 2163 Wireless FM Torchlight Dual SIM - Black',
  'Black',
  11500,
  15500,
  '26%',
  '4 out of 5',
  'Official Store',
  'Express Shipping',
  'Not shipped from abroad'],
 ['itel 2163 Wireless FM Torchlight Dual SIM - Black',
  'Black',
  11494,
  15000,
  '23%',
  '4.2 out of 5',
  'Official Store',
  'Normal Shipping',
  'Not shipped from abroad'],
 ['FreeYond M5,4GB+128GB,4G 6.52" Smartphone 5000mAh,Grey',
  'Grey',
  97000,
  142498,
  '32%',
  '3.7 out of 5',
  'Not from official store',
  'Express Shipping',
  'Not shipped from abroad'],
 ["itel S24 6.6'' 8GB RAM/256G

In [5]:
products_df = pd.DataFrame(products, columns = ['Name', 'Color', 'New_Price', 'Old_Price', 'Discount', 'Rating', 'Store_Name', 'Shipping_Type', 'Shipped_From'])
products_df


Unnamed: 0,Name,Color,New_Price,Old_Price,Discount,Rating,Store_Name,Shipping_Type,Shipped_From
0,"itel 20000mAh Fast Charging, Bright Torch STAR...",,15990,19790,19%,4 out of 5,Not from official store,Normal Shipping,Not shipped from abroad
1,"Nokia 110 Africa Edition Dual SIM Wireless FM,...",,13871,18000,23%,4.1 out of 5,Not from official store,Express Shipping,Not shipped from abroad
2,itel 2163 Wireless FM Torchlight Dual SIM - Black,Black,11500,15500,26%,4 out of 5,Official Store,Express Shipping,Not shipped from abroad
3,itel 2163 Wireless FM Torchlight Dual SIM - Black,Black,11494,15000,23%,4.2 out of 5,Official Store,Normal Shipping,Not shipped from abroad
4,"FreeYond M5,4GB+128GB,4G 6.52"" Smartphone 5000...",Grey,97000,142498,32%,3.7 out of 5,Not from official store,Express Shipping,Not shipped from abroad
...,...,...,...,...,...,...,...,...,...
1995,Reno 8 Pro 6.8inch Smartphone 6gb RaM 128G...,,114000,163200,30%,,Not from official store,Normal Shipping,Shipped from abroad
1996,Nokia 3310 Classic Mobile Grey,Grey,32000,35000,9%,,Not from official store,Normal Shipping,Not shipped from abroad
1997,Samsung GALAXY A35 5G 8/256GB DUAL SIM- AWESOM...,5000MAH,541000,620000,13%,,Not from official store,Normal Shipping,Not shipped from abroad
1998,Nokia 3310 Classic Mobile Phone Dual SIM Long ...,Dark Blue,32000,35000,9%,,Not from official store,Normal Shipping,Not shipped from abroad


In [6]:
products_df.describe()

Unnamed: 0,New_Price
count,2000.0
mean,329605.0
std,475562.0
min,350.0
25%,40600.0
50%,165000.0
75%,403499.8
max,4900000.0


In [7]:
products_df.to_csv(r'Mobile_Phones.csv', index = False)