# Case Study : Real-time Samsung Products and Prices Comparison from Multiple eCommerce Websites

## Importing Libraries

In [43]:
from bs4 import BeautifulSoup as bs
import requests
from urllib.request import urlopen
import csv
from urllib.request import Request, urlopen
import pandas as pd
from datetime import date
import time

## Function for Converting Currency from Egyptian Pound to User Preferred Currency

In [44]:
def ConvertCurrency(currency):
    """Return the EGP -> `currency` conversion figure scraped from currency.me.uk.

    The figure is read from the ``value`` attribute of the ``<input id="answer">``
    element on ``https://www.currency.me.uk/convert/egp/<currency>``.

    Parameters
    ----------
    currency : str
        Target currency code (e.g. "USD"). It is stripped and lower-cased
        before being placed in the URL.

    Returns
    -------
    float
        The scraped value; callers in this notebook multiply EGP prices by it.

    Raises
    ------
    ValueError
        If the page has no answer field, or its value is not a number.
    urllib.error.URLError
        If the page cannot be downloaded (raised by ``urlopen``).
    """
    currency=currency.strip().lower()
    url=f"https://www.currency.me.uk/convert/egp/{currency}"
    # `with` closes the connection even if reading fails; a bare `client.close`
    # (attribute access without parentheses) would never actually close it.
    with urlopen(url) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    answer=htmlparsed.find("input",{"id":"answer"})
    currencyprice=None if answer is None else answer.get("value")
    if currencyprice is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"No conversion rate found for currency '{currency}' at {url}")
    return float(currencyprice)
    

##  Function for Creating Spreadsheet and Adding Header

In [45]:
def CreateFile(FilePath):
    """Create (or truncate) the results CSV at `FilePath` and write its header row.

    Parameters
    ----------
    FilePath : str
        Location of the CSV file to create. Existing content is discarded.

    Returns
    -------
    file object
        The handle used to write the header. It is already closed when
        returned; calling ``.close()`` on it again is a harmless no-op, so
        callers that close the returned handle keep working.
    """
    header=["Product Description", "Product Price", "Rating", "Details Link"]
    # One context-managed open both truncates the file and writes the header,
    # so no second, dangling handle is left open on the same path.
    with open(FilePath, 'w',encoding="utf-8",newline="") as file:
        csv.writer(file).writerow(header)
    return file
        

## Product Class for Holding Scraped Product Details

In [46]:
class Product:
    """Plain record describing one scraped product listing.

    Attributes
    ----------
    product_desc
        Description/title of the product.
    product_price
        Price of the product.
    rating
        Rating of the product.
    details_link
        Link to the product's details page.
    """

    def __init__(self, product_desc, product_price,rating,details_link):
        # Store the four fields exactly as given, in declaration order.
        self.product_desc, self.product_price = product_desc, product_price
        self.rating, self.details_link = rating, details_link

## Function for Extracting Amazon EG Samsung Products by Price Range

In [47]:
def AmazonUrl(url,MinPrice,MaxPrice,path,currency="EGP"):
    """Scrape one Amazon EG search-results page and append in-range products to a CSV.

    Parameters
    ----------
    url : str
        Address of the search-results page to download.
    MinPrice, MaxPrice : int or float
        Inclusive price bounds, compared against the (possibly converted) price.
    path : str
        CSV file that matching rows are appended to (UTF-8).
    currency : str, optional
        Target currency code. "EGP" or "Default" (any letter case) keeps the
        scraped price unchanged; any other value multiplies it by
        ``ConvertCurrency(currency)``.

    Returns
    -------
    None

    Notes
    -----
    Each written row is ``[description, price, rating, details link]``.
    Product cards without a title or without a parsable price are skipped;
    a missing rating or link is stored as "Not Found".
    """
    # `with` guarantees the connection is closed; a bare `client.close`
    # (no parentheses) would never run.
    with urlopen(url) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    productcontainer=htmlparsed.find_all("div",{"class":"a-section a-spacing-small puis-padding-left-small puis-padding-right-small"})
    # Prices are scraped in Egyptian pounds, so EGP needs no conversion request.
    keep_scraped_price=currency.strip().upper() in ("EGP","DEFAULT")
    rate=None  # fetched lazily, at most once per page, instead of once per product
    rows=[]
    for container in productcontainer:
        desc_tag=container.find("span",{"class":"a-size-base-plus a-color-base a-text-normal"})
        price_tag=container.find("span",{"class":"a-price-whole"})
        if desc_tag is None or price_tag is None:
            continue
        try:
            product_price=float(price_tag.text.strip().replace(",",""))
        except ValueError:
            # Unparsable price text: skip this card rather than abort the page.
            continue
        if not keep_scraped_price:
            if rate is None:
                rate=ConvertCurrency(currency)
            product_price=rate*product_price
        product_price=int(product_price)
        if not (MinPrice<=product_price<=MaxPrice):
            continue
        rating_tag=container.find("span",{"class":"a-icon-alt"})
        # Only the first three characters of the rating text are kept.
        rating="Not Found" if rating_tag is None else rating_tag.text.strip()[0:3]
        link_tag=container.find("a",{"class":"a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal"})
        href=None if link_tag is None else link_tag.get('href')
        details_link="Not Found" if href is None else "https://www.amazon.eg"+href
        rows.append([desc_tag.text.strip(),product_price,rating,details_link])
    if rows:
        # Open the output once per page rather than once per row.
        with open(path, 'a',encoding="utf-8",newline="") as f:
            csv.writer(f).writerows(rows)

## Function for Extracting Amazon EG Samsung Products by Product Keywords

In [48]:
def AmazonKeyword(url,keyword,currency="EGP"):
    """Scrape one Amazon EG search-results page and return products matching `keyword`.

    Parameters
    ----------
    url : str
        Address of the search-results page to download.
    keyword : str
        Whitespace-separated words; a product matches when every word occurs
        (case-insensitively, as a substring) in its description.
    currency : str, optional
        Target currency code. "EGP" or "Default" (any letter case) keeps the
        scraped price unchanged; any other value multiplies it by
        ``ConvertCurrency(currency)``.

    Returns
    -------
    list of Product
        One ``Product`` per matching card, in page order. Cards without a
        title or a parsable price are skipped; a missing rating or link is
        stored as "Not Found".
    """
    Result=[]
    # `with` guarantees the connection is closed; a bare `client.close`
    # (no parentheses) would never run.
    with urlopen(url) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    productcontainer=htmlparsed.find_all("div",{"class":"a-section a-spacing-small puis-padding-left-small puis-padding-right-small"})
    words=keyword.lower().split()  # loop-invariant, so computed once
    # Prices are scraped in Egyptian pounds, so EGP needs no conversion request.
    keep_scraped_price=currency.strip().upper() in ("EGP","DEFAULT")
    rate=None  # fetched lazily, at most once per page, instead of once per product
    for container in productcontainer:
        desc_tag=container.find("span",{"class":"a-size-base-plus a-color-base a-text-normal"})
        if desc_tag is None:
            continue
        product_desc=desc_tag.text.strip()
        lowered=product_desc.lower()
        if not all(word in lowered for word in words):
            continue
        price_tag=container.find("span",{"class":"a-price-whole"})
        if price_tag is None:
            continue
        try:
            product_price=float(price_tag.text.strip().replace(",",""))
        except ValueError:
            # Unparsable price text: skip this card rather than abort the page.
            continue
        if not keep_scraped_price:
            if rate is None:
                rate=ConvertCurrency(currency)
            product_price=rate*product_price
        product_price=int(product_price)
        rating_tag=container.find("span",{"class":"a-icon-alt"})
        # Only the first three characters of the rating text are kept.
        rating="Not Found" if rating_tag is None else rating_tag.text.strip()[0:3]
        link_tag=container.find("a",{"class":"a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal"})
        href=None if link_tag is None else link_tag.get('href')
        details_link="Not Found" if href is None else "https://www.amazon.eg"+href
        Result.append(Product(product_desc,product_price,rating,details_link))
    return Result

## Function for Extracting Noon Samsung Products by Price Range

In [49]:
def NoonUrl(url,MinPrice,MaxPrice,path,currency="EGP"):
    """Scrape one Noon search-results page and append in-range products to a CSV.

    Parameters
    ----------
    url : str
        Address of the search-results page to download.
    MinPrice, MaxPrice : int or float
        Inclusive price bounds, compared against the (possibly converted) price.
    path : str
        CSV file that matching rows are appended to (UTF-8).
    currency : str, optional
        Target currency code. "EGP" or "Default" (any letter case) keeps the
        scraped price unchanged; any other value multiplies it by
        ``ConvertCurrency(currency)``.

    Returns
    -------
    None

    Notes
    -----
    Each written row is ``[description, price, rating, details link]``.
    Product cards without a title or without a parsable price are skipped;
    a missing rating or link is stored as "Not Found".
    """
    # `with` guarantees the connection is closed; a bare `client.close`
    # (no parentheses) would never run.
    with urlopen(url) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    productcontainer=htmlparsed.select("span.sc-5e739f1b-0.gEERDr.wrapper.productContainer")
    # Prices are scraped in Egyptian pounds, so EGP needs no conversion request.
    keep_scraped_price=currency.strip().upper() in ("EGP","DEFAULT")
    rate=None  # fetched lazily, at most once per page, instead of once per product
    rows=[]
    for container in productcontainer:
        desc_tag=container.find("div",{"class":"sc-2f7ba0e9-12 lblzds"})
        # The description is taken from the tag's `title` attribute.
        title=None if desc_tag is None else desc_tag.get('title')
        price_tag=container.find("div",{"class":"sc-ac248257-1 bEaNkb"})
        if title is None or price_tag is None:
            continue
        try:
            product_price=float(price_tag.text.strip().replace("EGP ","").replace(",",""))
        except ValueError:
            # Unparsable price text: skip this card rather than abort the page.
            continue
        if not keep_scraped_price:
            if rate is None:
                rate=ConvertCurrency(currency)
            product_price=rate*product_price
        product_price=int(product_price)
        if not (MinPrice<=product_price<=MaxPrice):
            continue
        rating_tag=container.find("span",{"class":"ratingValue"})
        rating="Not Found" if rating_tag is None else rating_tag.text.strip()
        link_tag=container.a  # first anchor inside the product card
        href=None if link_tag is None else link_tag.get('href')
        details_link="Not Found" if href is None else "https://www.noon.com"+href
        rows.append([title.strip(),product_price,rating,details_link])
    if rows:
        # Open the output once per page rather than once per row.
        with open(path, 'a',encoding="utf-8",newline="") as f:
            csv.writer(f).writerows(rows)

## Function for Extracting Noon Samsung Products by Product Keywords

In [50]:
def NoonKeyword(url,keyword,currency="EGP"):
    """Scrape one Noon search-results page and return products matching `keyword`.

    Parameters
    ----------
    url : str
        Address of the search-results page to download.
    keyword : str
        Whitespace-separated words; a product matches when every word occurs
        (case-insensitively, as a substring) in its description.
    currency : str, optional
        Target currency code. "EGP" or "Default" (any letter case) keeps the
        scraped price unchanged; any other value multiplies it by
        ``ConvertCurrency(currency)``.

    Returns
    -------
    list of Product
        One ``Product`` per matching card, in page order. Cards without a
        title or a parsable price are skipped; a missing rating or link is
        stored as "Not Found".
    """
    Result=[]
    # `with` guarantees the connection is closed; a bare `client.close`
    # (no parentheses) would never run.
    with urlopen(url) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    productcontainer=htmlparsed.select("span.sc-5e739f1b-0.gEERDr.wrapper.productContainer")
    words=keyword.lower().split()  # loop-invariant, so computed once
    # Prices are scraped in Egyptian pounds, so EGP needs no conversion request.
    keep_scraped_price=currency.strip().upper() in ("EGP","DEFAULT")
    rate=None  # fetched lazily, at most once per page, instead of once per product
    for container in productcontainer:
        desc_tag=container.find("div",{"class":"sc-2f7ba0e9-12 lblzds"})
        # The description is taken from the tag's `title` attribute.
        title=None if desc_tag is None else desc_tag.get('title')
        if title is None:
            continue
        product_desc=title.strip()
        lowered=product_desc.lower()
        if not all(word in lowered for word in words):
            continue
        price_tag=container.find("div",{"class":"sc-ac248257-1 bEaNkb"})
        if price_tag is None:
            continue
        try:
            product_price=float(price_tag.text.strip().replace("EGP ","").replace(",",""))
        except ValueError:
            # Unparsable price text: skip this card rather than abort the page.
            continue
        if not keep_scraped_price:
            if rate is None:
                rate=ConvertCurrency(currency)
            product_price=rate*product_price
        product_price=int(product_price)
        rating_tag=container.find("span",{"class":"ratingValue"})
        rating="Not Found" if rating_tag is None else rating_tag.text.strip()
        link_tag=container.a  # first anchor inside the product card
        href=None if link_tag is None else link_tag.get('href')
        details_link="Not Found" if href is None else "https://www.noon.com"+href
        Result.append(Product(product_desc,product_price,rating,details_link))
    return Result

## Function for Extracting Jumia Samsung Products by Price Range

In [51]:
def JumiaUrl(url,MinPrice,MaxPrice,path,currency="EGP"):
    """Scrape one Jumia search-results page and append in-range products to a CSV.

    The request is sent with browser-like headers (see `head` below).

    Parameters
    ----------
    url : str
        Address of the search-results page to download.
    MinPrice, MaxPrice : int or float
        Inclusive price bounds, compared against the (possibly converted) price.
    path : str
        CSV file that matching rows are appended to (UTF-8).
    currency : str, optional
        Target currency code. "EGP" or "Default" (any letter case) keeps the
        scraped price unchanged; any other value multiplies it by
        ``ConvertCurrency(currency)``.

    Returns
    -------
    None

    Notes
    -----
    Each written row is ``[description, price, rating, details link]``.
    Product cards without a title or without a parsable price are skipped;
    a missing rating or link is stored as "Not Found".
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }
    req = Request(url, headers=head)
    # `with` guarantees the connection is closed; a bare `client.close`
    # (no parentheses) would never run.
    with urlopen(req) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    mobilecontainer=htmlparsed.find_all("article",{"class":"prd _fb col c-prd"})
    # Prices are scraped in Egyptian pounds, so EGP needs no conversion request.
    keep_scraped_price=currency.strip().upper() in ("EGP","DEFAULT")
    rate=None  # fetched lazily, at most once per page, instead of once per product
    rows=[]
    for container in mobilecontainer:
        desc_tag=container.find("h3",{"class":"name"})
        price_tag=container.find("div",{"class":"prc"})
        if desc_tag is None or price_tag is None:
            continue
        try:
            product_price=float(price_tag.text.strip().replace("EGP ","").replace(",",""))
        except ValueError:
            # Unparsable price text (e.g. not a single number): skip this card.
            continue
        if not keep_scraped_price:
            if rate is None:
                rate=ConvertCurrency(currency)
            product_price=rate*product_price
        product_price=int(product_price)
        if not (MinPrice<=product_price<=MaxPrice):
            continue
        rating_tag=container.find("div",{"class":"stars _s"})
        if rating_tag is None:
            rating="Not Found"
        else:
            # Keep the first three characters, then drop a trailing " o"
            # (presumably left over from text like "4 out of 5" - TODO confirm).
            rating=rating_tag.text.strip()[0:3].replace(" o","")
        link_tag=container.find("a",{"class":"core"})
        href=None if link_tag is None else link_tag.get("href")
        details_link="Not Found" if href is None else "https://www.jumia.com.eg"+href
        rows.append([desc_tag.text.strip(),product_price,rating,details_link])
    if rows:
        # Open the output once per page rather than once per row.
        with open(path, 'a',encoding="utf-8",newline="") as f:
            csv.writer(f).writerows(rows)

## Function for Extracting Jumia Samsung Products by Product Keywords

In [52]:
def JumiaKeyword(url,keyword,currency="EGP"):
    """Scrape one Jumia search-results page and return products matching `keyword`.

    The request is sent with browser-like headers (see `head` below).

    Parameters
    ----------
    url : str
        Address of the search-results page to download.
    keyword : str
        Whitespace-separated words; a product matches when every word occurs
        (case-insensitively, as a substring) in its description.
    currency : str, optional
        Target currency code. "EGP" or "Default" (any letter case) keeps the
        scraped price unchanged; any other value multiplies it by
        ``ConvertCurrency(currency)``.

    Returns
    -------
    list of Product
        One ``Product`` per matching card, in page order. Cards without a
        title or a parsable price are skipped; a missing rating or link is
        stored as "Not Found".
    """
    Result=[]
    head = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }
    req = Request(url, headers=head)
    # `with` guarantees the connection is closed; a bare `client.close`
    # (no parentheses) would never run.
    with urlopen(req) as client:
        htmlcontent=client.read()
    htmlparsed = bs(htmlcontent,"html.parser")
    mobilecontainer=htmlparsed.find_all("article",{"class":"prd _fb col c-prd"})
    words=keyword.lower().split()  # loop-invariant, so computed once
    # Prices are scraped in Egyptian pounds, so EGP needs no conversion request.
    keep_scraped_price=currency.strip().upper() in ("EGP","DEFAULT")
    rate=None  # fetched lazily, at most once per page, instead of once per product
    for container in mobilecontainer:
        desc_tag=container.find("h3",{"class":"name"})
        if desc_tag is None:
            continue
        product_desc=desc_tag.text.strip()
        lowered=product_desc.lower()
        if not all(word in lowered for word in words):
            continue
        price_tag=container.find("div",{"class":"prc"})
        if price_tag is None:
            continue
        try:
            product_price=float(price_tag.text.strip().replace("EGP ","").replace(",",""))
        except ValueError:
            # Unparsable price text (e.g. not a single number): skip this card.
            continue
        if not keep_scraped_price:
            if rate is None:
                rate=ConvertCurrency(currency)
            product_price=rate*product_price
        product_price=int(product_price)
        rating_tag=container.find("div",{"class":"stars _s"})
        if rating_tag is None:
            rating="Not Found"
        else:
            # Keep the first three characters, then drop a trailing " o"
            # (presumably left over from text like "4 out of 5" - TODO confirm).
            rating=rating_tag.text.strip()[0:3].replace(" o","")
        link_tag=container.find("a",{"class":"core"})
        href=None if link_tag is None else link_tag.get("href")
        details_link="Not Found" if href is None else "https://www.jumia.com.eg"+href
        Result.append(Product(product_desc,product_price,rating,details_link))
    return Result

## Function for Creating Jumia Products Results File after applying Price Range

In [53]:
def ExtractJumiaData(MinPrice,MaxPrice,path,passedcurrency="EGP",NoOfPages=10):
    """Write Jumia Samsung products within a price range to a dated CSV file.

    Parameters
    ----------
    MinPrice, MaxPrice : int or float
        Inclusive price bounds forwarded to ``JumiaUrl``.
    path : str
        Directory in which the results file is created.
    passedcurrency : str, optional
        Currency code forwarded to ``JumiaUrl`` and embedded in the file name.
    NoOfPages : int, optional
        Exclusive upper bound of the page index: pages ``1 .. NoOfPages-1``
        are requested, so the default of 10 fetches pages 1-9.

    Returns
    -------
    None
    """
    import os  # local import keeps this function self-contained

    today=str(date.today())
    filename=f'{today} Jumia Samsung Products Price Range({MinPrice}{passedcurrency}-{MaxPrice}{passedcurrency})'
    # os.path.join uses the platform's separator instead of a hard-coded '\\'.
    filepath=os.path.join(path,f'{filename}.csv')
    file=CreateFile(filepath)
    try:
        for i in range (1,NoOfPages):
            JumiaUrl(f"https://www.jumia.com.eg/catalog/?q=samsung&page={i}#catalog-listing",MinPrice,MaxPrice,filepath,currency=passedcurrency)
    finally:
        # Release the handle even when a page request or parse step raises.
        file.close()

## Function for Creating List of All Jumia Products after Matching Input Keywords

In [54]:
def ExtractJumiaDatabyKeywords(keyword,passedcurrency="EGP",NoOfPages=10):
    """Collect Jumia Samsung products matching `keyword`, one list per result page.

    Parameters
    ----------
    keyword : str
        Keywords forwarded to ``JumiaKeyword``.
    passedcurrency : str, optional
        Currency code forwarded to ``JumiaKeyword``.
    NoOfPages : int, optional
        Exclusive upper bound of the page index: pages ``1 .. NoOfPages-1``
        are requested, so the default of 10 fetches pages 1-9.

    Returns
    -------
    list of list
        One entry per requested page, each being the list returned by
        ``JumiaKeyword`` for that page (possibly empty).
    """
    return [
        JumiaKeyword(f"https://www.jumia.com.eg/catalog/?q=samsung&page={i}#catalog-listing",keyword,passedcurrency)
        for i in range (1,NoOfPages)
    ]

## Function for Creating Amazon EG Products Results File after applying Price Range

In [55]:
def ExtractAmazonData(MinPrice,MaxPrice,path,passedcurrency="EGP",NoOfPages=8):
    """Write Amazon EG Samsung products within a price range to a dated CSV file.

    Parameters
    ----------
    MinPrice, MaxPrice : int or float
        Inclusive price bounds forwarded to ``AmazonUrl``.
    path : str
        Directory in which the results file is created.
    passedcurrency : str, optional
        Currency code forwarded to ``AmazonUrl`` and embedded in the file name.
    NoOfPages : int, optional
        Exclusive upper bound of the page index: pages ``1 .. NoOfPages-1``
        are requested, so the default of 8 fetches pages 1-7.

    Returns
    -------
    None
    """
    import os  # local import keeps this function self-contained

    today=str(date.today())
    filename=f'{today} Amazon Samsung Products Price Range({MinPrice}{passedcurrency}-{MaxPrice}{passedcurrency})'
    # os.path.join uses the platform's separator instead of a hard-coded '\\'.
    filepath=os.path.join(path,f'{filename}.csv')
    file=CreateFile(filepath)
    try:
        for i in range (1,NoOfPages):
            AmazonUrl(f"https://www.amazon.eg/-/en/s?k=samsung&page={i}&crid=3BSLLL8EOE57R&qid=1661370064&sprefix=samsung%2Caps%2C201&ref=sr_pg_7",MinPrice,MaxPrice,filepath,passedcurrency)
    finally:
        # Release the handle even when a page request or parse step raises.
        file.close()

## Function for Creating List of All Amazon EG Products after Matching Input Keywords

In [56]:
def ExtractAmazonDatabyKeywords(keyword,passedcurrency="EGP",NoOfPages=8):
    """Collect Amazon EG Samsung products matching `keyword`, one list per result page.

    Parameters
    ----------
    keyword : str
        Keywords forwarded to ``AmazonKeyword``.
    passedcurrency : str, optional
        Currency code forwarded to ``AmazonKeyword``.
    NoOfPages : int, optional
        Exclusive upper bound of the page index: pages ``1 .. NoOfPages-1``
        are requested, so the default of 8 fetches pages 1-7.

    Returns
    -------
    list of list
        One entry per requested page, each being the list returned by
        ``AmazonKeyword`` for that page (possibly empty).
    """
    return [
        AmazonKeyword(f"https://www.amazon.eg/-/en/s?k=samsung&page={i}&crid=3BSLLL8EOE57R&qid=1661370064&sprefix=samsung%2Caps%2C201&ref=sr_pg_7",keyword,passedcurrency)
        for i in range (1,NoOfPages)
    ]

## Function for Creating Noon Products Results File after applying Price Range

In [57]:
def ExtractNoonData(MinPrice,MaxPrice,path,passedcurrency="EGP",NoOfPages=10):
    """Write Noon Samsung products within a price range to a dated CSV file.

    Parameters
    ----------
    MinPrice, MaxPrice : int or float
        Inclusive price bounds forwarded to ``NoonUrl``.
    path : str
        Directory in which the results file is created.
    passedcurrency : str, optional
        Currency code forwarded to ``NoonUrl`` and embedded in the file name.
    NoOfPages : int, optional
        Exclusive upper bound of the page index: pages ``1 .. NoOfPages-1``
        are requested, so the default of 10 fetches pages 1-9.

    Returns
    -------
    None
    """
    import os  # local import keeps this function self-contained

    today=str(date.today())
    filename=f'{today} Noon Samsung Products Price Range({MinPrice}{passedcurrency}-{MaxPrice}{passedcurrency})'
    # os.path.join uses the platform's separator instead of a hard-coded '\\'.
    filepath=os.path.join(path,f'{filename}.csv')
    file=CreateFile(filepath)
    try:
        for i in range (1,NoOfPages):
            NoonUrl(f"https://www.noon.com/egypt-en/search/?limit=50&page={i}&q=samsung&sort%5Bby%5D=popularity&sort%5Bdir%5D=desc&gclid=CjwKCAjw6fyXBhBgEiwAhhiZsslQkX0jRP1KoQRDzquZahRPU_BQaKiMPCbs_HtxWZTOkd_2kWv7ZBoCxbIQAvD_BwE&utm_campaign=C1000151355N_eg_en_web_searchxxexactandphrasexxbrandpurexx08082022_noon_web_c1000088l_acquisition_sembranded_&utm_medium=cpc&utm_source=C1000088L",MinPrice,MaxPrice,filepath,passedcurrency)
    finally:
        # Release the handle even when a page request or parse step raises.
        file.close()

## Function for Creating List of All Noon Products after Matching Input Keywords

In [58]:
def ExtractNoonDatabyKeywords(keyword,passedcurrency="EGP",NoOfPages=10):
    """Collect Noon Samsung products matching `keyword`, one list per result page.

    Parameters
    ----------
    keyword : str
        Keywords forwarded to ``NoonKeyword``.
    passedcurrency : str, optional
        Currency code forwarded to ``NoonKeyword``.
    NoOfPages : int, optional
        Exclusive upper bound of the page index: pages ``1 .. NoOfPages-1``
        are requested, so the default of 10 fetches pages 1-9.

    Returns
    -------
    list of list
        One entry per requested page, each being the list returned by
        ``NoonKeyword`` for that page (possibly empty).
    """
    return [
        NoonKeyword(f"https://www.noon.com/egypt-en/search/?limit=50&page={i}&q=samsung&sort%5Bby%5D=popularity&sort%5Bdir%5D=desc&gclid=CjwKCAjw6fyXBhBgEiwAhhiZsslQkX0jRP1KoQRDzquZahRPU_BQaKiMPCbs_HtxWZTOkd_2kWv7ZBoCxbIQAvD_BwE&utm_campaign=C1000151355N_eg_en_web_searchxxexactandphrasexxbrandpurexx08082022_noon_web_c1000088l_acquisition_sembranded_&utm_medium=cpc&utm_source=C1000088L",keyword,passedcurrency)
        for i in range (1,NoOfPages)
    ]

## Main Program

In [59]:
def Main():
    """Interactive entry point: prompt for options and run the chosen comparison.

    Flow
    ----
    1. Optionally ask for a currency code (default "EGP").
    2. Option 1: ask for a price range and an output directory, then write
       one CSV per site (Noon, Amazon EG, Jumia) via the ``Extract*Data``
       functions.
    3. Option 2: ask for product keywords and print the matching products
       from Amazon EG, Jumia and Noon via the ``Extract*DatabyKeywords``
       functions.

    Any other (or non-numeric) option prints "Enter Correct Option!".
    Reads from stdin, writes to stdout, and returns None.
    """
    def show_products(site,pages,currency,empty_message):
        # `pages` is a list of per-page lists, so its length is the number of
        # pages, not products; flatten before deciding whether there is
        # anything to show.
        products=[product for page in pages for product in page]
        if not products:
            print(empty_message)
            return
        print("\n")
        print(f"---->Successfully Extracted {site} Data\n")
        for product in products:
            print(f"Product Description : {product.product_desc}")
            print(f"Product Price : {product.product_price} {currency}")
            print(f"Product Rating : {product.rating}")
            print(f"Details Link : {product.details_link}")
            print("\n")

    print("Welcome to The Automated Tool for Real-time Product and Price Comparison from Multiple eCommerce Websites\n----------------------------------------------------------------------------------------------------\n")
    print("This Tool is Scraping Samsung Products Data from Popular eCommerce Websites in Egypt which are : Amazon EG,Noon and Jumia\n")
    choose=input("Do you want to deal with prices in a currency other than the Egyptian pound ? (Y\\N)\n")
    flag="EGP"
    # Accept "y" as well as "Y", ignoring surrounding whitespace.
    if(choose.strip().upper()=='Y'):
        curr=input("Please Enter Currency ISO Code you want to deal with (e.g., USD, EUR, GBP) : ").strip()
        if curr:
            # A blank answer keeps the EGP default instead of building a broken URL.
            flag=curr.upper()
    print("Choose an Option to Continue\n")
    print("1.Comparing Samsung Products by Price Range\n")
    print("2.Comparing Prices by Samsung Product\n")
    try:
        option=int(input())
    except ValueError:
        # Non-numeric input falls through to the "Enter Correct Option!" branch.
        option=None
    if (option==1):
        MinPrice=int(input("Minimum Price : "))
        MaxPrice=int(input("Maximum Price : "))
        path=input("Enter The Path to Extract Result Files : ")
        print("\nConnecting to Noon....\n")
        time.sleep(3)
        print("Extracting Noon Data....\n")
        # `flag` is "EGP" unless the user chose another currency, so it can
        # always be passed through directly.
        ExtractNoonData(MinPrice,MaxPrice,path,flag)
        print("---->Successfully Extracted Noon Data\n")
        print("Connecting to AmazonEg....\n")
        time.sleep(3)
        print("Extracting AamazonEG Data....\n")
        ExtractAmazonData(MinPrice,MaxPrice,path,flag)
        print("---->Successfully Extracted Amazon Data\n")
        # This step talks to Jumia, so the message names Jumia (not Noon).
        print("Connecting to Jumia....\n")
        time.sleep(3)
        print("Extracting Jumia Data....\n")
        ExtractJumiaData(MinPrice,MaxPrice,path,flag)
        print("---->Successfully Extracted Jumia Data\n")
    elif option==2:
        y=input("Enter Product Keywords to Search for : ")
        time.sleep(3)
        print("Getting The List of Products matching the Keywords...")
        show_products("Amazon",ExtractAmazonDatabyKeywords(y,flag),flag,"Nothing to show at Amazon")
        show_products("Jumia",ExtractJumiaDatabyKeywords(y,flag),flag,"Nothing to show at Jumia")
        show_products("Noon",ExtractNoonDatabyKeywords(y,flag),flag,"Nothing to Show at Noon")
    else:
        print("Enter Correct Option!")
        
        
        
        

In [60]:
# Launch the interactive tool; it prompts on stdin for currency, option and inputs.
Main()

Welcome to The Automated Tool for Real-time Product and Price Comparison from Multiple eCommerce Websites
----------------------------------------------------------------------------------------------------

This Tool is Scraping Samsung Products Data from Popular eCommerce Websites in Egypt which are : Amazon EG,Noon and Jumia

Do you want to deal with prices in a currency other than the Egyptian pound ? (Y\N)
Y
Please Enter Currency ISO Code you want to deal with (e.g., USD, EUR, GBP) : USD
Choose an Option to Continue

1.Comparing Samsung Products by Price Range

2.Comparing Prices by Samsung Product

1
Minimum Price : 600
Maximum Price : 900
Enter The Path to Extract Result Files : D:\

Connecting to Noon....

Extracting Noon Data....

---->Successfully Extracted Noon Data

Connecting to AmazonEg....

Extracting AamazonEG Data....

---->Successfully Extracted Amazon Data

Connecting to Noon....

Extracting Jumia Data....

---->Successfully Extracted Jumia Data

