# <strong>Httpx:</strong> Web Scraping

**Name:** Arsalan Ali<br>
**Email:** arslanchaos@gmail.com

---

### **Table of Contents**
* Website to Scrape: "Thomann"
* Link of the site: https://www.thomann.de/intl/lp_models.html

**Note :** Columns to extract
*   Manufacturer
*   Title
*   Price

---

In [1]:
# Importing Libraries
import httpx
import csv
from selectolax.parser import HTMLParser
from dataclasses import dataclass, asdict

In [2]:
# Creating Data Class to deal with datatypes
@dataclass
class Product:
    """A single scraped product record; all three fields are stored as strings."""

    manufacturer: str
    title: str
    price: str

In [3]:
# Setting URL, Header and Parser
def get_html(page):
    """Download one page of the Thomann LP-models listing and parse it.

    Args:
        page: Page number substituted into the ``pg`` query parameter.

    Returns:
        An ``HTMLParser`` built from the response body text.

    Raises:
        httpx.HTTPStatusError: If the server responds with a 4xx/5xx status.
    """
    # NOTE(review): ``ls=25`` presumably sets the number of items per page — confirm.
    url = f"https://www.thomann.de/intl/lp_models.html?ls=25&pg={page}"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    resp = httpx.get(url, headers=headers)
    # Fail loudly on error responses; otherwise an error page would be parsed
    # as a listing and silently yield zero products.
    resp.raise_for_status()
    return HTMLParser(resp.text)

# Setting up the Products using CSS Selectors
def parse_products(html):
    """Extract manufacturer, title and price from every product tile.

    Tiles that lack any of the three elements, or whose price element has no
    text, are skipped so that one incomplete tile does not abort the scrape.

    Args:
        html: Parsed document exposing ``css``; each matched node must expose
            ``css_first`` and ``text``.

    Returns:
        A list of dicts with the keys ``manufacturer``, ``title`` and
        ``price``, one per complete product tile, in document order.
    """
    results = []
    for item in html.css("div.product"):
        manufacturer_node = item.css_first("span.title__manufacturer")
        title_node = item.css_first("span.title__name")
        price_node = item.css_first("div.product__price-group")
        # css_first yields None when the selector matches nothing.
        if manufacturer_node is None or title_node is None or price_node is None:
            continue

        price_tokens = price_node.text().split()
        if not price_tokens:
            continue

        new_item = Product(
            manufacturer=manufacturer_node.text(),
            title=title_node.text(),
            # Only the first whitespace-separated token of the price group is
            # kept, with "." swapped for ",".
            price=price_tokens[0].replace(".", ","),
        )
        results.append(asdict(new_item))
    return results

# Convert data to CSV
def to_csv(res):
    """Append product rows to ``guitars.csv`` in the current directory.

    The header row is written only when the file does not exist yet, so
    repeated calls keep extending the same file.

    Args:
        res: Iterable of dicts with the keys ``manufacturer``, ``title`` and
            ``price``.
    """
    import os.path

    file_exists = os.path.isfile("guitars.csv")
    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" on write end up with "\r\r\n" and blank rows.
    with open("guitars.csv", "a", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=["manufacturer", "title", "price"])
        if not file_exists:
            writer.writeheader()
        writer.writerows(res)


# Runs the whole pipeline: fetch, parse and save each page
def main():
    """Scrape listing pages 1 through 24, saving each page's rows as it goes."""
    first_page, stop_page = 1, 25  # stop_page is exclusive
    for page_number in range(first_page, stop_page):
        to_csv(parse_products(get_html(page_number)))

In [4]:
# Run the scraper. Rows are appended to guitars.csv, so re-running this cell
# without deleting the file first adds the same products again.
main()

In [5]:
import pandas as pd

# Load the scraped file back into a DataFrame to inspect the result.
# NOTE(review): to_csv opens the file without an explicit encoding, while this
# read forces Latin-1 — confirm the two agree for non-ASCII product titles.
pd.read_csv("guitars.csv", encoding="Latin-1")

Unnamed: 0,manufacturer,title,price
0,Harley Benton,SC-1000 SBK Progressive Line,175
1,Harley Benton,Electric Guitar Kit Single Cut,87
2,Harley Benton,SC-450 BK Classic Series,123
3,Harley Benton,SC-450 P90 GT Classic Series,132
4,Harley Benton,SC-450Plus HB Vintage Series,158
...,...,...,...
586,ESP,E-II Eclipse Snow Whit B-Stock,1939
587,Gretsch,G2210 Streaml. Jr. Jet B-Stock,229
588,Gretsch,G6128T-53 VS Duo Jet B B-Stock,2399
589,ESP,LTD EC-1000FR Black Sa B-Stock,1025
