<a href="https://colab.research.google.com/github/BENMEZIAN/Google-colab/blob/main/Web_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Scrape company listings (name, location, summary, supplies, phone, website) from **https://dz.kompass.com/x/producer/a/engrais-organiques/22250/** into `company_data.csv`

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Scrape the Kompass organic-fertiliser producer listing into company_data.csv.
url = "https://dz.kompass.com/x/producer/a/engrais-organiques/22250/"


def _text_of(node):
    """Return the stripped text of a parsed HTML node, or '' when it is None."""
    return node.text.strip() if node is not None else ""


# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each company is rendered inside a <div class="prod_list">
    company_listings = soup.find_all('div', class_='prod_list')

    # Open a CSV file in write mode
    with open('company_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["Company Name", "Commune", "Wilaya", "Summary", "Supplies", "Phone", "Email", "Website"])
        writer.writeheader()

        # find() returns None for an absent element, so every lookup below
        # goes through _text_of (or an explicit None check) instead of
        # dereferencing .text directly; a listing with a missing field then
        # yields an empty cell rather than aborting the whole export.
        for company_listing in company_listings:
            company_name = _text_of(company_listing.find('span', class_='titleSpan'))

            # Location is expected as "<commune> - <wilaya>". partition()
            # tolerates a missing separator (wilaya becomes '') and extra
            # separators (everything after the first one goes to wilaya).
            location_text = _text_of(company_listing.find('span', class_='placeText'))
            commune, _, wilaya = location_text.partition(' - ')

            summary = _text_of(company_listing.find('span', class_='product-summary'))

            # Supplies are the items of the first <ul> in the listing, if any
            supplies_list = company_listing.find('ul')
            if supplies_list is not None:
                supplies = [li.text.strip() for li in supplies_list.find_all('li')]
            else:
                supplies = []

            phone = _text_of(company_listing.find('a', class_='showMobile'))

            # No email lookup is performed; the column holds a placeholder
            email = "Unknown"  # Not found in provided HTML snippet

            # The website column holds the href of the first link in the listing
            website_anchor = company_listing.find('a', href=True)
            website = website_anchor['href'] if website_anchor is not None else "Website not found"

            # Write row to CSV file
            writer.writerow({"Company Name": company_name,
                             "Commune": commune,
                             "Wilaya": wilaya,
                             "Summary": summary,
                             "Supplies": ', '.join(supplies),
                             "Phone": phone,
                             "Email": email,
                             "Website": website})

    print("Data has been saved to company_data.csv")
else:
    print("Failed to retrieve data from the URL:", url)

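Scrape company names, locations, functions and services from the first three listing pages of **https://www.pagesmaghreb.com/entreprises/chimie-et-pharmacie/engrais-et-fertilisants-production/algerie** into `companies_data.csv`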

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Function to extract data from each page
def extract_data_from_page(url):
    """Scrape the company cards from one listing page.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list with one dict per company card found, keyed by 'nom',
        'commune - wilaya', 'fonction' and 'prestation'. A field whose
        element is missing from the card is an empty string. An empty list
        is returned, after printing a message, when the page cannot be
        fetched (connection error, timeout, or non-200 status).
    """
    def text_of(node):
        # find() yields None for an absent element; map that to ''.
        return node.text.strip() if node is not None else ''

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        response = None

    # Network failures take the same "print and return []" path as a bad status.
    if response is None or response.status_code != 200:
        print(f"Failed to fetch data from URL: {url}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    company_divs = soup.find_all('div', class_='relative md:flex items-center bg-white rounded-md border shadow mb-4 max-w-screen-md')

    results = []
    for company_div in company_divs:
        company_name = text_of(company_div.find('h2', class_='transition text-lg font-bold tracking-tight text-gray-700 hover:text-gray-900'))
        location = text_of(company_div.find('span', class_='flex items-center justify-center md:justify-start text-xs text-gray-600 font-bold'))

        # The first matching paragraph is stored as 'fonction' and the second
        # as 'prestation'. Query once and guard the indices so a card with
        # fewer paragraphs does not raise IndexError.
        paragraphs = company_div.find_all('p', class_='font-normal text-xs text-gray-600')
        role = paragraphs[0].text.strip() if len(paragraphs) > 0 else ''
        prestation = paragraphs[1].text.strip() if len(paragraphs) > 1 else ''

        results.append({
            'nom': company_name,
            'commune - wilaya': location,
            'fonction': role,
            'prestation': prestation
        })

    return results

# Listing pages to scrape, one entry per results page
urls = [
    "https://www.pagesmaghreb.com/entreprises/chimie-et-pharmacie/engrais-et-fertilisants-production/algerie?&page=1",
    "https://www.pagesmaghreb.com/entreprises/chimie-et-pharmacie/engrais-et-fertilisants-production/algerie?&page=2",
    "https://www.pagesmaghreb.com/entreprises/chimie-et-pharmacie/engrais-et-fertilisants-production/algerie?&page=3"
]

# Accumulate the rows of every page, in page order, into one flat list
all_results = []
for url in urls:
    all_results += extract_data_from_page(url)

# Column order of the output file; the names match the keys of each row dict
fieldnames = ['nom', 'commune - wilaya', 'fonction', 'prestation']

# Write the header followed by all collected rows in a single pass
with open('companies_data.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_results)

print("Data extraction and writing to CSV completed successfully.")

Scrape company name, address, telephone, fax and products from **http://www.made-in-algeria.com/data/et_recherche.php?mode_recherche_et=ps&id_ps=204373** into `company_info.csv`

In [None]:
from bs4 import BeautifulSoup
import requests
import csv

# Scrape the made-in-algeria.com search results into company_info.csv.
url = "http://www.made-in-algeria.com/data/et_recherche.php?mode_recherche_et=ps&id_ps=204373"
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)

# Parse the HTML content
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the tables with the specified attributes
    tables = soup.find_all('table', attrs={'width': '98%', 'border': '0', 'cellpadding': '0', 'cellspacing': '0', 'bgcolor': '#FFFFFF'})

    # Collect one CSV row per company table
    company_rows = []
    for table in tables:
        rows = table.find_all('tr')

        # A company table is read as three rows: name, address/contact,
        # products. Skip any matching table that does not have them.
        if len(rows) < 3:
            continue

        # First row: company name. A table without the name tag is skipped
        # rather than crashing on None.
        name_tag = rows[0].find('b', class_='tsize14')
        if name_tag is None:
            continue
        company_name = name_tag.text.strip()

        # Second row: address in the first cell, "<telephone> - <fax>" in the
        # second. Either part may be absent, in which case it is left empty.
        cells = rows[1].find_all('td')
        address = cells[0].text.strip() if cells else ''
        contact_parts = cells[1].text.strip().split(' - ') if len(cells) > 1 else ['']
        telephone = contact_parts[0]
        fax = contact_parts[1] if len(contact_parts) > 1 else ''

        # Third row: products
        products_tag = rows[2].find('td', class_='txt_gray1')
        products = products_tag.text.strip() if products_tag is not None else ''

        company_rows.append([company_name, address, telephone, fax, products])

    # The file is opened once, after the loop. Opening it in 'w' mode inside
    # the loop truncates it for every table, leaving only the last company.
    with open('company_info.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Company Name', 'Address', 'Telephone', 'Fax', 'Products'])
        writer.writerows(company_rows)

    print("Data written to company_info.csv successfully.")
else:
    print("Failed to retrieve the webpage")

Scrape company name, industry, address and telephone from **https://www.goafricaonline.com/dz/annuaire/industrie-pharmaceutique** into `pharmaceutical_companies.csv`

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Function to extract data from each company card
def extract_data_from_card(card):
    """Extract the listing fields from one company card.

    Args:
        card: Parsed element of a single company card. It must offer
            ``find(name, class_=...)`` returning either ``None`` or a node
            with a ``text`` attribute (as a BeautifulSoup tag does).

    Returns:
        ``[company_name, industry, address, telephone]`` as stripped strings.
        A field whose element is missing from the card is ``''``. The
        telephone is the text after the first ``':'`` of the phone link
        (up to the next ``':'``, if any), or the whole link text when it
        contains no colon.
    """
    def text_of(node):
        # find() yields None for an absent element; map that to ''.
        return node.text.strip() if node is not None else ''

    company_name = text_of(card.find('a', class_='stretched-link'))
    industry = text_of(card.find('div', class_='text-14 text-brand-blue'))
    address = text_of(card.find('address'))

    # The phone link text carries a label before a colon. Keep the part
    # after it, but do not raise IndexError when the label is absent.
    phone_parts = text_of(card.find('a', class_='text-13')).split(':')
    telephone = (phone_parts[1] if len(phone_parts) > 1 else phone_parts[0]).strip()

    return [company_name, industry, address, telephone]

# Download the pharmaceutical-industry directory page
url = 'https://www.goafricaonline.com/dz/annuaire/industrie-pharmaceutique'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)

# Parse the HTML content
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each company card is an <article> carrying the 'relative' class;
    # turn every card into one CSV row.
    company_cards = soup.find_all('article', class_='relative')
    companies = [extract_data_from_card(card) for card in company_cards]

    # Write the header followed by one row per company
    with open('pharmaceutical_companies.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Company Name', 'Industry', 'Address', 'Telephone'])
        writer.writerows(companies)

    print("Data written to pharmaceutical_companies.csv successfully.")
else:
    print(f"Failed to retrieve the webpage: {url}")