In [48]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import re

def clean_text(text):
    """Return *text* reduced to word characters, digits and whitespace.

    The pound sign (U+00A3) and the check mark (U+2714) are dropped first,
    then every remaining character that is neither a word character nor
    whitespace is removed. A period that sits directly between two digits is
    kept so decimal values survive ("19.99" stays "19.99" instead of
    collapsing to "1999"). Surrounding whitespace is trimmed last, so gaps
    left next to removed symbols do not leak into the result.
    """
    text = text.replace("\u00a3", "").replace("\u2714", "")
    # Remove punctuation, sparing only a "." that has a digit on both sides.
    text = re.sub(r'[^\w\s.]|(?<!\d)\.|\.(?!\d)', '', text)
    return text.strip()

# Download the page and parse it once; the task sections read `soup`.
url = 'https://www.baraasallout.com/test.html'
# requests applies no timeout by default, so bound the wait explicitly.
response = requests.get(url, timeout=30)
# Fail on a 4xx/5xx reply instead of scraping an error page into the outputs.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Task 1: write the text of headings (h1/h2), paragraphs and list items to CSV.
headings = [node.get_text().strip() for node in soup.find_all(['h1', 'h2'])]
paragraphs = [node.get_text().strip() for node in soup.find_all('p')]
list_items = [node.get_text().strip() for node in soup.find_all('li')]

# Each section is a label row followed by one row per entry, with the entry
# placed in the second column.
text_rows = [['Section', 'Content']]
for label, entries in (
    ('Headings', headings),
    ('Paragraphs', paragraphs),
    ('List Items', list_items),
):
    text_rows.append([label, ''])
    text_rows.extend(['', entry] for entry in entries)

with open('Extract_Text_Data.csv', mode='w', newline='', encoding='utf-8') as csv_out:
    csv.writer(csv_out).writerows(text_rows)

print("Saved to Extract_Text_Data.csv")

# Task 2: export every table row that has exactly three <td> cells to CSV.
table_rows = soup.find_all('tr')
table_data = []

for tr in table_rows:
    cells = [td.get_text().strip() for td in tr.find_all('td')]
    if len(cells) != 3:
        # Rows with any other number of <td> cells (including none) are skipped.
        continue
    table_data.append(cells)

with open('Extract_Table_Data.csv', mode='w', newline='', encoding='utf-8') as csv_out:
    table_writer = csv.writer(csv_out)
    table_writer.writerow(['Product Name', 'Price', 'Stock Status'])
    table_writer.writerows(table_data)

print("Saved to Extract_Table_Data.csv")

# Task 3: scrape the book cards into Product_Information.json.
# A card is any <div> whose inline style mentions both 'border' and 'padding'.
book_cards = soup.find_all('div', attrs={"style": lambda value: value and 'border' in value and 'padding' in value})
books_info = []

for card in book_cards:
    title_tag = card.find('strong')
    book_title = clean_text(title_tag.text) if title_tag else "Unknown"

    # The price is the paragraph whose inline style contains "color: green".
    price_tag = card.find('p', attrs={"style": lambda style: style and "color: green" in style})
    price = clean_text(price_tag.text) if price_tag else "N/A"

    # Stock availability is taken from the last <p> of the card. Look the
    # paragraphs up once, and clean only real text: running the "N/A"
    # placeholder through clean_text would strip the slash and emit "NA",
    # which would not match the price placeholder.
    card_paragraphs = card.find_all('p')
    stock_info_clean = clean_text(card_paragraphs[-1].text) if card_paragraphs else "N/A"

    btn = card.find('button')
    button_text = clean_text(btn.text) if btn else "Add to Basket"

    books_info.append({
        "Book Title": book_title,
        "Price": price,
        "Stock Availability": stock_info_clean,
        "Button Text": button_text
    })

with open('Product_Information.json', 'w', encoding="utf-8") as bookfile:
    json.dump(books_info, bookfile, indent=4)

print("Saved to Product_Information.json")



# Task 4: record the name, type and default value of every <input> in a <form>.
forms = soup.find_all('form')

# A missing `value` attribute is stored as an empty string; a missing `name`
# or `type` is stored as None (JSON null).
form_details = [
    {
        'Field Name': field.get('name'),
        'Input Type': field.get('type'),
        'Default Value': field.get('value', ''),
    }
    for form in forms
    for field in form.find_all('input')
]

with open('Form_Details.json', 'w', encoding="utf-8") as json_out:
    json_out.write(json.dumps(form_details, indent=4))

print("Saved to Form_Details.json")

# Task 5: gather hyperlink text/targets and iframe sources into one JSON file.
# Only <a> tags that have an href and <iframe> tags that have a src are kept.
links = soup.find_all('a', href=True)
iframes = soup.find_all('iframe', src=True)

link_data = []
for anchor in links:
    link_data.append({'Text': anchor.get_text(), 'Href': anchor['href']})

video_data = []
for frame in iframes:
    video_data.append({'Src': frame['src']})

all_data = {'Links': link_data, 'Videos': video_data}

with open('Links_and_Multimedia.json', 'w', encoding="utf-8") as json_out:
    json_out.write(json.dumps(all_data, indent=4))

print("Saved to Links_and_Multimedia.json")

# Task 6: export id, name, price and colors of each product card to JSON.
products_data = []
for card in soup.find_all('div', class_='product-card'):
    name_node = card.find('p', class_='name')
    # Only a price paragraph carrying the inline style 'display: none;' matches.
    price_node = card.find('p', class_='price', style='display: none;')
    colors_node = card.find('p', class_='colors')

    if colors_node:
        # Drop the label so only the color list itself is stored.
        colors = colors_node.text.replace('Available colors: ', '')
    else:
        colors = 'N/A'

    # Any element that is missing from the card falls back to 'N/A'.
    products_data.append({
        'id': card.get('data-id'),
        'name': name_node.text if name_node else 'N/A',
        'price': price_node.text if price_node else 'N/A',
        'colors': colors,
    })

with open('Featured_Products.json', 'w', encoding="utf-8") as json_out:
    json.dump(products_data, json_out, indent=4)

print("Saved to Featured_Products.json")


Saved to Extract_Text_Data.csv
Saved to Extract_Table_Data.csv
Saved to Product_Information.json
Saved to Form_Details.json
Saved to Links_and_Multimedia.json
Saved to Featured_Products.json
