In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import json
import requests



# get the content of html file 

In [2]:

page = requests.get("https://www.baraasallout.com/test.html")
soup = BeautifulSoup(page.text, 'html.parser')



# functions to save data into csv and json files 

In [3]:
def save_csv(data, filename):
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Saved: {filename}")



In [4]:

def save_json(data, filename):
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Step 1: Extract Text Data

In [8]:
h_1 = soup.find_all('h1')
h_1



[<h1>Web Scraping Practice</h1>]

In [9]:
h_2 = soup.find_all('h2')
h_2


[<h2>Available Products</h2>,
 <h2>Product Table</h2>,
 <h2>Watch This Video</h2>,
 <h2>Contact Us</h2>,
 <h2>Product Information</h2>,
 <h2>Featured Products</h2>]

In [10]:
all_p = soup.find_all('p')
all_p


[<p>Welcome to the web scraping task! Use your skills to extract the required data from this page.</p>,
 <p><strong>Sharp Objects</strong></p>,
 <p style="color: green;">£47.82</p>,
 <p style="color: green;">✔ In stock</p>,
 <p><strong>In a Dark, Dark Wood</strong></p>,
 <p style="color: green;">£19.63</p>,
 <p style="color: green;">✔ In stock</p>,
 <p><strong>The Past Never Ends</strong></p>,
 <p style="color: green;">£56.50</p>,
 <p style="color: green;">✔ In stock</p>,
 <p><strong>A Murder in Time</strong></p>,
 <p style="color: green;">£16.64</p>,
 <p style="color: green;"> Out stock</p>,
 <p class="name">Wireless Headphones</p>,
 <p class="price" style="display: none;">$49.99</p>,
 <p class="colors">Available colors: Black, White, Blue</p>,
 <p class="name">Smart Speaker</p>,
 <p class="price" style="display: none;">$89.99</p>,
 <p class="colors">Available colors: Grey, Black</p>,
 <p class="name">Smart Watch</p>,
 <p class="price" style="display: none;">$149.99</p>,
 <p class="co

In [11]:
all_li = soup.find_all('li')
all_li
 

[<li class="highlight">Laptop</li>,
 <li>Smartphone</li>,
 <li>Tablet</li>,
 <li>Smartwatch</li>]

In [12]:
for li in all_li:
    print(li.get_text())

Laptop
Smartphone
Tablet
Smartwatch


In [13]:
temp = h_1 , h_2 , all_li , all_p 
save_csv(temp , "Extract_Table_Data.csv")

Saved: Extract_Table_Data.csv


# Step 2: Extract Table Data

In [16]:
tables = soup.find_all('table')
tables

[<table>
 <tr>
 <th>Product</th>
 <th>Price</th>
 <th>In Stock</th>
 </tr>
 <tr>
 <td>Laptop</td>
 <td>$1000</td>
 <td>Yes</td>
 </tr>
 <tr>
 <td>Smartphone</td>
 <td>$800</td>
 <td>No</td>
 </tr>
 <tr>
 <td>Tablet</td>
 <td>$500</td>
 <td>Yes</td>
 </tr>
 </table>]

In [22]:

table = soup.find('table')
if table:
    rows = [
        {"Product": cols[0].text.strip(), "Price": cols[1].text.strip(), "in Stock ": cols[2].text.strip()}
        for row in table.find_all('tr')[1:] if (cols := row.find_all('td'))
    ]
    save_csv(rows, "Extract_Table_Data.csv")

Saved: Extract_Table_Data.csv


# Step 3: Extract Product Cards

In [23]:

product_cards = [
    {
        "Book Title": card.find('h3').text.strip(),
        "Price": card.find('p', class_='price').text.strip(),
        "Availability": card.find('p', class_='availability').text.strip(),
        "Button": card.find('button').text.strip()
    }
    for card in soup.find_all('div', class_='book-card')
]
save_json(product_cards, "Product_Information.json")

# Step 4: Extract Form Details

In [24]:
forms = soup.find_all('form')
forms

[<form>
 <label for="username">Username:</label>
 <input id="username" name="username" placeholder="Enter your username" type="text"/>
 <label for="password">Password:</label>
 <input id="password" name="password" placeholder="Enter your password" type="password"/>
 <label for="options">Choose an option:</label>
 <select id="options" name="options">
 <option value="option1">Option 1</option>
 <option value="option2">Option 2</option>
 <option value="option3">Option 3</option>
 </select>
 <label>
 <input name="terms" type="checkbox"/> I agree to the terms and conditions
             </label>
 <input type="submit" value="Submit"/>
 </form>]

In [8]:

form = soup.find('form')
if form:
    form_details = [
        {"Field Name": inp.get('name', 'N/A'), "Type": inp.get('type', 'N/A'), "Default Value": inp.get('value', '')}
        for inp in form.find_all('input')
    ]
    save_json(form_details, "Form_Details.json")

# Step 5: Extract Links and Multimedia

In [9]:

links = [{"Text": a.text.strip(), "Href": a['href']} for a in soup.find_all('a', href=True)]
iframe = soup.find('iframe')
if iframe:
    links.append({"Text": "Video", "Href": iframe.get('src')})
save_json(links, "Links_and_Multimedia.json")

# Step 6: Extract Featured Products

In [10]:
featured_products = [
    {
        "ID": product['data-id'],
        "Name": product.find('span', class_='name').text.strip(),
        "Price": product.find('span', class_='price').text.strip(),
        "Colors": product.find('span', class_='colors').text.strip()
    }
    for product in soup.find_all('div', class_='featured-product')
]
save_json(featured_products, "Featured_Products.json")

# show the results 

In [15]:
df = pd.read_csv("Extract_Text_Data.csv")
df 


Unnamed: 0,Type,Content
0,Heading,Web Scraping Practice
1,Heading,Available Products
2,Heading,Product Table
3,Heading,Watch This Video
4,Heading,Contact Us
5,Heading,Product Information
6,Heading,Featured Products
7,Paragraph,Welcome to the web scraping task! Use your ski...
8,Paragraph,Sharp Objects
9,Paragraph,£47.82


In [12]:
df = pd.read_csv("Extract_Table_Data.csv")
df 


Unnamed: 0,Product,Price,in Stock
0,Laptop,$1000,Yes
1,Smartphone,$800,No
2,Tablet,$500,Yes


In [13]:
print (product_cards)

[]


In [14]:
print (form)

<form>
<label for="username">Username:</label>
<input id="username" name="username" placeholder="Enter your username" type="text"/>
<label for="password">Password:</label>
<input id="password" name="password" placeholder="Enter your password" type="password"/>
<label for="options">Choose an option:</label>
<select id="options" name="options">
<option value="option1">Option 1</option>
<option value="option2">Option 2</option>
<option value="option3">Option 3</option>
</select>
<label>
<input name="terms" type="checkbox"/> I agree to the terms and conditions
            </label>
<input type="submit" value="Submit"/>
</form>


In [15]:
print (links )

[{'Text': 'Video', 'Href': 'https://www.youtube.com/watch?v=ujf9RNuBdCU'}]
