In [2]:
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_page_data(url, timeout=30):
    """Scrape one listing page and return its columns as equal-length lists.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``lxml`` parser. Four lists are collected over the whole document:

    * ``app``   -- first line of the stripped text of each ``<h5>`` with
      class ``mt-3``
    * ``link``  -- ``href`` of each ``<a>`` whose ``href`` starts with ``http``
    * ``desc``  -- text of each ``<p>`` with class ``my-1``
    * ``price`` -- text of each ``<span>`` with class ``float-end``

    Shorter lists are right-padded with ``''`` so all four have the same
    length.

    NOTE(review): every list is gathered independently, so index ``i`` of
    one list is not guaranteed to belong to the same tool as index ``i`` of
    another (the link pattern matches any absolute URL on the page, not only
    tool links). Pairing the fields per tool needs the page's card markup,
    which is not visible from this code -- TODO confirm and scope the
    selectors to each card.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without it a stalled connection would block forever.

    Returns:
        Tuple ``(app, link, desc, price)`` of lists with identical length.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of parsing them as if they were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    app = [i.text.strip().split("\n")[0] for i in soup.find_all('h5', class_="mt-3")]
    link = [i.get("href") for i in soup.find_all("a", attrs={"href": re.compile("^http")})]
    desc = [i.text for i in soup.find_all("p", class_="my-1")]
    price = [i.text for i in soup.find_all("span", class_="float-end")]

    # Pad every column to the longest one so they can share a DataFrame.
    columns = (app, link, desc, price)
    max_length = max(len(column) for column in columns)
    for column in columns:
        column.extend([''] * (max_length - len(column)))

    return app, link, desc, price

# Base listing URL; each page is requested as "<url>?page=<n>"
url = "https://topai.tools/browse"
total_pages = 200  # Specify the total number of pages to scrape (you can change this as needed)

# Accumulators for the per-page columns returned by scrape_page_data
all_app = []
all_link = []
all_desc = []
all_price = []

# Page numbers whose request failed, kept so they can be inspected or retried
failed_pages = []

# Loop through all pages and scrape data
for page in range(1, total_pages + 1):
    page_url = f"{url}?page={page}"
    try:
        app, link, desc, price = scrape_page_data(page_url)
    except requests.RequestException as exc:
        # One failed request must not discard the pages already collected:
        # note the page and carry on with the next one.
        failed_pages.append(page)
        print(f"Skipping page {page} ({page_url}): {exc}")
        continue

    # Extend the lists with data from the current page
    all_app.extend(app)
    all_link.extend(link)
    all_desc.extend(desc)
    all_price.extend(price)

# scrape_page_data returns equal-length lists per page, so the four
# accumulators always have matching lengths here.
df = pd.DataFrame({
    "Tool_name": all_app,
    "Tool_URL": all_link,
    "Tool_Price": all_price,
    "Tool_Uses": all_desc,
})

# Export the DataFrame to a CSV file named "output.csv"
df.to_csv("output.csv", index=False)

# Display the DataFrame
print(df)
if failed_pages:
    print(f"Could not scrape {len(failed_pages)} page(s): {failed_pages}")
print('Data is scraped and stored in csv file')

     Tool_name                                         Tool_URL  Tool_Price  \
0                                https://twitter.com/ToolsTopai  Free trial   
1                               https://facebook.com/topaitools    Freemium   
2                              https://instagram.com/topaitools    Freemium   
3                  https://www.linkedin.com/company/topaitools/    Freemium   
4                      https://www.helplook.net/?via=topaitools    Freemium   
...        ...                                              ...         ...   
4595            https://www.eraser.io/diagramgpt?via=topaitools               
4596                 https://takethisandrun.com/?via=topaitools               
4597               https://www.supremeplans.com/?via=topaitools               
4598                         https://cookme.app/?via=topaitools               
4599                             https://twitter.com/ToolsTopai               

                                              Tool_

In [4]:
# `link`, `desc` and `price` are the module-level names left bound by the
# scraping loop, i.e. the columns of the most recently unpacked page. They are
# already padded to a common length by scrape_page_data, so the three counts
# printed here always match.
for caption, column in (
    ("Length of link:", link),
    ("Length of desc:", desc),
    ("Length of price:", price),
):
    print(caption, len(column))

Length of link: 23
Length of desc: 23
Length of price: 23
