In [None]:
# Notebook-level description, assigned explicitly to the module's __doc__ attribute.
# NOTE(review): the dependency list below names requests, beautifulsoup4 and pandas only, but the
# scraping cell passes 'lxml' as the BeautifulSoup parser, so the lxml package appears to be
# required as well -- confirm and add it to the list.
# NOTE(review): "Prints extracted company details" is not reflected in the visible cells, which
# only build the DataFrame and write the CSV -- confirm whether a display step is missing.
__doc__ = """
Web Scraping from Live Website

This script requests an online webpage, extracts company details, and saves the data into a Pandas DataFrame.
It uses requests to fetch the webpage and BeautifulSoup to parse the data.

Dependencies:
    - Requests (`pip install requests`)
    - BeautifulSoup4 (`pip install beautifulsoup4`)
    - Pandas (`pip install pandas`)

Usage:
    Run the script to extract company details from a live website:
        python scrape_from_request.py

Output:
    - Prints extracted company details
    - Saves data to 'ambitionbox_companies.csv'
"""

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# --- Scrape configuration ---------------------------------------------------
# Listing URL template; the page number is substituted for each request.
LISTING_URL = "https://www.ambitionbox.com/list-of-companies?page={}"

# Pages FIRST_PAGE..LAST_PAGE (inclusive) are fetched, i.e. pages 1-29.
# NOTE(review): if 30 pages were intended, LAST_PAGE should be 30 -- confirm.
FIRST_PAGE = 1
LAST_PAGE = 29

# Seconds to wait for the server before requests gives up on a page.
REQUEST_TIMEOUT_S = 10

# Column order of the resulting table.
COMPANY_COLUMNS = ['Name', 'Rating', 'Review_Count', 'Tags']

# Browser-like request headers. They are identical for every page, so they are
# built once here instead of on every loop iteration.
# NOTE(review): "br" is advertised in Accept-Encoding; requests can only decode
# Brotli responses when a brotli package is installed -- confirm the environment
# provides one, otherwise response.text may be undecodable.
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Referer": "https://www.google.com/",
    "Connection": "keep-alive",
}


def _field_text(card, tag_name, css_class):
    """Return the stripped text of the first matching element inside ``card``.

    Args:
        card: A BeautifulSoup element for one company card.
        tag_name: HTML tag name to look for (e.g. ``'span'``).
        css_class: Value passed to ``find`` as the ``class_`` filter.

    Returns:
        The element's text with surrounding whitespace stripped, or the
        string ``"NaN"`` when no matching element exists.
    """
    node = card.find(tag_name, class_=css_class)
    return node.get_text(strip=True) if node is not None else "NaN"


def _parse_listing_page(html_source):
    """Extract one record per company card from a listing page.

    Args:
        html_source: HTML text of a single listing page.

    Returns:
        A list of dicts keyed by 'Name', 'Rating', 'Review_Count' and 'Tags'.
        Any field whose element is missing from a card is recorded as "NaN";
        this includes the name, so a card without a name link no longer
        aborts the whole scrape with an AttributeError.
    """
    soup = BeautifulSoup(html_source, 'lxml')
    cards = soup.find_all('div', class_="companyCardWrapper__primaryInformation")
    return [
        {
            'Name': _field_text(card, 'a', "companyCardWrapper__companyName"),
            'Rating': _field_text(card, 'div', "rating_text rating_text--md"),
            'Review_Count': _field_text(card, 'span', "companyCardWrapper__companyRatingCount"),
            'Tags': _field_text(card, 'span', "companyCardWrapper__interLinking"),
        }
        for card in cards
    ]


# --- Scrape ------------------------------------------------------------------
# Records are accumulated in a plain list and turned into a DataFrame once at
# the end. DataFrame.append was removed in pandas 2.0, and growing a frame
# inside a loop re-copies all previous rows on every iteration.
company_records = []

for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
    response = requests.get(
        LISTING_URL.format(page_number),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT_S,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
    response.raise_for_status()
    company_records.extend(_parse_listing_page(response.text))

# Passing the columns explicitly keeps the schema stable even if nothing was scraped.
CompanyData = pd.DataFrame(company_records, columns=COMPANY_COLUMNS)

In [None]:
# Write the accumulated company table to a CSV file in the current working directory.
# No index argument is passed, so pandas' default applies (the row index is written as the first column).
CompanyData.to_csv(path_or_buf='ambitionbox_companies.csv')