In [29]:
# Notebook-level description, bound to the module-level ``__doc__`` name.
# NOTE(review): the Dependencies list below omits lxml, yet the parsing cell
# passes 'lxml' as the BeautifulSoup parser, so lxml must be installed too.
# NOTE(review): the DataFrame is shown via a bare ``df`` expression rather than
# an explicit print; "Prints" below refers to that rendered cell output.
__doc__ = """
Web Scraping from a Saved HTML File

This script extracts company details (name, rating, reviews, and tags) from a locally saved HTML file.
It uses BeautifulSoup for parsing and converts the extracted data into a Pandas DataFrame.

Dependencies:
    - BeautifulSoup4 (`pip install beautifulsoup4`)
    - Pandas (`pip install pandas`)

Usage:
    Run the script to extract company details:
        python scrape_from_file.py

Output:
    - Prints extracted company details
    - Saves data to 'ambitionbox_companies.csv'
"""

In [30]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [31]:
# Show up to 100 characters per cell so the long "Tags" strings are not
# truncated when the DataFrame is rendered.
pd.options.display.max_colwidth = 100

In [32]:
# Read the locally saved page into memory as UTF-8 text; the context manager
# closes the handle as soon as the contents have been read.
with open('page_source_ambitionbox.html', mode='r', encoding='utf-8') as html_file:
    html_source = html_file.read()

In [33]:
# Parse the saved markup with the lxml parser (requires the lxml package).
soup = BeautifulSoup(markup=html_source, features='lxml')

In [34]:
# Collect every <div> carrying the company-card class; each match is one
# company entry that the extraction loop walks through.
company = soup.find_all(
    name='div',
    attrs={'class': "companyCardWrapper__primaryInformation"},
)

In [36]:
def _text_or_nan(card, tag_name, css_class):
    """Return the stripped text of the first matching descendant of ``card``.

    Args:
        card: A parsed element exposing ``find`` (one entry of ``company``).
        tag_name: Tag name to look for, e.g. ``'span'``.
        css_class: Value passed as ``class_`` to ``find``.

    Returns:
        The element's text with surrounding whitespace stripped, or the
        string ``"NaN"`` when no matching element exists.
    """
    node = card.find(tag_name, class_=css_class)
    return node.get_text(strip=True) if node is not None else "NaN"


# Parallel column lists: index k of each list describes the k-th company card.
Name = []
Rating = []
Review_Count = []
Tags = []

for card in company:
    # The name lookup used to call .get_text() on an unguarded .find() result,
    # so a card without the name anchor raised AttributeError. It now falls
    # back to "NaN" like the other three fields, keeping the lists aligned.
    Name.append(_text_or_nan(card, 'a', "companyCardWrapper__companyName"))
    Rating.append(_text_or_nan(card, 'div', "rating_text rating_text--md"))
    Review_Count.append(_text_or_nan(card, 'span', "companyCardWrapper__companyRatingCount"))
    Tags.append(_text_or_nan(card, 'span', "companyCardWrapper__interLinking"))

In [37]:
# Map each output column name to its list of extracted values.
data = dict(
    Name=Name,
    Rating=Rating,
    Review_Count=Review_Count,
    Tags=Tags,
)

In [38]:
# Build the table from the column mapping; the trailing bare expression makes
# the notebook render the frame as this cell's output.
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Rating,Review_Count,Tags
0,TCS,3.7,(90.5k),IT Services & Consulting | Bangalore / Bengaluru +390 other locations
1,Accenture,3.8,(57k),IT Services & Consulting | Bangalore / Bengaluru +205 other locations
2,Wipro,3.7,(53.5k),IT Services & Consulting | Bangalore / Bengaluru +342 other locations
3,Cognizant,3.7,(50.6k),IT Services & Consulting | Chennai +192 other locations
4,Capgemini,3.7,(42.1k),IT Services & Consulting | Bangalore / Bengaluru +168 other locations
5,HDFC Bank,3.9,(40k),Banking | Mumbai +1659 other locations
6,Infosys,3.6,(39.7k),IT Services & Consulting | Bangalore / Bengaluru +214 other locations
7,ICICI Bank,4.0,(38.4k),Banking | Mumbai +1355 other locations
8,HCLTech,3.5,(36.5k),IT Services & Consulting | Chennai +209 other locations
9,Tech Mahindra,3.5,(35.4k),IT Services & Consulting | Hyderabad / Secunderabad +294 other locations


In [39]:
# Persist the table next to the notebook. With pandas' default settings the
# row index is written too, as an unnamed first column.
df.to_csv(path_or_buf='ambitionbox_companies.csv')