In [3]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [20]:
boot_camps = ['le-wagon', 'brainstation', 'simplilearn']

base_url = 'https://www.coursereport.com/schools/{}?shared_review={}#reviews'

all_reviews = []

for boot_camp in boot_camps:
    page = 1

    while True:
        # Get page content
        url = base_url.format(boot_camp, page)
        response = requests.get(url)
        if response.status_code != 200:
            print(f"Failed to fetch page {page} for {boot_camp}. Exiting...")
            break

        soup = BeautifulSoup(response.content, 'html.parser')
        review_elements = soup.find_all('li', class_='py-6')

        # Break the loop if no more reviews are found
        if not review_elements:
            print(f"No more reviews found for {boot_camp} on page {page}. Exiting...")
            break

        # Extract information from each review
        for review in review_elements:
            # Extract Field of Bootcamp
            field_text = None
            fields = review.find('div', class_='flex text-gray-medium space-x-4')
            if fields:
                raw_text = fields.text.strip()  # Extract raw text
                # Split on "•"
                segments = [segment.strip() for segment in raw_text.split('•') if segment.strip()]
                # Find the segment containing the bootcamp name
                for segment in segments:
                    if 'Bootcamp' in segment:
                        field_text = segment
                        break

            # Extract review date
            review_date = review.find('div', class_='text-gray-medium flex-shrink-0')
            review_date = review_date.text.strip() if review_date else None

            # Extract all categories and their respective star ratings
            ratings = {}
            categories = review.select('div.grid.grid-cols-2')  # Select all category sections
            for category in categories:
                category_name = category.find('div')
                stars = category.select('div.flex svg.fill-current.text-orange')
                if category_name and stars:
                    ratings[category_name.text.strip()] = len(stars)

            # Extract review content
            review_content = review.find('div', class_='text-sm text-gray-darkest leading-loose break-words')
            review_content = review_content.text.strip() if review_content else None

            # Combine all data into the reviews list
            all_reviews.append({
                'bootcamp': boot_camp,
                'review_date': review_date,
                'field': field_text,
                'Overall Experience': ratings.get('Overall Experience', None),
                'Job Assistance': ratings.get('Job Assistance', None),
                'Instructors': ratings.get('Instructors', None),
                'Curriculum': ratings.get('Curriculum', None),
                'review_content': review_content
            })

        # Increment the page counter
        if page > 1000:
            break
        else:
            page += 1


In [21]:
df = pd.DataFrame(all_reviews)
df.shape

(37668, 8)

In [22]:
df = pd.DataFrame(all_reviews)
df.drop_duplicates(inplace=True)
print(df.shape)
df.head()

(1676, 8)


Unnamed: 0,bootcamp,review_date,field,Overall Experience,Job Assistance,Instructors,Curriculum,review_content
0,le-wagon,"Sep 15, 2014",,5,5.0,5,5,I was a student at MakerSquare and I am curren...
1,le-wagon,"Dec 20, 2024",Consultant â€¢ Student â€¢ Data Science & AI B...,5,,5,5,"Great bootcamp, great content and teachers.Â 9..."
2,le-wagon,"Dec 17, 2024",Graduate â€¢ Web Development Bootcamp - Full-t...,5,5.0,5,5,Attending Le Wagon Cape Town's Web Development...
3,le-wagon,"Dec 16, 2024",Student â€¢ Web Development Bootcamp - Full-ti...,5,,5,5,I really appreciated the focus of the bootcamp...
4,le-wagon,"Dec 16, 2024",Graduate â€¢ Web Development Bootcamp - Full-t...,5,5.0,5,5,Le Wagon was an incredible journey. The bootca...


In [23]:
df['bootcamp'].value_counts()

bootcamp
le-wagon        570
brainstation    553
simplilearn     553
Name: count, dtype: int64

In [27]:
df['Overall Experience'].value_counts()

Overall Experience
5    1676
Name: count, dtype: int64