### Step 1: Install Required Libraries

In [None]:
pip install selenium webdriver-manager pandas beautifulsoup4

### Step 2: Import Libraries

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import re  # For cleaning text using regex
from bs4 import BeautifulSoup

import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

### Step 3: Set Up Selenium WebDriver

In [3]:
# Set up Selenium WebDriver
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode
chrome_options.add_argument("--disable-gpu")  # Disable GPU acceleration
chrome_options.add_argument("--window-size=1920,1080")  # Set window size

# Initialize the WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

### Step 4: Load the Web Page
Load the target URL and wait for the initial cards to load.

In [5]:
# URL of the page to scrape
url = "https://www.coursera.org/career-academy?trk_ref=globalnav"

# Load the page
driver.get(url)

# Wait for the initial cards to load
try:
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "cds-9.css-b1mnpw.cds-11.cds-grid-item"))
    )
except Exception as e:
    print(f"Error waiting for initial cards to load: {e}")
    driver.quit()
    exit()

### Step 5: Click the "View All" Button
Locate and click the "View all (31 more)" button to load additional cards.

In [7]:
# Click the "View all (31 more)" button to load additional cards
try:
    view_all_button = driver.find_element(By.CSS_SELECTOR, 'button[data-track-component="view_all_career_cards"]')
    view_all_button.click()
    
    # Wait for the additional cards to load
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "cds-9.css-b1mnpw.cds-11.cds-grid-item"))
    )
except Exception as e:
    print(f"Error clicking the 'View all' button or waiting for additional cards: {e}")
    driver.quit()
    exit()

### Step 6: Parse the Page with BeautifulSoup
Use BeautifulSoup to parse the page source and extract all cards.

In [9]:
# Get the page source and parse it with BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Find all the cards with the specified class
cards = soup.find_all('div', class_='cds-9 css-b1mnpw cds-11 cds-grid-item')

print(f"Number of cards found: {len(cards)}")

Number of cards found: 43


### Step 7: Extract Data from Each Card
Loop through each card and extract the title, description, median salary, and jobs available. Use regex to clean the salary and jobs available text.

In [12]:
# Initialize a list to store the data
data = []

# Loop through each card and extract the details
for card in cards:
    try:
        # Extract the title
        title_element = card.find('h2', class_='cds-119 cds-Typography-base css-bbd009 cds-121')
        
        if title_element:
            title = title_element.text.strip()
        else:
            title = "Title not found"
            print("Title not found in card:", card.prettify())  # Debugging: Print the card HTML
        
        # Extract the description
        description = card.find('p', class_='css-4s48ix').text.strip()
        
        # Extract the salary and jobs available
        salary_jobs = card.find('div', class_='css-hr97go').text.strip()
        
        # Use regex to extract only the numbers
        median_salary = re.search(r'\$\d{1,3}(?:,\d{3})*', salary_jobs).group()  # Extract $90,500
        jobs_available = re.search(r'\d{1,3}(?:,\d{3})*', salary_jobs.split('jobs')[0]).group()  # Extract 82,489
        
        # Append the extracted data to the list
        data.append({
            'Title': title,
            'Description': description,
            'Median Salary': median_salary,
            'Jobs Available': jobs_available
        })
    except AttributeError as e:
        print(f"Error extracting data from a card: {e}")
        continue

### Step 8: Convert Data to a DataFrame
Convert the list of dictionaries into a pandas DataFrame.

In [14]:
# Convert the list to a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame as a table
print(df)

                               Title  \
0                       Data Analyst   
1                    Project Manager   
2         Cybersecurity Professional   
3                        UX Designer   
4              IT Support Specialist   
5                   Digital Marketer   
6                     Data Scientist   
7               Supply Chain Analyst   
8                    Product Manager   
9               Full Stack Developer   
10                        Bookkeeper   
11               Front-End Developer   
12             Social Media Marketer   
13     Business Intelligence Analyst   
14             Application Developer   
15        Human Resources Specialist   
16                     Data Engineer   
17                Back-End Developer   
18                   DevOps Engineer   
19             Technology Consultant   
20       Sales Operations Specialist   
21                     iOS Developer   
22                 Marketing Analyst   
23                IT Project Manager   


### Step 9: Save the DataFrame as a CSV File
Save the DataFrame to a CSV file for further analysis or sharing.

In [16]:
# Save the DataFrame to a CSV file
df.to_csv('career_academy_data.csv', index=False)

print("Data saved to 'career_academy_data.csv'")

Data saved to 'career_academy_data.csv'


### Step 10: Close the Browser
Close the Selenium WebDriver to free up resources.

In [19]:
# Close the browser
driver.quit()

In [20]:
df

Unnamed: 0,Title,Description,Median Salary,Jobs Available
0,Data Analyst,"Collect, organize, and transform data to make ...","$90,500",90500
1,Project Manager,Oversee the planning and execution of projects...,"$102,800",102800
2,Cybersecurity Professional,Develop strategies to protect organizations fr...,"$119,700",119700
3,UX Designer,Make digital and physical products easier and ...,"$121,200",121200
4,IT Support Specialist,Evaluate and troubleshoot technology issues so...,"$56,200",56200
5,Digital Marketer,Define and develop digital strategies to deliv...,"$61,300",61300
6,Data Scientist,Extract and analyze data to make informed busi...,"$138,100",138100
7,Supply Chain Analyst,Streamline supply chain operations and drive e...,"$65,900",65900
8,Product Manager,"Oversee the entirety of a product's lifecycle,...","$140,200",140200
9,Full Stack Developer,Create both front-end and back-end components ...,"$129,900",129900


In [29]:
# Convert 'Median Salary' to numeric for sorting
df['Salary'] = df['Median Salary'].replace('[\$,]', '', regex=True).astype(float)

# Sort by 'Median Salary' in descending order
df_sorted = df.sort_values(by='Salary', ascending=False)

# Convert 'Median Salary' back to formatted string
df_sorted['Salary'] = df_sorted['Salary'].apply(lambda x: f"${x:,.0f}")

# Display the sorted DataFrame
print(df_sorted[['Title', 'Salary']])

                               Title    Salary
17                Back-End Developer  $148,400
33           Cloud Support Associate  $141,200
24                 Android Developer  $141,200
21                     iOS Developer  $140,200
8                    Product Manager  $140,200
6                     Data Scientist  $138,100
18                   DevOps Engineer  $135,000
11               Front-End Developer  $130,900
16                     Data Engineer  $130,000
9               Full Stack Developer  $129,900
39              Mobile App Developer  $124,800
31               Solutions Architect  $122,800
38                     Game Designer  $121,200
3                        UX Designer  $121,200
23                IT Project Manager  $120,200
2         Cybersecurity Professional  $119,700
14             Application Developer  $114,600
13     Business Intelligence Analyst  $110,000
19             Technology Consultant  $104,300
1                    Project Manager  $102,800
42           

In [31]:
df

Unnamed: 0,Title,Description,Median Salary,Jobs Available,Salary
0,Data Analyst,"Collect, organize, and transform data to make ...","$90,500",90500,90500.0
1,Project Manager,Oversee the planning and execution of projects...,"$102,800",102800,102800.0
2,Cybersecurity Professional,Develop strategies to protect organizations fr...,"$119,700",119700,119700.0
3,UX Designer,Make digital and physical products easier and ...,"$121,200",121200,121200.0
4,IT Support Specialist,Evaluate and troubleshoot technology issues so...,"$56,200",56200,56200.0
5,Digital Marketer,Define and develop digital strategies to deliv...,"$61,300",61300,61300.0
6,Data Scientist,Extract and analyze data to make informed busi...,"$138,100",138100,138100.0
7,Supply Chain Analyst,Streamline supply chain operations and drive e...,"$65,900",65900,65900.0
8,Product Manager,"Oversee the entirety of a product's lifecycle,...","$140,200",140200,140200.0
9,Full Stack Developer,Create both front-end and back-end components ...,"$129,900",129900,129900.0
