In [1]:
import pandas as pd
import time
from bs4 import BeautifulSoup
import re
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import requests

In [2]:
# Scrape the PSE Edge company directory into `df_pse`, one row per listed company.

# Base URL of the PSE Edge company directory
base_url = "https://edge.pse.com.ph/companyDirectory/search.ax"

# Column order of the resulting frame. Passing it to the DataFrame constructor
# keeps these columns present even when no rows were scraped, so later
# column lookups do not fail with a KeyError on an empty result.
pse_columns = ["Company Name", "Stock Symbol", "Sector", "Subsector"]

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the cell indefinitely.
request_timeout = 30

# Empty list to hold results
all_data = []

# Get data from pages 1 through 6.
# NOTE(review): the page count is hard-coded; companies on any later page are
# not fetched — confirm the directory still fits in six pages.
for page in range(1, 7):
    payload = {
        'pageNo': page,
        'keyword': '',
        'sortType': '',
        'dateSortType': 'DESC',
        'cmpySortType': 'ASC',
        'symbolSortType': 'ASC',
        'sector': 'ALL',
        'subsector': ''
    }

    # Send POST request (the site loads data via AJAX)
    response = requests.post(base_url, data=payload, timeout=request_timeout)
    # Fail loudly on an HTTP error instead of parsing an error page into zero rows
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all company rows inside the main table
    rows = soup.select('table.list tbody tr')

    for row in rows:
        cols = row.find_all('td')
        # Only rows with exactly five cells are treated as company rows;
        # the fifth cell is not stored.
        if len(cols) == 5:
            company_name = cols[0].text.strip()
            stock_symbol = cols[1].text.strip()
            sector = cols[2].text.strip()
            subsector = cols[3].text.strip()

            all_data.append({
                "Company Name": company_name,
                "Stock Symbol": stock_symbol,
                "Sector": sector,
                "Subsector": subsector
            })

    # Take break from requests
    time.sleep(1)

# Convert to df
df_pse = pd.DataFrame(all_data, columns=pse_columns)

In [3]:
# Sectors and subsectors of interest; a company is kept when it matches
# at least one entry from either list.
target_sectors = ["Industrial", "Holding Firms"]

target_subsectors = [
    "Electricity, Energy, Power, & Water",
    "Construction, Infra. & Allied Services",
    "Holding Firms",
]

# Boolean mask: True where the Sector OR the Subsector is one of the targets
sector_match = df_pse['Sector'].isin(target_sectors)
subsector_match = df_pse['Subsector'].isin(target_subsectors)
is_target = sector_match | subsector_match

# Keep only the identifying columns of the matching rows, renumbered from 0
df_pse_final = (
    df_pse
    .loc[is_target, ['Company Name', 'Stock Symbol']]
    .reset_index(drop=True)
)

# Persist the filtered list without the index column
df_pse_final.to_csv('filtered_pse_companies.csv', index=False)