In [4]:
import csv
import re
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

def _extract_company_name(driver):
    """Return the company name shown on the page the driver currently displays.

    Two page layouts are handled: when the current URL contains "issuer" the
    name is read from the title element inside ``div#izdavach``; otherwise it
    is taken from the element with ID ``titleKonf2011``. If the element lookup
    fails, the error is printed and "Name not found" is returned.
    """
    if "issuer" in driver.current_url:
        print("Detected 'issuer' in URL.")
        try:
            company_name = driver.find_element(By.CSS_SELECTOR, "div#izdavach div.col-md-8.title").text
            print(f"Extracted company name: {company_name}")
        except Exception as e:
            # Best effort: a missing element must not abort the whole scrape.
            print(f"Error extracting company name (structure 1): {e}")
            company_name = "Name not found"
        return company_name

    print("No 'issuer' in URL. Attempting alternative structure...")
    try:
        raw_name = driver.find_element(By.ID, "titleKonf2011").text
        print(f"Raw company name: {raw_name}")  # Debugging: Print raw company name
        # Keep only the text after the second " - " separator. When the
        # separator is absent, split() returns the whole string, so no extra
        # membership check is needed.
        company_name = raw_name.split(" - ", 2)[-1]
        print(f"Processed company name: {company_name}")  # Debugging: Print processed name
    except Exception as e:
        # Best effort: a missing element must not abort the whole scrape.
        print(f"Error extracting company name (structure 2): {e}")
        company_name = "Name not found"
    return company_name


def _write_companies_csv(companies):
    """Write the ``[id, code, name]`` rows to the output CSV, preceded by a header row.

    The file is ``stockapp/data/company_codes_and_names.csv`` (relative to the
    current working directory); the directory is created if it is missing.
    """
    output_dir = "stockapp/data"
    os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist
    output_file = os.path.abspath(os.path.join(output_dir, "company_codes_and_names.csv"))

    # Print the file location
    print(f"Saving data to: {output_file}")
    with open(output_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Company ID", "Company Code", "Company Name"])  # Write the header
        writer.writerows(companies)  # Write the company data


def company_names():
    """Scrape company codes and names from mse.mk and save them to a CSV file.

    The company codes are read from the ``Code`` dropdown on the symbol-history
    page; values that are empty or contain a digit are skipped. Each remaining
    code's page is then opened to look up the company name, and the rows
    ``[ID, code, name]`` (IDs start at 1) are written to
    ``stockapp/data/company_codes_and_names.csv``.

    Errors raised while scraping or saving are printed rather than propagated.
    The CSV is written once, after scraping; if scraping stops early because of
    an error, the rows collected up to that point are still saved. Nothing is
    written when no rows were collected.

    Returns:
        None.
    """
    # Define the URL to scrape company codes
    url = "https://www.mse.mk/mk/stats/symbolhistory/kmb"
    print(f"Accessing URL: {url}")

    # Set up the Chrome WebDriver with headless option
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    # Collected rows of [ID, code, name]; kept outside the scraping try-block
    # so partial results survive a failure part-way through the loop.
    companies = []

    try:
        try:
            # Open the page
            driver.get(url)
            print("Page loaded successfully.")

            # Find the dropdown and get the company codes
            print("Locating dropdown by ID 'Code'...")
            select_element = Select(driver.find_element(By.ID, "Code"))
            # Read each option's value once: every get_attribute() call is a
            # separate request to the browser.
            option_values = (option.get_attribute("value") for option in select_element.options)
            company_codes = [value for value in option_values
                             if value and not re.search(r'\d', value)]
            print(f"Found {len(company_codes)} company codes: {company_codes}")

            # Loop through each company code with an ID starting from 1
            for index, code in enumerate(company_codes, start=1):
                print(f"Processing company code {code} (ID: {index})...")
                company_url = f"https://www.mse.mk/mk/symbol/{code}"
                print(f"Navigating to {company_url}...")
                driver.get(company_url)

                # Add the company ID, code, and name to the list
                companies.append([index, code, _extract_company_name(driver)])
        except Exception as e:
            print(f"An error occurred: {e}")

        # Save once, after scraping, instead of rewriting the whole file for
        # every company.
        if companies:
            try:
                _write_companies_csv(companies)
            except Exception as e:
                print(f"An error occurred: {e}")
    finally:
        # Quit the driver
        driver.quit()
        print("Web driver closed.")

# Run the scraper: collects the company codes and names and saves them to
# stockapp/data/company_codes_and_names.csv (progress is printed as it goes).
company_names()


Accessing URL: https://www.mse.mk/mk/stats/symbolhistory/kmb
Page loaded successfully.
Locating dropdown by ID 'Code'...
Found 171 company codes: ['ADIN', 'ALK', 'ALKB', 'AMBR', 'AMEH', 'APTK', 'ATPP', 'AUMK', 'BANA', 'BGOR', 'BIKF', 'BIM', 'BLTU', 'CBNG', 'CDHV', 'CEVI', 'CKB', 'CKBKO', 'DEBA', 'DIMI', 'EDST', 'ELMA', 'ELNC', 'ENER', 'ENSA', 'EUHA', 'EUMK', 'EVRO', 'FAKM', 'FERS', 'FKTL', 'FROT', 'FUBT', 'GALE', 'GDKM', 'GECK', 'GECT', 'GIMS', 'GRDN', 'GRNT', 'GRSN', 'GRZD', 'GTC', 'GTRG', 'IJUG', 'INB', 'INDI', 'INEK', 'INHO', 'INOV', 'INPR', 'INTP', 'JAKO', 'JULI', 'JUSK', 'KARO', 'KDFO', 'KJUBI', 'KKFI', 'KKST', 'KLST', 'KMB', 'KMPR', 'KOMU', 'KONF', 'KONZ', 'KORZ', 'KPSS', 'KULT', 'KVAS', 'LAJO', 'LHND', 'LOTO', 'LOZP', 'MAGP', 'MAKP', 'MAKS', 'MB', 'MERM', 'MKSD', 'MLKR', 'MODA', 'MPOL', 'MPT', 'MPTE', 'MTUR', 'MZHE', 'MZPU', 'NEME', 'NOSK', 'OBPP', 'OILK', 'OKTA', 'OMOS', 'OPFO', 'OPTK', 'ORAN', 'OSPO', 'OTEK', 'PELK', 'PGGV', 'PKB', 'POPK', 'PPIV', 'PROD', 'PROT', 'PTRS', 'RADE