In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time

def extract_data_with_retry(symbol, css_selector, retries=5, delay=6):
    for attempt in range(retries):
        driver = webdriver.Chrome()
        profile_url = f'https://finance.yahoo.com/quote/{symbol}/profile'
        driver.get(profile_url)
        time.sleep(13)  
        try:
            element = driver.find_element(By.CSS_SELECTOR, css_selector).text
            driver.quit()
            return element
        except NoSuchElementException:
            print(f"Attempt {attempt + 1} for {symbol} failed")
            driver.quit()
            if attempt < retries - 1:
                time.sleep(5)
    raise NoSuchElementException(f"Failed to find element after {retries} attempts for {symbol}")

def scrape_stock_profile(symbol):
    company_name = extract_data_with_retry(symbol, '#Col1-0-Profile-Proxy > section > div.asset-profile-container > div > h3')
    address = extract_data_with_retry(symbol, '#Col1-0-Profile-Proxy > section > div.asset-profile-container > div > div > p.D\\(ib\\).W\\(47\\.727\\%\\).Pend\\(40px\\)')
    description = extract_data_with_retry(symbol, '#Col1-0-Profile-Proxy > section > section.quote-sub-section.Mt\\(30px\\) > p')

    driver = webdriver.Chrome()
    driver.get(f'https://finance.yahoo.com/quote/{symbol}/profile')
    time.sleep(13)
    executives = []
    try:
        executives_table = driver.find_element(By.CSS_SELECTOR, '#Col1-0-Profile-Proxy > section > section.Bxz\\(bb\\).quote-subsection.undefined > table > tbody')
        rows = executives_table.find_elements(By.TAG_NAME, 'tr')
        for row in rows:
            cols = row.find_elements(By.TAG_NAME, 'td')
            if len(cols) > 1:
                executives.append({
                    'Name': cols[0].text.strip(),
                    'Title': cols[1].text.strip()
                })
    finally:
        driver.quit()

    return {
        'Symbol': symbol,
        'Company Name': company_name,
        'Address': address,
        'Description': description,
        'Executives': executives
    }

stocks = ['JPM', 'GS', 'MS', 'C', 'WFC']
all_data = []

for stock in stocks:
    try:
        stock_profile = scrape_stock_profile(stock)
        for executive in stock_profile['Executives']:
            all_data.append({
                'Symbol': stock_profile['Symbol'],
                'Company Name': stock_profile['Company Name'],
                'Address': stock_profile['Address'],
                'Description': stock_profile['Description'],
                'Executive Name': executive['Name'],
                'Executive Title': executive['Title']
            })
    except NoSuchElementException as e:
        print(f"Failed to scrape data for {stock}: {e}")

df = pd.DataFrame(all_data)

csv_file_path = '/Users/jonathansedaka/Documents/DATA 580/multiple_stocks_profile_data.csv'
excel_file_path = '/Users/jonathansedaka/Documents/DATA 580/multiple_stocks_profile_data.xlsx'
json_file_path = '/Users/jonathansedaka/Documents/DATA 580/multiple_stocks_profile_data.json'

df.to_csv(csv_file_path, index=False)
df.to_excel(excel_file_path, index=False)
df.to_json(json_file_path, orient='records', lines=True)

print("YES")


Attempt 1 for GS failed. Retrying...
Failed to scrape data for GS: Message: no such element: Unable to locate element: {"method":"css selector","selector":"#Col1-0-Profile-Proxy > section > section.Bxz\(bb\).quote-subsection.undefined > table > tbody"}
  (Session info: chrome=121.0.6167.184); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x000000010529a538 chromedriver + 4687160
1   chromedriver                        0x0000000105291d83 chromedriver + 4652419
2   chromedriver                        0x0000000104e82fbd chromedriver + 397245
3   chromedriver                        0x0000000104ecec3c chromedriver + 707644
4   chromedriver                        0x0000000104ecee11 chromedriver + 708113
5   chromedriver                        0x0000000104f13274 chromedriver + 987764
6   chromedriver                        0x0000000104ef192d ch