In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

import time
import json

# Set to True to run Chrome without a visible browser window.
HEADLESS = False

options = Options()
if HEADLESS:
    # The `Options.headless` property setter was deprecated in Selenium 4.8 and
    # later removed, after which assigning to it silently does nothing.
    # Passing the Chrome command-line flag is the supported replacement.
    options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

# Listing pages to crawl; each one is scanned for links to individual pages.
urls = ['https://www.coindesk.com/price']

# Accumulates one dict per successfully scraped page.
main_data = []

# --- Crawl settings ----------------------------------------------------------
PAGE_SETTLE_SECONDS = 5    # fixed pause after every navigation, before the explicit wait
WAIT_TIMEOUT_SECONDS = 10  # timeout applied to each WebDriverWait
MAX_LINKS_PER_URL = 1      # extracted links visited per listing page (None visits all)

# Absolute XPaths into the detail-page layout. They are tied to the exact DOM
# nesting, so a missing element simply yields the fallback value below.
XPATH_COIN_PRICE = '/html/body/div[1]/main/div[2]/section/div/div/div/div[1]/div[1]/div[2]'
XPATH_MARKET_METRICS = '/html/body/div[1]/main/div[2]/section/div/div/div/div[3]'
XPATH_FLASH_HEADLINES = '/html/body/div[1]/main/div[2]/section/div/div/div/div[6]/div[1]/div[3]'
XPATH_COIN_PRICE_INFO = '/html/body/div[1]/main/div[2]/section/div/div/div/div[5]/div/div[2]/div[1]/section'


def _text_or_default(root, by, selector, default="N/A"):
    """Return the text of the first element matching (by, selector) under `root`.

    Returns `default` when no such element exists (NoSuchElementException).
    """
    try:
        return root.find_element(by, selector).text
    except NoSuchElementException:
        return default


def _pair_alternating_lines(text):
    """Turn newline-separated text into a dict, pairing line 0 with 1, 2 with 3, ...

    A trailing unpaired line is dropped; a repeated key keeps its last value.
    """
    lines = text.split('\n')
    return {
        lines[i].strip(): lines[i + 1].strip()
        for i in range(0, len(lines) - 1, 2)
    }


def _put_unique(mapping, key, value):
    """Store `value` under `key`, appending " (2)", " (3)", ... if the key is taken.

    Prevents later entries from overwriting earlier ones that share a key.
    """
    unique_key = key
    suffix = 2
    while unique_key in mapping:
        unique_key = f"{key} ({suffix})"
        suffix += 1
    mapping[unique_key] = value


try:
    for url in urls:
        driver.get(url)
        time.sleep(PAGE_SETTLE_SECONDS)

        try:
            main_container = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'flex-grow'))
            )
            main_container1 = main_container.find_elements(By.ID, 'table-wrapper')
        except TimeoutException:
            print(f"Timeout while loading main container for URL: {url}")
            continue

        # Collect every non-empty href found inside the table wrapper(s).
        links = []
        for container in main_container1:
            for link_element in container.find_elements(By.TAG_NAME, 'a'):
                link = link_element.get_attribute('href')
                if link:
                    links.append(link)

        print(f"Extracted {len(links)} links from {url}")

        # Visit the extracted links (capped by MAX_LINKS_PER_URL) and scrape each page.
        for link in links[:MAX_LINKS_PER_URL]:
            try:
                driver.get(link)
                time.sleep(PAGE_SETTLE_SECONDS)

                try:
                    main_content = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS).until(
                        EC.presence_of_element_located((By.CLASS_NAME, 'flex-grow'))
                    )
                except TimeoutException:
                    print(f"Timeout while loading main content for link: {link}")
                    continue

                # Coin name and price; "N/A" when the element is missing.
                coin_name = _text_or_default(main_content, By.TAG_NAME, 'h1')
                coin_price = _text_or_default(main_content, By.XPATH, XPATH_COIN_PRICE)

                # Market data: the block's text is read as alternating label/value lines.
                market_data = {}
                try:
                    market_metrics = main_content.find_element(By.XPATH, XPATH_MARKET_METRICS).text
                    market_data = _pair_alternating_lines(market_metrics)
                except NoSuchElementException:
                    market_data = {}

                # Flash headlines: hrefs of all anchors inside the headlines container.
                flash_links = []
                try:
                    flash_headlines = main_content.find_element(By.XPATH, XPATH_FLASH_HEADLINES)
                    hrefs = (
                        anchor.get_attribute('href')
                        for anchor in flash_headlines.find_elements(By.TAG_NAME, 'a')
                    )
                    flash_links = [href for href in hrefs if href]
                except NoSuchElementException:
                    flash_links = []

                # Coin price info section text; "N/A" when missing.
                coin_price_info = _text_or_default(main_content, By.XPATH, XPATH_COIN_PRICE_INFO)

                # Q/A: the i-th <h2> is paired with the i-th <p> purely by position.
                # NOTE(review): this assumes headings and paragraphs line up one-to-one
                # on the page -- confirm against the actual markup.
                qa_data = {}
                try:
                    WebDriverWait(driver, WAIT_TIMEOUT_SECONDS).until(
                        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'h2'))
                    )
                    question_elements = main_content.find_elements(By.CSS_SELECTOR, 'h2')
                    answer_elements = main_content.find_elements(By.CSS_SELECTOR, 'p')

                    print("Extracting Q/A data...")

                    # Iterate over the longer list so nothing is skipped; the shorter
                    # side is padded with a placeholder.
                    max_length = max(len(question_elements), len(answer_elements))
                    for i in range(max_length):
                        question_text = question_elements[i].text.strip() if i < len(question_elements) else "No question"
                        answer_text = answer_elements[i].text.strip() if i < len(answer_elements) else "No answer"
                        # Plain assignment would let every padded "No question" entry
                        # (and any repeated heading text) overwrite the previous one.
                        _put_unique(qa_data, question_text, answer_text)

                except TimeoutException:
                    print("Timeout waiting for Q/A elements.")
                except Exception as e:
                    # Best effort: keep whatever pairs were collected before the failure.
                    print(f"Error extracting Q/A data: {e}")

                data = {
                    'link': link,
                    'coin_name': coin_name,
                    'coin_price': coin_price,
                    'market_data': market_data,
                    'flash_links': flash_links,
                    'coin_price_info': coin_price_info,
                    'Q/A': qa_data
                }
                main_data.append(data)

            except Exception as e:
                # A failure on one link must not abort the remaining links.
                print(f"An error occurred while processing the link: {link}\nError: {e}")

finally:
    # Always close the browser, even when the crawl raises.
    driver.quit()




In [None]:
import json
# Persist the scraped records: serialise the whole list to a JSON string and
# write it to the output file in one go.
with open("coin_scrape3.json", 'w') as out_file:
    serialised = json.dumps(main_data)
    out_file.write(serialised)


#### Q/A: if a Q/A section appears on the page, fetch every question/answer pair

In [None]:
# Standalone Q/A extraction: pairs the i-th <h2> with the i-th <p> under
# `main_content`, purely by position.
# NOTE: this cell needs a live `driver` session and a `main_content` element.
# The scraping cell calls driver.quit() in its `finally`, so after that cell has
# run this one can only report an error and leave `qa_data` empty.
qa_data = {}
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'h2'))
    )
    question_elements = main_content.find_elements(By.CSS_SELECTOR, 'h2')
    answer_elements = main_content.find_elements(By.CSS_SELECTOR, 'p')

    # Iterate over the longer list; the shorter side is padded with a placeholder.
    max_length = max(len(question_elements), len(answer_elements))
    for i in range(max_length):
        question_text = question_elements[i].text.strip() if i < len(question_elements) else 'No Question'
        answer_text = answer_elements[i].text.strip() if i < len(answer_elements) else 'No Answer'

        # Disambiguate repeated keys (notably the padded 'No Question') so later
        # entries do not overwrite earlier ones.
        key = question_text
        suffix = 2
        while key in qa_data:
            key = f"{question_text} ({suffix})"
            suffix += 1
        qa_data[key] = answer_text

except Exception as e:
    # The f-prefix is required; without it the literal text "{e}" was printed.
    print(f'Error extracting as {e}')

