# Official Davis Cup website

In [69]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import pandas as pd
import re

# Initialize Selenium
chrome_service = ChromeService("C:/Users/ALESSANDRO/Downloads/chromedriver.exe")
chrome_service.start()
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

# URL of the webpage
url = "https://www.daviscup.com/en/draws-results/tie.aspx?id=M-DC-2018-WG-M-FRA-NED-01"

# Navigate to the webpage
driver.get(url)

try:
    # Find the div element with class "main"
    main_element = driver.find_element(By.CLASS_NAME, "main")

    # Find the div element with class "rubber-header"
    rubber_header_element = main_element.find_element(By.CLASS_NAME, "rubber-header")

    # Extract "match" and "match status" from the span elements
    spans = rubber_header_element.find_elements(By.TAG_NAME, "span")
#     span_element = rubber_header_element.find_element(By.CSS_SELECTOR, "span.label.ng-binding")


    match_status = ""
    match = ""

    
    if len(spans) >= 2:
        match_status = spans[1].text.strip()
#         match = span_element.text.strip()

    # Find the div element with class "component-title ng-binding" within the main element
    component_title_element = main_element.find_element(By.CLASS_NAME, "component-title")

    # Include the component_title_text in the stage variable
    stage = component_title_element.text.strip()

    # Now, let's find the div element with class "tie" within the main element
    tie_element = main_element.find_element(By.CLASS_NAME, "tie")

    # Find all sub div elements within the "tie" element
    sub_div_elements = tie_element.find_elements(By.TAG_NAME, "div")

    # Initialize variables to store data
    column_data = {}

    for sub_div_element in sub_div_elements:
        sub_div_text = sub_div_element.text.strip()
        if ":" in sub_div_text:
            column_name, column_value = sub_div_text.split(":", 1)
            column_data[column_name] = [column_value]

    # Create a DataFrame from the collected data
    df = pd.DataFrame(column_data)

    # Add the "Stage" column with the component_title_text
    df["Stage"] = stage

    # Now, let's find the div element with class "rubber-body" within the main element
    rubber_body_element = main_element.find_element(By.CLASS_NAME, "rubber-body")

    # Find all tables with class "dc" within the rubber-body
    table_elements = rubber_body_element.find_elements(By.CLASS_NAME, "dc")

    # Initialize lists to store the information
    tables_data = []

    for table_element in table_elements:
        # Initialize data for each table
        table_data = {
            "Player": [],
            "Set 1": [],
            "Set 2": [],
            "Set 3": [],
            "Tie-Break 1": [],
            "Tie-Break 2": [],
            "Tie-Break 3": []
        }

        # Find the table body
        tbody_element = table_element.find_element(By.TAG_NAME, "tbody")

        # Find all rows (tr elements) within the tbody
        rows = tbody_element.find_elements(By.TAG_NAME, "tr")

        for row in rows:
            # Find all td elements within the row
            td_elements = row.find_elements(By.TAG_NAME, "td")

            # Extract and store the information starting from td_elements[1]
            player = td_elements[1].text.strip()

            # Extract results from td class "results"
            results = td_elements[2]
            set_scores = results.find_elements(By.TAG_NAME, "span")

            set_results = []
            tie_breaks = []

            for set_score in set_scores:
                set_result = set_score.text.strip()
                tie_break = ""

                # Use regular expressions to extract the first number in set_result
                match = re.search(r'\d+', set_result)
                if match:
                    set_result = match.group()
                else:
                    set_result = ""

                if set_score.find_elements(By.TAG_NAME, "sup"):
                    tie_break = set_score.find_element(By.TAG_NAME, "sup").text.strip()
                    # Use regular expressions to extract the first number in tie_break
                    match = re.search(r'\d+', tie_break)
                    if match:
                        tie_break = match.group()
                    else:
                        tie_break = ""

                set_results.append(set_result)
                tie_breaks.append(tie_break)

            # Ensure there are at most 3 sets
            set_results = set_results[:3]
            tie_breaks = tie_breaks[:3]

            # Assign the extracted values to the dictionary
            table_data["Player"].append(player)
            table_data["Set 1"].append(set_results[0])
            table_data["Set 2"].append(set_results[1])
            table_data["Set 3"].append(set_results[2])
            
            # Keep only the first element in the list for tie-breaks
            for i, tie_break in enumerate(tie_breaks):
                if i == 0 and tie_break:
                    table_data["Tie-Break 1"].append(tie_break)
                else:
                    table_data[f"Tie-Break {i+1}"].append(None)

        # Append the table data to the list
        tables_data.append(table_data)

    # Create a DataFrame from the collected data
    tables_df = pd.DataFrame(tables_data)

    # Combine the information from both DataFrames
    combined_df = pd.concat([df] * len(tables_df), ignore_index=True)
    combined_df = pd.concat([combined_df, tables_df], axis=1)

    # Add match and match status columns
    combined_df["match status"] = match_status
    combined_df["match"] = "match 1" 

    # Display the combined DataFrame
    print("Combined DataFrame:")
    print(combined_df)

except Exception as e:
    print("Error:", str(e))
finally:
    # Close the Selenium WebDriver
    driver.quit()


Combined DataFrame:
                    Date                                  Venue  \
0   02 Feb - 04 Feb 2018   Halle Olympique, Albertville, France   
1   02 Feb - 04 Feb 2018   Halle Olympique, Albertville, France   

                               Surface               Ball  \
0   Hard - Rebound Ace Synpave, Indoor   Tecnifibre X-One   
1   Hard - Rebound Ace Synpave, Indoor   Tecnifibre X-One   

                    Stage              Player Set 1 Set 2 Set 3 Tie-Break 1  \
0  WORLD GROUP  1ST ROUND  [Adrian MANNARINO]   [6]   [3]   [3]         [4]   
1  WORLD GROUP  1ST ROUND  [Thiemo DE BAKKER]   [7]   [6]   [6]         [7]   

  Tie-Break 2 Tie-Break 3        match status    match  
0      [None]      [None]  PLAYED & COMPLETED  match 1  
1      [None]      [None]  PLAYED & COMPLETED  match 1  
