In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# URL of the webpage to scrape
url = 'https://www.uefa.com/european-qualifiers/statistics/teams/'

# Headers to mimic a real browser request.
# 'br' is deliberately not advertised in Accept-Encoding: requests can only
# decode Brotli when an optional brotli package is installed, and without it
# the response body would come back as undecoded compressed bytes.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'DNT': '1',  # Do Not Track Request Header
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
}

# Set up retry strategy
retry_methods = ["HEAD", "GET", "OPTIONS"]  # Retry only these HTTP methods
retry_options = {
    'total': 5,  # Total number of retries
    'backoff_factor': 1,  # Base for the growing delay between retries
    'status_forcelist': [429, 500, 502, 503, 504],  # Retry on these status codes
}
try:
    # 'allowed_methods' is the current keyword; 'method_whitelist' was
    # deprecated in urllib3 1.26 and removed in urllib3 2.0.
    retry_strategy = Retry(allowed_methods=retry_methods, **retry_options)
except TypeError:
    # Older urllib3 releases only know the previous keyword name.
    retry_strategy = Retry(method_whitelist=retry_methods, **retry_options)

adapter = HTTPAdapter(max_retries=retry_strategy)

# Create a session and route both schemes through the retrying adapter
session = requests.Session()
session.mount("https://", adapter)
session.mount("http://", adapter)

# Fetch the statistics page, extract the 'statistics-table' table and save it
# as uefa_team_statistics.csv. Only the network call sits inside the `try`,
# so parsing problems are not mistaken for request failures.
try:
    # Send a GET request to the webpage
    response = session.get(url, headers=headers, timeout=10)
except requests.exceptions.RequestException as e:
    print(f"Error during requests to {url}: {str(e)}")
else:
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find the table containing the team statistics
        table = soup.find('table', {'class': 'statistics-table'})

        if table is None:
            # find() returns None when nothing matches; report it instead of
            # failing with an AttributeError on the next lookup.
            print("Could not find a table with class 'statistics-table' on the page.")
        else:
            # Extract table headers (kept in their own name so the request
            # `headers` dict is not overwritten)
            column_names = [cell.text.strip() for cell in table.find_all('th')]

            # Extract table rows; fall back to the table itself when the
            # markup has no explicit <tbody>
            body = table.find('tbody')
            if body is None:
                body = table

            rows = []
            for row in body.find_all('tr'):
                columns = [col.text.strip() for col in row.find_all('td')]
                if columns:  # skip rows without data cells (e.g. header rows)
                    rows.append(columns)

            # Create a DataFrame from the extracted data
            df = pd.DataFrame(rows, columns=column_names)

            # Save the DataFrame to a CSV file
            df.to_csv('uefa_team_statistics.csv', index=False)
            print("Data has been successfully saved to uefa_team_statistics.csv")
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
finally:
    # Release the pooled connections held by the session
    session.close()


  retry_strategy = Retry(


Error during requests to https://www.uefa.com/european-qualifiers/statistics/teams/: HTTPSConnectionPool(host='www.uefa.com', port=443): Max retries exceeded with url: /european-qualifiers/statistics/teams/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.uefa.com', port=443): Read timed out. (read timeout=10)"))
