In [8]:
import re
import time
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

class FBrefScraper:
    """Scrape the Premier League standings from FBref and save them as CSV.

    Args:
        request_delay: Seconds to sleep before each request (default 3).
        timeout: Seconds to wait for the server before the request is
            abandoned (default 30). Without a timeout a stalled connection
            would block the notebook kernel indefinitely.
        output_dir: Directory that ``save_table`` writes into
            (default ``"data/raw"``).
    """

    # Season-agnostic id of the overall standings table. The 2024-2025 page
    # uses "results2024-202591_overall"; this assumes other seasons keep the
    # same "results<season...>_overall" shape -- confirm against the live page.
    STANDINGS_TABLE_ID = re.compile(r"^results.+_overall$")

    def __init__(self, request_delay=3, timeout=30, output_dir="data/raw"):
        self.base_url = "https://fbref.com"
        # Add headers to mimic a browser request
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.request_delay = request_delay
        self.timeout = timeout
        self.output_dir = Path(output_dir)

    def get_current_table(self, table_id=None):
        """Scrape the current Premier League table.

        Args:
            table_id: Exact HTML ``id`` of the table to read. When omitted,
                the first table whose id matches ``STANDINGS_TABLE_ID`` is
                used, so the scraper is not tied to a single season.

        Returns:
            A DataFrame whose columns are the table's first row and whose
            cells are whitespace-stripped strings, or ``None`` when the
            request or the parsing fails (the error is printed).
        """
        # EPL standings URL
        url = f"{self.base_url}/en/comps/9/Premier-League-Stats"

        try:
            # Add delay to be respectful to the server
            time.sleep(self.request_delay)

            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # An explicit id wins; otherwise fall back to the season-agnostic pattern.
            wanted_id = table_id if table_id is not None else self.STANDINGS_TABLE_ID
            table = soup.find('table', {'id': wanted_id})
            if table is None:
                raise ValueError("Couldn't find the standings table")

            # Collect the stripped text of every header/data cell, row by row.
            # (.text.strip() is kept on purpose: get_text(strip=True) would also
            # strip inner fragments and glue multi-link cells together.)
            rows = []
            for row in table.find_all('tr'):
                cols = row.find_all(['th', 'td'])
                if cols:
                    rows.append([col.text.strip() for col in cols])

            if not rows:
                raise ValueError("The standings table has no rows")

            # First row is the header. Cells are already stripped above, so no
            # further clean-up pass is needed.
            return pd.DataFrame(rows[1:], columns=rows[0])

        except requests.exceptions.RequestException as e:
            print(f"Error making request: {e}")
            return None
        except Exception as e:
            # Deliberately broad: any parsing problem is reported and turned
            # into a None result instead of propagating to the caller.
            print(f"Error processing data: {e}")
            return None

    def save_table(self, df, filename):
        """Save the table to ``<output_dir>/<filename>.csv``.

        Args:
            df: DataFrame to write; ``None`` (a failed scrape) is skipped.
            filename: File name without the ``.csv`` extension.
        """
        if df is None:
            return

        out_path = self.output_dir / f"{filename}.csv"
        # to_csv does not create missing directories, so make sure it exists.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(out_path, index=False)
        print(f"Table saved to {out_path.as_posix()}")

In [9]:
# Create the scraper used by the cells below; the constructor only stores
# settings and performs no network request. (The variable is spelled
# `scrapper` and is referenced under that name by a later cell.)
scrapper = FBrefScraper()

In [11]:
# Fetch the standings. get_current_table() prints the error and returns None
# when the request or parsing fails, so guard the preview instead of letting
# the cell die with an AttributeError on None.
df = scrapper.get_current_table()
df.head() if df is not None else None

Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Last 5,Attendance,Top Team Scorer,Goalkeeper,Notes
0,1,Liverpool,8,7,0,1,15,3,12,21,2.63,15.7,6.1,9.6,1.19,L W W W W,60246,"Mohamed Salah, Luis Díaz - 5",Alisson,
1,2,Manchester City,8,6,2,0,19,9,10,20,2.5,15.4,8.5,6.9,0.86,W D D W W,52790,Erling Haaland - 10,Ederson,
2,3,Arsenal,8,5,2,1,15,8,7,17,2.13,13.5,9.1,4.4,0.56,W D W W L,60304,Kai Havertz - 4,David Raya,
3,4,Aston Villa,8,5,2,1,15,10,5,17,2.13,11.4,8.5,2.9,0.36,W W D D W,41455,Ollie Watkins - 5,Emiliano Martínez,
4,5,Brighton,8,4,3,1,14,10,4,15,1.88,11.8,13.3,-1.4,-0.18,D D L W W,34010,Danny Welbeck - 5,Bart Verbruggen,
