In [1]:
# Packages for web scraping
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Packages for the GUI application
import tkinter as tk
from tkinter import ttk, messagebox
import os
from pathlib import Path

In [2]:
def setup_driver():
    """Set up Chrome WebDriver with appropriate options."""
    chrome_options = Options()
    chrome_options.add_argument('--headless=new')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--window-size=1920x1080')
    
    # Add user agent
    chrome_options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
    
    # Additional settings to avoid detection
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    
    # Use ChromeDriverManager to handle driver installation
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    
    return driver

def get_season_url(year):
    """Generate URL for a specific Premier League season."""
    if year == 2024:  # Current season
        return "https://fbref.com/en/comps/9/stats/Premier-League-Stats"
    else:
        return f"https://fbref.com/en/comps/9/{year}-{year+1}/stats/{year}-{year+1}-Premier-League-Stats"

def scrape_season(year):
    """Scrape data for a specific Premier League season."""
    url = get_season_url(year)
    
    try:
        # Set up the driver
        driver = setup_driver()
        
        # Load the page
        print("Loading page...")
        driver.get(url)
        
        # Wait for the table to load
        print("Waiting for table to load...")
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'table#stats_standard')))
        
        # Get the page source
        print("Parsing data...")
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        
        # Find the stats table
        stats_table = soup.find('table', {'id': 'stats_standard'})
        if not stats_table:
            print("No stats table found!")
            return None
            
        # Extract player data
        all_players = []
        for row in stats_table.find('tbody').find_all('tr'):
            try:
                # Get player data from the row
                player_name = row.find('td', {'data-stat': 'player'}).text.strip()
                squad_name = row.find('td', {'data-stat': 'team'}).text.strip()
                age = row.find('td', {'data-stat': 'age'}).text.strip()
                nation = row.find('td', {'data-stat': 'nationality'}).text.strip() if row.find('td', {'data-stat': 'nationality'}) else ''
                
                all_players.append({
                    'Player': player_name,
                    'Squad': squad_name,
                    'Age': age,
                    'Nationality': nation
                })
                
            except (AttributeError, IndexError) as e:
                print(f"Error parsing row: {str(e)}")
                continue
        
        # Create DataFrame
        df = pd.DataFrame(all_players)
        df['Season'] = f'{year}-{year+1}'
        
        print(f"Successfully scraped {len(df)} players")
        return df
        
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
        
    finally:
        driver.quit()

In [5]:
# Run the scraper for multiple seasons
all_seasons = []

# Start with recent seasons first
for year in range(2024, 1991, -1):  # From 2023 back to 1992
    print(f"\nScraping season {year}-{year+1}...")
    df = scrape_season(year)
    if df is not None:
        all_seasons.append(df)
        print(f"Success! Found {len(df)} players")
    else:
        print(f"Failed to scrape season {year}-{year+1}")
    
    # Add a delay between seasons
    time.sleep(random.uniform(3, 5))

# Combine all seasons
if all_seasons:
    final_df = pd.concat(all_seasons, ignore_index=True)
    print(f"\nTotal players found across all seasons: {len(final_df)}")
    
    # Save to CSV
    final_df.to_csv('premier_league_players_all_seasons.csv', index=False)
    print("\nData saved to 'premier_league_players_all_seasons.csv'")
    
    # Display sample
    print("\nSample of data (3 rows per season):")
    print(final_df.groupby('Season').head(3))
else:
    print("No data was successfully scraped")



Scraping season 2024-2025...
Loading page...
Waiting for table to load...
Parsing data...
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute 'text'
Error parsing row: 'NoneType' object has no attribute '

In [6]:

class PLPlayerQuiz:
    def __init__(self, root):
        self.root = root
        self.root.title("Premier League Player Connection Quiz")
        self.root.geometry("800x600")
        
        # Load data
        self.load_data()
        
        # Create GUI elements
        self.create_widgets()
        
    def load_data(self):
        data_dir = Path('/Users/julianball/Downloads/FBREF')
        dfs = []
        
        for year in range(1992, 2025):
            file_path = data_dir / f'PL{year}.csv'
            if file_path.exists():
                df = pd.read_csv(file_path)
                df['year'] = year
                dfs.append(df)
        
        self.df_combined = pd.concat(dfs, ignore_index=True)
        self.df_combined5 = self.df_combined.drop(columns=["Rk", "Age"])
        
        # Create a set of all players for quick lookup
        self.all_players = set(self.df_combined5['Player'].unique())
    
    def create_widgets(self):
        # Main frame
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        
        # Title
        title = ttk.Label(main_frame, text="Premier League Player Connection Quiz", 
                         font=('Helvetica', 16, 'bold'))
        title.grid(row=0, column=0, columnspan=2, pady=10)
        
        # Description
        desc = ttk.Label(main_frame, text="Find players who have been teammates with both selected players!",
                        wraplength=600)
        desc.grid(row=1, column=0, columnspan=2, pady=5)
        
        # Player 1 input
        ttk.Label(main_frame, text="First Player:").grid(row=2, column=0, pady=5)
        self.player1_var = tk.StringVar()
        self.player1_entry = ttk.Entry(main_frame, textvariable=self.player1_var)
        self.player1_entry.grid(row=2, column=1, pady=5, sticky=tk.W)
        self.player1_entry.bind('<KeyRelease>', lambda e: self.suggest_players(self.player1_var.get(), self.player1_listbox))
        
        # Player 1 suggestions
        self.player1_listbox = tk.Listbox(main_frame, height=5)
        self.player1_listbox.grid(row=3, column=1, pady=5, sticky=tk.W)
        self.player1_listbox.bind('<<ListboxSelect>>', 
                                lambda e: self.select_suggestion(self.player1_listbox, self.player1_var))
        
        # Player 2 input
        ttk.Label(main_frame, text="Second Player:").grid(row=4, column=0, pady=5)
        self.player2_var = tk.StringVar()
        self.player2_entry = ttk.Entry(main_frame, textvariable=self.player2_var)
        self.player2_entry.grid(row=4, column=1, pady=5, sticky=tk.W)
        self.player2_entry.bind('<KeyRelease>', lambda e: self.suggest_players(self.player2_var.get(), self.player2_listbox))
        
        # Player 2 suggestions
        self.player2_listbox = tk.Listbox(main_frame, height=5)
        self.player2_listbox.grid(row=5, column=1, pady=5, sticky=tk.W)
        self.player2_listbox.bind('<<ListboxSelect>>', 
                                lambda e: self.select_suggestion(self.player2_listbox, self.player2_var))
        
        # Find button
        ttk.Button(main_frame, text="Find Connections", 
                  command=self.find_connections).grid(row=6, column=0, columnspan=2, pady=20)
        
        # Results area
        self.results_text = tk.Text(main_frame, height=10, width=60, wrap=tk.WORD)
        self.results_text.grid(row=7, column=0, columnspan=2, pady=10)
        
        # Guess entry label
        self.guess_label = ttk.Label(main_frame, text="Enter your guesses (one player per line):")
        self.guess_label.grid(row=8, column=0, columnspan=2, pady=(10,0))
        self.guess_label.grid_remove()
        
        # Guess entry area
        self.guess_text = tk.Text(main_frame, height=5, width=60, wrap=tk.WORD)
        self.guess_text.grid(row=9, column=0, columnspan=2, pady=5)
        self.guess_text.grid_remove()
        
        # Check answers button
        self.check_btn = ttk.Button(main_frame, text="Check Answers", 
                                  command=self.check_answers)
        self.check_btn.grid(row=10, column=0, columnspan=2, pady=5)
        self.check_btn.grid_remove()
        
        # Score display
        self.score_label = ttk.Label(main_frame, text="")
        self.score_label.grid(row=11, column=0, columnspan=2, pady=5)
        self.score_label.grid_remove()
        
        # Show answer button
        self.show_answer_btn = ttk.Button(main_frame, text="Give Up - Show Answers", 
                                        command=self.toggle_answer)
        self.show_answer_btn.grid(row=12, column=0, columnspan=2, pady=10)
        self.show_answer_btn.grid_remove()
        
        # Answer area
        self.answer_text = tk.Text(main_frame, height=5, width=60, wrap=tk.WORD)
        self.answer_text.grid(row=13, column=0, columnspan=2, pady=10)
        self.answer_text.grid_remove()
        
    def suggest_players(self, partial, listbox):
        listbox.delete(0, tk.END)
        if len(partial) < 2:
            return
            
        partial = partial.lower()
        matches = [p for p in self.all_players if partial in p.lower()][:10]
        
        for player in matches:
            listbox.insert(tk.END, player)
    
    def select_suggestion(self, listbox, var):
        if not listbox.curselection():
            return
        var.set(listbox.get(listbox.curselection()))
        listbox.delete(0, tk.END)
    
    def find_squads_and_years_for_player(self, player):
        player_data = self.df_combined5[self.df_combined5['Player'] == player][['Squad', 'year']].drop_duplicates()
        return [tuple(x) for x in player_data.values]
    
    def find_players_in_squads_and_years(self, squad_years):
        filtered_df = self.df_combined5[self.df_combined5[['Squad', 'year']].apply(tuple, axis=1).isin(squad_years)]
        return filtered_df['Player'].unique().tolist()
    
    def find_common_players(self, players1, players2):
        return list(set(players1) & set(players2))
    
    def find_connections(self):
        player1 = self.player1_var.get()
        player2 = self.player2_var.get()
        
        if not player1 or not player2:
            messagebox.showerror("Error", "Please enter both player names")
            return
        
        if player1 not in self.all_players or player2 not in self.all_players:
            messagebox.showerror("Error", "One or both players not found in database")
            return
        
        # Get squad histories
        player1_squads = self.find_squads_and_years_for_player(player1)
        player2_squads = self.find_squads_and_years_for_player(player2)
        
        # Find common teammates
        players1 = self.find_players_in_squads_and_years(player1_squads)
        players2 = self.find_players_in_squads_and_years(player2_squads)
        self.common_players = self.find_common_players(players1, players2)
        
        # Display results
        self.results_text.delete(1.0, tk.END)
        self.results_text.insert(tk.END, f"Number of common teammates: {len(self.common_players)}\n\n")
        
        self.results_text.insert(tk.END, f"{player1}'s teams:\n")
        for squad, year in player1_squads:
            self.results_text.insert(tk.END, f"• {squad} ({year})\n")
        
        self.results_text.insert(tk.END, f"\n{player2}'s teams:\n")
        for squad, year in player2_squads:
            self.results_text.insert(tk.END, f"• {squad} ({year})\n")
        
        # Show guessing interface
        self.guess_label.grid()
        self.guess_text.grid()
        self.check_btn.grid()
        self.score_label.grid()
        self.show_answer_btn.grid()
        
        # Clear previous guesses and answers
        self.guess_text.delete(1.0, tk.END)
        self.answer_text.delete(1.0, tk.END)
        self.answer_text.grid_remove()
        self.score_label.configure(text="")
        self.show_answer_btn.configure(text="Give Up - Show Answers")
    
    def check_answers(self):
        # Get user guesses
        guesses = self.guess_text.get(1.0, tk.END).strip().split('\n')
        guesses = [g.strip() for g in guesses if g.strip()]  # Remove empty lines
        
        # Convert both lists to sets for comparison
        correct_answers = set(self.common_players)
        user_guesses = set(guesses)
        
        # Calculate correct and incorrect guesses
        correct_guesses = user_guesses & correct_answers
        incorrect_guesses = user_guesses - correct_answers
        remaining = correct_answers - user_guesses
        
        # Update score label
        score_text = f"Correct: {len(correct_guesses)} | "
        score_text += f"Incorrect: {len(incorrect_guesses)} | "
        score_text += f"Remaining: {len(remaining)}"
        self.score_label.configure(text=score_text)
        
        # Highlight correct and incorrect guesses
        self.guess_text.tag_remove('correct', '1.0', tk.END)
        self.guess_text.tag_remove('incorrect', '1.0', tk.END)
        
        # Configure tags
        self.guess_text.tag_configure('correct', foreground='green')
        self.guess_text.tag_configure('incorrect', foreground='red')
        
        # Apply highlighting
        text_content = self.guess_text.get(1.0, tk.END)
        for guess in guesses:
            start = '1.0'
            while True:
                start = self.guess_text.search(guess, start, tk.END)
                if not start:
                    break
                end = f"{start}+{len(guess)}c"
                tag = 'correct' if guess in correct_guesses else 'incorrect'
                self.guess_text.tag_add(tag, start, end)
                start = end
    
    def toggle_answer(self):
        if self.answer_text.grid_info():
            self.answer_text.grid_remove()
            self.show_answer_btn.configure(text="Give Up - Show Answers")
        else:
            self.answer_text.delete(1.0, tk.END)
            self.answer_text.insert(tk.END, "\n".join(sorted(self.common_players)))
            self.answer_text.grid()
            self.show_answer_btn.configure(text="Hide Answers")

if __name__ == "__main__":
    root = tk.Tk()
    app = PLPlayerQuiz(root)
    root.mainloop()