In [2]:
import pandas as pd
from typing import List, Dict, Any, Union
#from statsbombpy import sb
import numpy as np
import json
import re
#N(reaS^!.sTijg7

In [3]:
def flatten_all_elements(data):
    """
    Flatten one level of nesting in a list of match records.

    Each item of ``data`` is inspected: a non-empty list whose first item is
    itself a list is treated as a "list of lists" and its members are spliced
    into the output; anything else is kept as a single output item.

    Args:
        data: Iterable of items (typically lists, or lists of lists).

    Returns:
        A new list with nested lists-of-lists expanded by one level.
    """
    flattened = []

    for item in data:
        holds_sublists = (
            isinstance(item, list)
            and len(item) > 0
            and isinstance(item[0], list)
        )
        if holds_sublists:
            # Splice the inner lists in, preserving their order
            flattened += item
        else:
            # Already a single record (or a non-list value): keep as-is
            flattened.append(item)

    return flattened

In [4]:
def extract_matches_to_dataframe(match_data_list: List[List]) -> pd.DataFrame:
    """
    Turn a list of scraped match records into a flat, one-row-per-event DataFrame.

    Each match record is read positionally:
    [competition_info, datetime, season, teams_dict, events_list_0, events_list_1, ...]
    where the first events list is labelled 'home' and every later one 'away'.

    Row rules:
      * a match with no (or only empty) events lists yields one row with all
        event columns set to None;
      * an empty events list (when another list of the match has events) yields
        one placeholder row for that side;
      * every dict inside an events list yields one row; a 'team' key in the
        event overrides the side, and unknown keys become 'extra_<key>' columns.

    A match that raises while being processed is reported with print() and
    skipped (rows already emitted for it are kept).

    Args:
        match_data_list: List of lists, where each inner list represents a match

    Returns:
        pandas.DataFrame with the base columns first, followed by any
        'extra_*' columns that were encountered.
    """
    # Event keys copied into same-named columns ('type' is stored as 'event_type')
    event_fields = ('type', 'time', 'player', 'score', 'assist',
                    'player_in', 'player_out', 'reason')
    # Keys that never produce an 'extra_*' column
    handled_keys = event_fields + ('team',)

    # Event columns of a row that carries no event
    empty_event = {
        'event_type': None,
        'time': None,
        'player': None,
        'score': None,
        'assist': None,
        'player_in': None,
        'player_out': None,
        'reason': None,
    }

    records = []

    for match_idx, match in enumerate(match_data_list):
        try:
            n_fields = len(match)
            teams = match[3] if n_fields > 3 and isinstance(match[3], dict) else {}

            # Columns shared by every row of this match
            match_info = {
                'match_id': match_idx,
                'competition': match[0] if n_fields > 0 else None,
                'datetime': match[1] if n_fields > 1 else None,
                'season': match[2] if n_fields > 2 else None,
                'home_team': teams.get('home', None),
                'away_team': teams.get('away', None),
            }

            events_lists = match[4:] if n_fields > 4 else []

            if not any(events_lists):
                # Nothing happened (or nothing was scraped): match-only row
                records.append({**match_info, **empty_event,
                                'team_side': None, 'event_source': None})
                continue

            for list_idx, events_list in enumerate(events_lists):
                if not isinstance(events_list, list):
                    continue

                side_info = {
                    'team_side': 'home' if list_idx == 0 else 'away',
                    'event_source': f'list_{list_idx}',
                }

                if not events_list:
                    # Placeholder so the side still appears in the output
                    records.append({**match_info, **empty_event, **side_info})
                    continue

                for event in events_list:
                    if not isinstance(event, dict):
                        continue

                    row = {**match_info, **side_info}
                    for field in event_fields:
                        column = 'event_type' if field == 'type' else field
                        row[column] = event.get(field, None)

                    # The event's own team marker wins over the list position
                    if 'team' in event:
                        row['team_side'] = event['team']

                    # Preserve unknown keys under a prefixed column name
                    for key, value in event.items():
                        if key not in handled_keys:
                            row[f'extra_{key}'] = value

                    records.append(row)

        except Exception as e:
            print(f"Error processing match {match_idx}: {e}")
            continue

    df = pd.DataFrame(records)

    base_columns = ['match_id', 'competition', 'datetime', 'season', 'home_team', 'away_team',
                    'team_side', 'event_source', 'event_type', 'time', 'player', 'score',
                    'assist', 'player_in', 'player_out', 'reason']

    # Base columns first, then extras in the order pandas discovered them;
    # columns missing from the frame (e.g. when it is empty) are left out.
    wanted = base_columns + [col for col in df.columns if col.startswith('extra_')]
    return df[[col for col in wanted if col in df.columns]]


def analyze_match_data(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Provide a summary analysis of the extracted match data.

    Args:
        df: DataFrame returned by extract_matches_to_dataframe. It must contain
            the columns 'match_id', 'event_type', 'competition', 'season',
            'home_team', 'away_team' and 'datetime'.

    Returns:
        Dictionary with:
          * 'total_matches': number of distinct match ids
          * 'total_events':  number of rows with a non-null event type
          * 'competitions':  distinct competition values (may include None)
          * 'seasons':       distinct season values (may include None)
          * 'event_types':   {event type: row count}
          * 'teams':         sorted distinct home/away team names
          * 'date_range':    {'earliest', 'latest'} over the non-missing
                             'datetime' values (NaN when there are none)

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Matches whose date could not be scraped carry None in 'datetime'.
    # Drop those before min()/max(): on an object column pandas would otherwise
    # compare the strings against its float fill value and raise TypeError.
    known_dates = df['datetime'].dropna()

    analysis = {
        'total_matches': df['match_id'].nunique(),
        'total_events': int(df['event_type'].notna().sum()),
        'competitions': df['competition'].unique().tolist(),
        'seasons': df['season'].unique().tolist(),
        'event_types': df['event_type'].value_counts().to_dict(),
        'teams': sorted(set(df['home_team'].dropna().tolist() + df['away_team'].dropna().tolist())),
        'date_range': {
            'earliest': known_dates.min(),
            'latest': known_dates.max()
        }
    }

    return analysis


In [5]:
def process_match_dataframe(df):
    """
    Derive the 'matchweek' and 'half_event' columns for a match-event DataFrame.

    The input is copied first, then passed through split_competition_column
    (splits 'competition' into 'competition' + 'matchweek') and
    add_half_event_column (adds 'half_event' from 'time'), in that order.

    Args:
        df: pandas DataFrame with 'competition' and 'time' columns

    Returns:
        A new pandas DataFrame with the processed columns; ``df`` is not modified.
    """
    working_copy = df.copy()
    with_matchweek = split_competition_column(working_copy)
    return add_half_event_column(with_matchweek)

def split_competition_column(df):
    """
    Split 'competition' values such as "PREMIER LEAGUE - JORNADA 38" into a
    competition name and a matchweek label.

    Works on a copy of ``df``. Values that are missing or not strings are kept
    unchanged with a matchweek of None; strings without a recognised
    "<name> - <matchweek>" suffix are stripped and get a matchweek of None.

    Returns:
        Copy of ``df`` with 'competition' rewritten and a 'matchweek' column set.
    """
    # Recognised matchweek suffixes, tried in this order (case-insensitive).
    suffix_fragments = (
        r'JORNADA\s+\d+',    # "PREMIER LEAGUE - JORNADA 38"
        r'MATCHDAY\s+\d+',   # "PREMIER LEAGUE - MATCHDAY 38"
        r'GAMEWEEK\s+\d+',   # "PREMIER LEAGUE - GAMEWEEK 38"
        r'WEEK\s+\d+',       # "PREMIER LEAGUE - WEEK 38"
        r'MD\s*\d+',         # "PREMIER LEAGUE - MD38"
        r'GW\s*\d+',         # "PREMIER LEAGUE - GW38"
        r'ROUND\s+\d+',      # "PREMIER LEAGUE - ROUND 38"
        r'\d+',              # "PREMIER LEAGUE - 38"
    )
    matchers = [
        re.compile(r'^(.+?)\s*-\s*(' + fragment + r')$', re.IGNORECASE)
        for fragment in suffix_fragments
    ]

    def split_one(raw_value):
        """Return (competition_name, matchweek) for a single cell value."""
        if pd.isna(raw_value) or not isinstance(raw_value, str):
            return raw_value, None

        text = raw_value.strip()
        for matcher in matchers:
            hit = matcher.match(text)
            if hit is not None:
                return hit.group(1).strip(), hit.group(2).strip()

        # No matchweek suffix recognised: whole string is the competition name
        return text, None

    result = df.copy()
    pairs = result['competition'].apply(split_one)

    result['competition'] = [name for name, _ in pairs]
    result['matchweek'] = [week for _, week in pairs]

    return result

def add_half_event_column(df):
    """
    Return a copy of ``df`` with a 'half_event' column derived from 'time'.

    Only string times are interpreted. The leading run of digits is taken as
    the minute (so "45+2'" counts as minute 45): minutes 1-45 map to
    'First Half', 46 and above to 'Second Half', anything else to None.
    """
    leading_minute = re.compile(r'(\d+)')

    def half_for(raw_time):
        """Classify a single 'time' cell; None when it cannot be interpreted."""
        if pd.isna(raw_time) or not isinstance(raw_time, str):
            return None

        # Formats seen: "74'", "45+2'", "90+1'" - only the base minute matters
        found = leading_minute.match(str(raw_time).strip())
        if found is None:
            return None

        try:
            minute = int(found.group(1))
        except ValueError:
            return None

        if minute < 1:
            return None
        return 'First Half' if minute <= 45 else 'Second Half'

    result = df.copy()
    result['half_event'] = result['time'].apply(half_for)
    return result

def reorder_columns(df):
    """
    Return ``df`` with its columns arranged in a reader-friendly order.

    Columns named in the preferred order come first (only those actually
    present); every other column follows in its existing relative order.
    """
    preferred_order = (
        'match_id', 'competition', 'matchweek', 'datetime', 'season',
        'home_team', 'away_team', 'team_side', 'event_source',
        'event_type', 'time', 'half_event', 'player', 'score',
        'assist', 'player_in', 'player_out', 'reason',
    )

    present = df.columns.tolist()

    leading = [name for name in preferred_order if name in present]
    trailing = [name for name in present if name not in leading]

    return df[leading + trailing]

In [6]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from bs4 import BeautifulSoup
import time
def _load_full_results_html(league_url):
    """
    Open the league results page in Chrome, expand it and return its HTML.

    The "a.wclButtonLink" button is clicked repeatedly until it can no longer
    be found/clicked or a click stops adding "div.event__match" rows. The
    browser is always closed, even if navigation or a click raises.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(league_url)
        wait = WebDriverWait(driver, 10)

        print("Iniciando carga de partidos...")

        while True:
            try:
                # Give the page a moment to settle before looking for the button
                time.sleep(2)

                # Locate the "show more matches" button
                more_button = wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "a.wclButtonLink"))
                )

                # Bring the button into view
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", more_button)
                time.sleep(1)

                # Count matches before the click so growth can be detected
                matches_before = len(driver.find_elements(By.CSS_SELECTOR, "div.event__match"))

                # Click through JavaScript
                driver.execute_script("arguments[0].click();", more_button)

                # Poll once per second (up to 10 s) until the match count grows
                matches_after = matches_before
                for _ in range(10):
                    time.sleep(1)
                    matches_after = len(driver.find_elements(By.CSS_SELECTOR, "div.event__match"))
                    if matches_after > matches_before:
                        break

                # No new matches after the click: assume everything is loaded
                if matches_after <= matches_before:
                    break

            except (TimeoutException, NoSuchElementException):
                # The button is gone: normal end of pagination
                break
            except Exception as e:
                # Best effort: keep whatever has been loaded so far
                print(f"Unexpected error while loading more matches: {e}")
                break

        return driver.page_source
    finally:
        driver.quit()


def _setup_driver(headless=True):
    """
    Setup Chrome driver with options (headless by default, 1920x1080 window).
    """
    from selenium.webdriver.chrome.options import Options

    options = Options()
    if headless:
        options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-gpu')
    options.add_argument('--window-size=1920,1080')

    return webdriver.Chrome(options=options)


def _extract_matchweek(driver):
    """
    Return the text of the last matchweek span on the page, or None.

    The specific class+data-testid selector is tried first; when it finds
    nothing, the looser data-testid-only selector is tried. find_elements
    returns an empty list instead of raising, so the fallback is driven by
    emptiness.
    """
    selectors = (
        'span.wcl-overline_rOFfd.wcl-scores-overline-03_0pkdl[data-testid="wcl-scores-overline-03"]',
        'span[data-testid="wcl-scores-overline-03"]',
    )

    for selector in selectors:
        try:
            matchweek_elements = driver.find_elements(By.CSS_SELECTOR, selector)
        except NoSuchElementException:
            continue
        if matchweek_elements:
            # The last matching element carries the matchweek label
            return matchweek_elements[-1].text.strip()

    return None


def _extract_game_date(driver):
    """
    Return the match start time text, or None when no selector finds it.

    Looks for the 'duelParticipant__startTime' element first and falls back
    to a list of looser CSS selectors. Any other error is swallowed
    (best effort) and yields None.
    """
    try:
        date_element = driver.find_element(By.CLASS_NAME, 'duelParticipant__startTime')
        return date_element.text.strip()
    except NoSuchElementException:
        pass
    except Exception:
        return None

    alternative_selectors = [
        'div[class*="startTime"]',
        'div[class*="matchTime"]',
        'div[class*="gameTime"]',
        '.duelParticipant [class*="time"]',
        '[class*="duel"] [class*="time"]'
    ]

    for selector in alternative_selectors:
        try:
            date_elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if date_elements:
                return date_elements[0].text.strip()
        except Exception:
            continue

    return None


def _extract_team_names(driver):
    """
    Return {'home': name or None, 'away': name or None} for the loaded match page.

    Each side is first searched inside containers whose class mentions that
    side; if either name is still missing, the first two participant-name
    elements on the page are used as home and away. Errors are swallowed
    (best effort), leaving whatever was found so far.
    """
    team_names = {'home': None, 'away': None}
    name_selector = '[class*="participant"][class*="Name"], [class*="team"][class*="Name"]'
    container_selectors = {
        'home': '[class*="home"], [class*="Home"]',
        'away': '[class*="away"], [class*="Away"]',
    }

    try:
        containers_by_side = {
            side: driver.find_elements(By.CSS_SELECTOR, selector)
            for side, selector in container_selectors.items()
        }

        for side, containers in containers_by_side.items():
            for container in containers:
                try:
                    name_element = container.find_element(By.CSS_SELECTOR, name_selector)
                    if name_element.text.strip():
                        team_names[side] = name_element.text.strip()
                        break
                except NoSuchElementException:
                    continue

        # If still not found, fall back to page order: first = home, second = away
        if not team_names['home'] or not team_names['away']:
            participant_elements = driver.find_elements(By.CSS_SELECTOR, '[class*="participant"][class*="Name"]')
            if len(participant_elements) >= 2:
                team_names['home'] = participant_elements[0].text.strip()
                team_names['away'] = participant_elements[1].text.strip()

    except Exception:
        # Best effort: return what has been collected
        pass

    return team_names


def _extract_incident_details(incident_element, driver):
    """
    Build a dict describing one incident row (goal, substitution or card).

    Keys that may be set: 'time', 'type', 'score', 'team', 'player', 'assist',
    'player_in', 'player_out', 'reason'. 'time' and 'player' are always
    present (None when not found); the others only when applicable.
    ``driver`` is accepted for call-site symmetry and is not used.
    """
    details = {}

    # Minute of the incident
    try:
        time_box = incident_element.find_element(By.CLASS_NAME, 'smv__timeBox')
        details['time'] = time_box.text.strip()
    except NoSuchElementException:
        details['time'] = None

    # A score element marks the incident as a goal; away is probed before home
    for side, score_class in (('away', 'smv__incidentAwayScore'),
                              ('home', 'smv__incidentHomeScore')):
        try:
            score_element = incident_element.find_element(By.CLASS_NAME, score_class)
            details['type'] = 'goal'
            details['score'] = score_element.text.strip()
            details['team'] = side
            break
        except NoSuchElementException:
            continue
        except Exception:
            # Best effort: give up on goal detection for this incident
            break

    # Main player of the incident
    try:
        player_name = incident_element.find_element(By.CLASS_NAME, 'smv__playerName')
        details['player'] = player_name.text.strip()
    except NoSuchElementException:
        details['player'] = None

    # Assist information (for goals): prefer the linked player name
    try:
        assist_elements = incident_element.find_elements(By.CSS_SELECTOR, '.smv__assist, .smv__assistAway')
        if assist_elements:
            assist_element = assist_elements[0]
            try:
                assist_player_link = assist_element.find_element(By.TAG_NAME, 'a')
                details['assist'] = assist_player_link.text.strip()
            except NoSuchElementException:
                details['assist'] = assist_element.text.strip()
    except Exception:
        details['assist'] = None

    # Substitution: presence of the sub icon decides the type
    try:
        incident_element.find_element(By.CLASS_NAME, 'smv__incidentIconSub')
        details['type'] = 'substitution'

        # Player coming in is the first player-name element
        try:
            player_in = incident_element.find_element(By.CLASS_NAME, 'smv__playerName')
            details['player_in'] = player_in.text.strip()
        except NoSuchElementException:
            details['player_in'] = None

        # Player going out carries the smv__subDown marker
        try:
            player_out = incident_element.find_element(By.CSS_SELECTOR, '.smv__subDown.smv__playerName, a.smv__subDown')
            details['player_out'] = player_out.text.strip()
        except NoSuchElementException:
            details['player_out'] = None

    except NoSuchElementException:
        pass

    # Cards: the first card icon decides yellow vs red
    try:
        card_icons = incident_element.find_elements(By.CSS_SELECTOR, '.card-ico.yellowCard-ico, .card-ico.redCard-ico')
        if card_icons:
            icon_classes = card_icons[0].get_attribute('class')
            if 'yellowCard-ico' in icon_classes:
                details['type'] = 'yellow_card'
            elif 'redCard-ico' in icon_classes:
                details['type'] = 'red_card'

            # Reason shown next to the card, when present
            try:
                sub_incident = incident_element.find_element(By.CLASS_NAME, 'smv__subIncident')
                details['reason'] = sub_incident.text.strip()
            except NoSuchElementException:
                details['reason'] = None
    except Exception:
        pass

    return details


def _parse_event_minute(time_str):
    """
    Convert a time label such as "90'", "45+3'" or "45+3" into a sortable minute.

    Stoppage time is added to the base minute. Empty or unparseable values
    yield 0.
    """
    if not time_str:
        return 0
    try:
        # Strip the trailing minute mark before splitting, so "45+3'" parses too
        cleaned = time_str.replace("'", "").strip()
        if '+' in cleaned:
            base_time, extra_time = cleaned.split('+')
            return int(base_time) + int(extra_time)
        return int(cleaned)
    except (AttributeError, TypeError, ValueError):
        return 0


def _parse_match_events_selenium(url_or_html=None, driver=None):
    """
    Parse one match page with Selenium and return its metadata and incidents.

    Args:
        url_or_html: A URL starting with 'http' to navigate to, or an HTML
            string (written to 'temp_match.html' in the working directory and
            loaded from there). When falsy, the driver's current page is parsed.
        driver: Existing WebDriver to use. When None a headless Chrome is
            created for this call and closed before returning.

    Returns:
        Dict with 'matchweek', 'game_date', 'team_names', 'headers',
        'first_half_merged', the per-half WebElement lists
        ('home/away_events_first/secondhalf') and the per-half incident dict
        lists ('home/away_incidents_first/secondhalf'); or None on timeout or
        any other error (a message is printed).
    """
    import os  # needed only for the HTML-string branch; not imported at file level

    close_driver = False
    if driver is None:
        driver = _setup_driver()
        close_driver = True

    try:
        if url_or_html and url_or_html.startswith('http'):
            driver.get(url_or_html)
            time.sleep(3)  # Wait for page to load
        elif url_or_html:
            # HTML string: save it to a temp file and load that file
            with open('temp_match.html', 'w', encoding='utf-8') as f:
                f.write(url_or_html)
            driver.get('file://' + os.path.abspath('temp_match.html'))
            time.sleep(2)

        matchweek = _extract_matchweek(driver)
        game_date = _extract_game_date(driver)
        team_names = _extract_team_names(driver)

        # Wait for the match events container to load
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'smv__verticalSections')))

        # Period header cells (e.g. "1er Tiempo", "2º Tiempo") and their scores
        score_headers = driver.find_elements(By.CLASS_NAME, 'wcl-overline_rOFfd.wcl-scores-overline-02_n9EXm.wcl-cell_LDXJM')

        # Headers come in (period, score) pairs; a trailing unpaired cell is ignored
        headers_info = [
            {'period': score_headers[i].text.strip(),
             'score': score_headers[i + 1].text.strip()}
            for i in range(0, len(score_headers) - 1, 2)
        ]

        all_home_events = driver.find_elements(By.CSS_SELECTOR, '.smv__participantRow.smv__homeParticipant')
        all_away_events = driver.find_elements(By.CSS_SELECTOR, '.smv__participantRow.smv__awayParticipant')

        # The second-half header is used as the vertical split point
        second_header = None
        for header in score_headers:
            if '2º Tiempo' in header.text or '2do Tiempo' in header.text:
                second_header = header
                break

        # None means "no split": every event is filed under the first half
        split_y = second_header.location['y'] if second_header else None

        def split_by_half(events):
            """Return (first_events, first_incidents, second_events, second_incidents)."""
            first_events, first_incidents = [], []
            second_events, second_incidents = [], []
            for event in events:
                in_first_half = split_y is None or event.location['y'] < split_y
                incident_details = _extract_incident_details(event, driver)
                if in_first_half:
                    first_events.append(event)
                    first_incidents.append(incident_details)
                else:
                    second_events.append(event)
                    second_incidents.append(incident_details)
            return first_events, first_incidents, second_events, second_incidents

        (home_events_firsthalf, home_incidents_firsthalf,
         home_events_secondhalf, home_incidents_secondhalf) = split_by_half(all_home_events)
        (away_events_firsthalf, away_incidents_firsthalf,
         away_events_secondhalf, away_incidents_secondhalf) = split_by_half(all_away_events)

        first_half_merged = {
            'home_events': home_incidents_firsthalf,
            'away_events': away_incidents_firsthalf,
            'all_events': home_incidents_firsthalf + away_incidents_firsthalf
        }
        first_half_merged['all_events_sorted'] = sorted(
            first_half_merged['all_events'],
            key=lambda x: _parse_event_minute(x.get('time', '0'))
        )

        return {
            'matchweek': matchweek,
            'game_date': game_date,
            'team_names': team_names,
            'headers': headers_info,
            'first_half_merged': first_half_merged,
            'home_events_firsthalf': home_events_firsthalf,
            'away_events_firsthalf': away_events_firsthalf,
            'home_events_secondhalf': home_events_secondhalf,
            'away_events_secondhalf': away_events_secondhalf,
            'home_incidents_firsthalf': home_incidents_firsthalf,
            'away_incidents_firsthalf': away_incidents_firsthalf,
            'home_incidents_secondhalf': home_incidents_secondhalf,
            'away_incidents_secondhalf': away_incidents_secondhalf
        }

    except TimeoutException:
        print("Timeout waiting for page elements to load")
        return None
    except Exception as e:
        print(f"Error parsing match events: {e}")
        return None
    finally:
        if close_driver:
            driver.quit()


def extract_league_information(league_url, season, include_second_half=False):
    """
    Scrape every match listed on a league results page.

    The results page is opened in Chrome and expanded until no more matches
    load; each match link is then opened in its own headless Chrome session
    and parsed. One progress line is printed per successfully parsed match.
    The scraping runs regardless of the module's ``__name__``.

    Args:
        league_url: URL of the league "results" page.
        season: Season label stored verbatim in every output record.
        include_second_half: When False (default) only the incidents filed
            under the first half are exported, as before. When True the
            second-half incidents are appended to each side's list.
            NOTE(review): when the page has a second-half header, the default
            drops second-half incidents - confirm whether that is intended.

    Returns:
        List with one entry per parsed match:
        [matchweek, game_date, season, team_names, home_incidents, away_incidents].
        Matches without a link or whose page could not be parsed are skipped.
        An empty list is returned when the page has no 'sportName soccer'
        container.
    """
    from urllib.parse import urljoin

    html = _load_full_results_html(league_url)
    soup = BeautifulSoup(html, 'html.parser')

    sport_container = soup.find(name='div', class_='sportName soccer')
    if sport_container is None:
        print("No 'sportName soccer' container found on the league page; nothing to extract")
        return []

    all_matches = sport_container.find_all(name='div', class_='event__match')

    list_league = []

    for match_row in all_matches:
        link = match_row.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            # Without a link there is no match page to parse
            continue

        # Absolute hrefs are returned unchanged; relative ones are resolved
        url = urljoin(league_url, href)
        result = _parse_match_events_selenium(url)
        if not result:
            continue

        home_incidents = result['home_incidents_firsthalf']
        away_incidents = result['away_incidents_firsthalf']
        if include_second_half:
            home_incidents = home_incidents + result['home_incidents_secondhalf']
            away_incidents = away_incidents + result['away_incidents_secondhalf']

        list_league.append([
            result['matchweek'],
            result['game_date'],
            season,
            result['team_names'],
            home_incidents,
            away_incidents
        ])

        print(result['matchweek'], result['game_date'], result['team_names'], season)

    return list_league

#leagues_information = extract_league_information('https://www.flashscore.co/futbol/inglaterra/premier-league/resultados/','2024/2025')





In [15]:
# premier_league_links = [
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2024-2025/resultados/','2024/2025'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2023-2024/resultados/','2023/2024'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2022-2023/resultados/','2022/2023'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2021-2022/resultados/','2021/2022'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2020-2021/resultados/','2020/2021'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2019-2020/resultados/','2019/2020'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2018-2019/resultados/','2018/2019'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2017-2018/resultados/','2017/2018'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2016-2017/resultados/','2016/2017'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2015-2016/resultados/','2015/2016'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2014-2015/resultados/','2014/2015'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2013-2014/resultados/','2013/2014'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2012-2013/resultados/','2012/2013'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2011-2012/resultados/','2011/2012'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2010-2011/resultados/','2010/2011'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2009-2010/resultados/','2009/2010'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2008-2009/resultados/','2008/2009'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2007-2008/resultados/','2007/2008'],
#     ['https://www.flashscore.co/futbol/inglaterra/premier-league-2006-2007/resultados/','2006/2007']
# ]

# championship_links = [
#     ['https://www.flashscore.co/futbol/inglaterra/championship/resultados/','2024/2025'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2023-2024/resultados/','2023/2024'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2022-2023/resultados/','2022/2023'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2021-2022/resultados/','2021/2022'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2020-2021/resultados/','2020/2021'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2019-2020/resultados/','2019/2020'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2018-2019/resultados/','2018/2019'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2017-2018/resultados/','2017/2018'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2016-2017/resultados/','2016/2017'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2015-2016/resultados/','2015/2016'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2014-2015/resultados/','2014/2015'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2013-2014/resultados/','2013/2014'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2012-2013/resultados/','2012/2013'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2011-2012/resultados/','2011/2012'],
#     ['https://www.flashscore.co/futbol/inglaterra/championship-2010-2011/resultados/','2010/2011']
# ]

# league_2dadivision_england_links = [
#     ['https://www.flashscore.co/futbol/inglaterra/league-one/resultados/','2024/2025'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2023-2024/resultados/','2023/2024'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2022-2023/resultados/','2022/2023'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2021-2022/resultados/','2021/2022'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2020-2021/resultados/','2020/2021'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2019-2020/resultados/','2019/2020'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2018-2019/resultados/','2018/2019'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2017-2018/resultados/','2017/2018'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2016-2017/resultados/','2016/2017'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2015-2016/resultados/','2015/2016'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2014-2015/resultados/','2014/2015'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2013-2014/resultados/','2013/2014'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2012-2013/resultados/','2012/2013'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2011-2012/resultados/','2011/2012'],
#     ['https://www.flashscore.co/futbol/inglaterra/league-one-2010-2011/resultados/','2010/2011']
# ]

# bundesliga_links = [
#     ['https://www.flashscore.co/futbol/alemania/bundesliga/resultados/','2024/2025'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2023-2024/resultados/','2023/2024'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2022-2023/resultados/','2022/2023'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2021-2022/resultados/','2021/2022'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2020-2021/resultados/','2020/2021'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2019-2020/resultados/','2019/2020'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2018-2019/resultados/','2018/2019'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2017-2018/resultados/','2017/2018'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2016-2017/resultados/','2016/2017'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2015-2016/resultados/','2015/2016'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2014-2015/resultados/','2014/2015'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2013-2014/resultados/','2013/2014'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2012-2013/resultados/','2012/2013'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2011-2012/resultados/','2011/2012'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2010-2011/resultados/','2010/2011'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2009-2010/resultados/','2009/2010'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2008-2009/resultados/','2008/2009'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2007-2008/resultados/','2007/2008'],
#     ['https://www.flashscore.co/futbol/alemania/bundesliga-2006-2007/resultados/','2006/2007']
# ]

# bundesliga2_links = [
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga/resultados/','2024/2025'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2023-2024/resultados/','2023/2024'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2022-2023/resultados/','2022/2023'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2021-2022/resultados/','2021/2022'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2020-2021/resultados/','2020/2021'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2019-2020/resultados/','2019/2020'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2018-2019/resultados/','2018/2019'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2017-2018/resultados/','2017/2018'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2016-2017/resultados/','2016/2017'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2015-2016/resultados/','2015/2016'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2014-2015/resultados/','2014/2015'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2013-2014/resultados/','2013/2014'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2012-2013/resultados/','2012/2013'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2011-2012/resultados/','2011/2012'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2010-2011/resultados/','2010/2011'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2009-2010/resultados/','2009/2010'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2008-2009/resultados/','2008/2009'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2007-2008/resultados/','2007/2008'],
#     ['https://www.flashscore.co/futbol/alemania/2-bundesliga-2006-2007/resultados/','2006/2007']
# ]

# Each entry is [results-page URL, season label]. Seasons are listed newest
# first; `start` is the calendar year in which the season begins.

# Eredivisie (Netherlands), 2024/2025 back to 2010/2011.
eredivise_links = [
    [f'https://www.flashscore.co/futbol/paises-bajos/eredivisie-{start}-{start + 1}/resultados/', f'{start}/{start + 1}']
    for start in range(2024, 2009, -1)
]

# Keuken Kampioen Divisie (Netherlands), 2024/2025 back to 2010/2011.
keuken_links = [
    [f'https://www.flashscore.co/futbol/paises-bajos/keuken-kampioen-divisie-{start}-{start + 1}/resultados/', f'{start}/{start + 1}']
    for start in range(2024, 2009, -1)
]

# LaLiga (Spain), 2024/2025 back to 2006/2007.
laliga_links = [
    [f'https://www.flashscore.co/futbol/espana/laliga-ea-sports-{start}-{start + 1}/resultados/', f'{start}/{start + 1}']
    for start in range(2024, 2005, -1)
]

# LaLiga Hypermotion (Spain), 2024/2025 back to 2010/2011.
laliga2_links = [
    [f'https://www.flashscore.co/futbol/espana/laliga-hypermotion-{start}-{start + 1}/resultados/', f'{start}/{start + 1}']
    for start in range(2024, 2009, -1)
]

# Ligue 1 (France), 2024/2025 back to 2006/2007.
league_one_france_links = [
    [f'https://www.flashscore.co/futbol/francia/ligue-1-{start}-{start + 1}/resultados/', f'{start}/{start + 1}']
    for start in range(2024, 2005, -1)
]

# Ligue 2 (France), 2024/2025 back to 2010/2011.
league_two_france_links = [
    [f'https://www.flashscore.co/futbol/francia/ligue-2-{start}-{start + 1}/resultados/', f'{start}/{start + 1}']
    for start in range(2024, 2009, -1)
]

# Starts empty in this cell.
leagues_information = []

# for elements in league_links:
#     leagues_information.append(extract_league_information(elements[0],elements[1]))

In [16]:
# Leagues handled by the scraping loop, in processing order. The commented
# entries are leagues that are currently switched off.
total_links_list = [
    # premier_league_links,
    # championship_links,
    # league_2dadivision_england_links,
    # bundesliga_links,
    # bundesliga2_links,
    eredivise_links,
    keuken_links,
    laliga_links,
    laliga2_links,
    league_one_france_links,
    league_two_france_links,
]


In [18]:
# Numeric suffix used in the output file names; bumped once per league below.
i = 6

for elements in total_links_list:

    # Scrape every [results URL, season label] pair of the current league.
    leagues_information = []
    for elementos in elements:
        leagues_information.append(extract_league_information(elementos[0], elementos[1]))

    FINAL_result = flatten_all_elements(leagues_information)

    # NOTE(review): the first six flattened entries are dropped before saving;
    # the reason is not visible in this cell -- confirm the offset is intended.
    with open(f'data_league{i}.json', 'w', encoding='utf-8') as f:
        json.dump(FINAL_result[6:], f, indent=2)

    # Read the file back so the statistics are built from exactly what was saved.
    with open(f'data_league{i}.json', 'r', encoding='utf-8') as f:
        read_data = json.load(f)

    df = extract_matches_to_dataframe(read_data)
    df2 = process_match_dataframe(df)

    goals_stats_teams = []

    # Only goal rows are counted below, so filter them once per league.
    goal_events = df2[df2['event_type'] == 'goal']

    unique_seasons = list(df2['season'].unique())

    for individual_season in unique_seasons:
        unique_teams = list(df2[df2['season'] == individual_season]['home_team'].unique())

        # Averages assume each team plays every other team once at home and
        # once away, i.e. (teams - 1) games per venue.
        games_per_venue = len(unique_teams) - 1
        games_per_season = games_per_venue * 2

        season_goals = goal_events[goal_events['season'] == individual_season]
        season_home_goals = season_goals[season_goals['team_side'] == 'home']
        season_away_goals = season_goals[season_goals['team_side'] == 'away']

        for individual_team in unique_teams:
            # Goals scored by this team as the home side / as the away side.
            team_home_goals = season_home_goals[season_home_goals['home_team'] == individual_team]
            team_away_goals = season_away_goals[season_away_goals['away_team'] == individual_team]

            home_goals_per_season = len(team_home_goals)
            away_goals_per_season = len(team_away_goals)
            average_goals_per_season = (home_goals_per_season + away_goals_per_season) / games_per_season

            # first half home season goals scored
            first_half_home_season_goals_scored = len(team_home_goals[team_home_goals['half_event'] == 'First Half'])
            # second half home season goals scored
            second_half_home_season_goals_scored = len(team_home_goals[team_home_goals['half_event'] == 'Second Half'])
            # first half away season goals scored
            first_half_away_season_goals_scored = len(team_away_goals[team_away_goals['half_event'] == 'First Half'])
            # second half away season goals scored
            second_half_away_season_goals_scored = len(team_away_goals[team_away_goals['half_event'] == 'Second Half'])

            # per-game averages over the whole season, split by half
            average_first_half_goals_scored = (first_half_home_season_goals_scored + first_half_away_season_goals_scored) / games_per_season
            average_second_half_goals_scored = (second_half_away_season_goals_scored + second_half_home_season_goals_scored) / games_per_season

            # per-game averages split by venue and half
            average_home_game_first_half_goals_scored = first_half_home_season_goals_scored / games_per_venue
            average_home_game_second_half_goals_scored = second_half_home_season_goals_scored / games_per_venue
            average_away_game_first_half_goals_scored = first_half_away_season_goals_scored / games_per_venue
            average_away_game_second_half_goals_scored = second_half_away_season_goals_scored / games_per_venue

            temporal_dict = {
                'season': individual_season,
                'team': individual_team,
                'home_goals_season': home_goals_per_season,
                'away_goals_season': away_goals_per_season,
                'average_goals_season': average_goals_per_season,
                'first_half_season_goals_scored': first_half_home_season_goals_scored + first_half_away_season_goals_scored,
                'second_half_season_goals_scored': second_half_away_season_goals_scored + second_half_home_season_goals_scored,
                'average_season_first_half_goals_scored': average_first_half_goals_scored,
                'average_season_second_half_goals_scored': average_second_half_goals_scored,
                'first_half_home_season_goals_scored': first_half_home_season_goals_scored,
                'second_half_home_season_goals_scored': second_half_home_season_goals_scored,
                'average_home_game_first_half_goals_scored': average_home_game_first_half_goals_scored,
                'average_home_game_second_half_goals_scored': average_home_game_second_half_goals_scored,
                'first_half_away_season_goals_scored': first_half_away_season_goals_scored,
                'second_half_away_season_goals_scored': second_half_away_season_goals_scored,
                'average_away_game_first_half_goals_scored': average_away_game_first_half_goals_scored,
                'average_away_game_second_half_goals_scored': average_away_game_second_half_goals_scored,
            }

            goals_stats_teams.append(temporal_dict)

    # One statistics file per league, containing every team/season record.
    # (Previously the name always used 0 and only the last team's dict was saved.)
    with open(f'goal_stats_league{i}.json', 'w', encoding='utf-8') as f:
        json.dump(goals_stats_teams, f, ensure_ascii=False, indent=4)

    i += 1

Iniciando carga de partidos...
EREDIVISIE - DESCENSO - PLAYOFFS - FINAL 01.06.2025 11:00 {'home': 'Willem II', 'away': 'Telstar\nVencedor'} 2024/2025
EREDIVISIE - DESCENSO - PLAYOFFS - FINAL 29.05.2025 13:00 {'home': 'Telstar', 'away': 'Willem II'} 2024/2025
EREDIVISIE - CONFERENCE LEAGUE - PLAY OFFS - FINAL 25.05.2025 11:00 {'home': 'Vencedor\nAZ Alkmaar', 'away': 'Twente'} 2024/2025
EREDIVISIE - DESCENSO - PLAYOFFS - SEMIFINALES 24.05.2025 13:00 {'home': 'Equipo que avanza\nWillem II', 'away': 'Dordrecht'} 2024/2025
EREDIVISIE - DESCENSO - PLAYOFFS - SEMIFINALES 23.05.2025 13:00 {'home': 'Equipo que avanza\nTelstar', 'away': 'Den Bosch'} 2024/2025
EREDIVISIE - CONFERENCE LEAGUE - PLAY OFFS - SEMIFINALES 22.05.2025 14:00 {'home': 'Equipo que avanza\nTwente', 'away': 'Nijmegen'} 2024/2025
EREDIVISIE - CONFERENCE LEAGUE - PLAY OFFS - SEMIFINALES 22.05.2025 11:45 {'home': 'Equipo que avanza\nAZ Alkmaar', 'away': 'Heerenveen'} 2024/2025
EREDIVISIE - DESCENSO - PLAYOFFS - SEMIFINALES 21.05

In [None]:
# Sanity check: number of entries currently held in leagues_information.
len(leagues_information)

6

In [216]:
# Sanity check: number of flattened entries left after skipping the first six.
len(FINAL_result[6:])

2280

In [217]:
# Inspect the last entry of the slice that skips the first six elements.
FINAL_result[6:][-1]

['LALIGA SANTANDER - JORNADA 1',
 '16.08.2019 14:00',
 '2019/2020',
 {'home': 'Athletic Club', 'away': 'Barcelona'},
 [{'time': "65'",
   'player': 'Núñez U.',
   'type': 'yellow_card',
   'reason': '(Entrada temeraria)'},
  {'time': "66'",
   'player': 'Sancet O.',
   'type': 'substitution',
   'player_in': 'Sancet O.',
   'player_out': 'de Marcos O.'},
  {'time': "81'",
   'player': 'Beñat',
   'type': 'substitution',
   'player_in': 'Beñat',
   'player_out': 'López U.'},
  {'time': "88'",
   'player': 'Aduriz A.',
   'type': 'substitution',
   'player_in': 'Aduriz A.',
   'player_out': 'Williams I.'},
  {'time': "89'",
   'type': 'goal',
   'score': '1 - 0',
   'team': 'home',
   'player': 'Aduriz A.',
   'assist': 'Capa A.'}],
 [{'time': "37'",
   'player': 'Rafinha',
   'type': 'substitution',
   'player_in': 'Rafinha',
   'player_out': 'Suárez L.'},
  {'time': "46'",
   'player': 'Rakitic I.',
   'type': 'substitution',
   'player_in': 'Rakitic I.',
   'player_out': 'Aleñá C.'},


In [218]:
import json

# Save FINAL_result, minus its first six entries, to my_data7.json as indented JSON.
with open('my_data7.json', 'w') as f:
    json.dump(FINAL_result[6:], f, indent=2)

In [3]:
# Load the JSON stored in my_data.json and show it as the cell output.
with open('my_data.json', 'r', encoding='utf-8') as f:
        leyendodatos = json.load(f)
leyendodatos

[['PREMIER LEAGUE - JORNADA 38',
  '25.05.2025 10:00',
  '2024/2025',
  {'home': 'Bournemouth', 'away': 'Leicester'},
  [{'time': "63'",
    'player': 'Jebbison D.',
    'type': 'substitution',
    'player_in': 'Jebbison D.',
    'player_out': 'Brooks D.'},
   {'time': "65'", 'player': None, 'assist': 'Jebbison D.'},
   {'time': "74'",
    'type': 'goal',
    'score': '1 - 0',
    'team': 'home',
    'player': 'Semenyo A.',
    'assist': 'Zabarnyi I.'},
   {'time': "78'",
    'player': 'Huijsen D.',
    'type': 'substitution',
    'player_in': 'Huijsen D.',
    'player_out': 'Senesi M.'},
   {'time': "88'",
    'type': 'goal',
    'score': '2 - 0',
    'team': 'home',
    'player': 'Semenyo A.',
    'assist': 'Huijsen D.'},
   {'time': "90+1'",
    'player': 'Scott A.',
    'type': 'substitution',
    'player_in': 'Scott A.',
    'player_out': 'Evanilson'},
   {'time': "90+2'",
    'player': 'Soler J.',
    'type': 'substitution',
    'player_in': 'Soler J.',
    'player_out': 'Taverni

In [7]:
import pandas as pd
import re

def process_match_dataframe(df):
    """
    Run the two post-processing stages on a match/event DataFrame.

    Stages, in order:
    1. split_competition_column: separates 'competition' into
       'competition' and 'matchweek'.
    2. add_half_event_column: derives 'half_event' from 'time'.

    Args:
        df: pandas DataFrame with 'competition' and 'time' columns

    Returns:
        pandas DataFrame with the processed columns; the input frame is
        copied first and is not modified.
    """
    return (
        df.copy()
        .pipe(split_competition_column)
        .pipe(add_half_event_column)
    )

def split_competition_column(df):
    """
    Separate a trailing matchweek label from the 'competition' column.

    Works on a copy of ``df``. Each 'competition' value of the form
    "<name> - <matchweek>" is split: the name is written back to
    'competition' and the label to 'matchweek'. Values that match none of the
    known suffixes keep their stripped text and get a matchweek of None;
    missing or non-string values are passed through unchanged with a
    matchweek of None.
    """
    # Compiled case-insensitively and tried in this order; first match wins.
    suffix_matchers = [
        re.compile(expression, re.IGNORECASE)
        for expression in (
            r'^(.+?)\s*-\s*(JORNADA\s+\d+)$',   # "PREMIER LEAGUE - JORNADA 38"
            r'^(.+?)\s*-\s*(MATCHDAY\s+\d+)$',  # "PREMIER LEAGUE - MATCHDAY 38"
            r'^(.+?)\s*-\s*(GAMEWEEK\s+\d+)$',  # "PREMIER LEAGUE - GAMEWEEK 38"
            r'^(.+?)\s*-\s*(WEEK\s+\d+)$',      # "PREMIER LEAGUE - WEEK 38"
            r'^(.+?)\s*-\s*(MD\s*\d+)$',        # "PREMIER LEAGUE - MD38"
            r'^(.+?)\s*-\s*(GW\s*\d+)$',        # "PREMIER LEAGUE - GW38"
            r'^(.+?)\s*-\s*(ROUND\s+\d+)$',     # "PREMIER LEAGUE - ROUND 38"
            r'^(.+?)\s*-\s*(\d+)$',             # "PREMIER LEAGUE - 38"
        )
    ]

    def split_single_competition(comp_str):
        """Return (competition_name, matchweek_or_None) for one cell value."""
        # Missing / non-text cells are returned as they came in.
        if pd.isna(comp_str) or not isinstance(comp_str, str):
            return comp_str, None

        cleaned = comp_str.strip()
        for matcher in suffix_matchers:
            found = matcher.match(cleaned)
            if found is not None:
                return found.group(1).strip(), found.group(2).strip()

        # No recognised suffix: the whole (stripped) text is the name.
        return cleaned, None

    result = df.copy()

    # Collect both halves of every (name, matchweek) pair in row order.
    names = []
    matchweeks = []
    for name, matchweek in result['competition'].apply(split_single_competition):
        names.append(name)
        matchweeks.append(matchweek)

    result['competition'] = names
    result['matchweek'] = matchweeks
    return result

def add_half_event_column(df):
    """
    Return a copy of ``df`` with a 'half_event' column derived from 'time'.

    The leading integer of each 'time' value (e.g. 74 from "74'", 45 from
    "45+2'") decides the label: 1-45 gives 'First Half', 46 and above gives
    'Second Half'. Anything else (missing, non-string, no leading digits,
    minute 0) gives None.
    """
    leading_minute = re.compile(r'(\d+)')

    def determine_half(time_str):
        """Map one 'time' value to 'First Half', 'Second Half' or None."""
        if pd.isna(time_str) or not isinstance(time_str, str):
            return None

        # Only the base minute matters; stoppage-time suffixes are ignored.
        found = leading_minute.match(str(time_str).strip())
        if found is None:
            return None

        try:
            minute = int(found.group(1))
        except ValueError:
            return None

        if minute < 1:
            return None
        return 'First Half' if minute <= 45 else 'Second Half'

    result = df.copy()
    result['half_event'] = result['time'].apply(determine_half)
    return result

def reorder_columns(df):
    """
    Return ``df`` with its columns arranged in a fixed, readable order.

    Columns named in the preferred list come first, in that list's order;
    every other column follows in the order it already had in ``df``.
    Preferred names that are absent from ``df`` are simply skipped.
    """
    preferred_order = [
        'match_id', 'competition', 'matchweek', 'datetime', 'season',
        'home_team', 'away_team', 'team_side', 'event_source',
        'event_type', 'time', 'half_event', 'player', 'score',
        'assist', 'player_in', 'player_out', 'reason'
    ]

    present = df.columns.tolist()

    # Preferred columns that the frame actually has, in preferred order.
    leading = [name for name in preferred_order if name in present]

    # Everything else keeps its current relative position, after the leaders.
    trailing = [name for name in present if name not in leading]

    return df[leading + trailing]

# Build the processed events frame: `df` with 'competition' split into
# competition/matchweek and a 'half_event' column added. reorder_columns is
# defined above but is not applied here.
# NOTE(review): `df` is not defined in this cell; it must already exist in the
# kernel when this runs.
df2 = process_match_dataframe(df)

In [8]:
# Goal events from every match involving Liverpool (as home or away team).
# The home/away test needs its own parentheses: `&` binds tighter than `|`,
# so without them the goal filter would apply to the away-team condition only
# and every home-match event (cards, substitutions, ...) would be included.
# NOTE(review): presumably "Liverpool match AND goal" is the intent - confirm.
# Only the last expression of a cell is rendered, so this frame and the home
# count below are evaluated but not shown.
df[((df['home_team'] == 'Liverpool') | (df['away_team'] == 'Liverpool')) & (df['event_type'] == 'goal')]

# Number of 2024/2025 goal rows tagged to the home side in Liverpool home matches.
len(df[(df['home_team'] == 'Liverpool') & (df['team_side'] == 'home') & (df['event_type'] == 'goal') & (df['season'] == '2024/2025')])

# Number of 2024/2025 goal rows tagged to the away side in Liverpool away
# matches (this is the value the cell displays).
len(df[(df['away_team'] == 'Liverpool') & (df['team_side'] == 'away') & (df['event_type'] == 'goal') & (df['season'] == '2024/2025')])

44

In [9]:
# Rows tagged team_side == 'home' from Liverpool's home match in
# matchweek 'JORNADA 36' of season 2024/2025.
df2.loc[
    (df2['home_team'] == 'Liverpool')
    & (df2['team_side'] == 'home')
    & (df2['season'] == '2024/2025')
    & (df2['matchweek'] == 'JORNADA 36')
]

Unnamed: 0,match_id,competition,datetime,season,home_team,away_team,team_side,event_source,event_type,time,player,score,assist,player_in,player_out,reason,matchweek,half_event
311,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,goal,20',Gakpo C.,1 - 0,Robertson A.,,,,JORNADA 36,First Half
312,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,goal,21',Diaz L.,2 - 0,Szoboszlai D.,,,,JORNADA 36,First Half
313,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,yellow_card,60',Bradley C.,,,,,(Entrada temeraria),JORNADA 36,Second Half
314,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,substitution,66',Mac Allister A.,,,Mac Allister A.,Gakpo C.,,JORNADA 36,Second Half
315,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,substitution,67',Alexander-Arnold T.,,,Alexander-Arnold T.,Bradley C.,,JORNADA 36,Second Half
316,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,substitution,67',Núñez D.,,,Núñez D.,Jones C.,,JORNADA 36,Second Half
317,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,substitution,79',Diogo Jota,,,Diogo Jota,Diaz L.,,JORNADA 36,Second Half
318,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,substitution,83',Elliott H.,,,Elliott H.,Gravenberch R.,,JORNADA 36,Second Half
319,20,PREMIER LEAGUE,11.05.2025 10:30,2024/2025,Liverpool,Arsenal,home,list_0,,90+7',,,Robertson A.,,,,JORNADA 36,Second Half


In [10]:
# Show the processed events frame as this cell's output.
df2

Unnamed: 0,match_id,competition,datetime,season,home_team,away_team,team_side,event_source,event_type,time,player,score,assist,player_in,player_out,reason,matchweek,half_event
0,0,PREMIER LEAGUE,25.05.2025 10:00,2024/2025,Bournemouth,Leicester,home,list_0,substitution,63',Jebbison D.,,,Jebbison D.,Brooks D.,,JORNADA 38,Second Half
1,0,PREMIER LEAGUE,25.05.2025 10:00,2024/2025,Bournemouth,Leicester,home,list_0,,65',,,Jebbison D.,,,,JORNADA 38,Second Half
2,0,PREMIER LEAGUE,25.05.2025 10:00,2024/2025,Bournemouth,Leicester,home,list_0,goal,74',Semenyo A.,1 - 0,Zabarnyi I.,,,,JORNADA 38,Second Half
3,0,PREMIER LEAGUE,25.05.2025 10:00,2024/2025,Bournemouth,Leicester,home,list_0,substitution,78',Huijsen D.,,,Huijsen D.,Senesi M.,,JORNADA 38,Second Half
4,0,PREMIER LEAGUE,25.05.2025 10:00,2024/2025,Bournemouth,Leicester,home,list_0,goal,88',Semenyo A.,2 - 0,Huijsen D.,,,,JORNADA 38,Second Half
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31458,2279,PREMIER LEAGUE,09.08.2019 14:00,2019/2020,Liverpool,Norwich,away,list_1,yellow_card,60',Leitner M.,,,,,(Derribar a un rival),JORNADA 1,Second Half
31459,2279,PREMIER LEAGUE,09.08.2019 14:00,2019/2020,Liverpool,Norwich,away,list_1,goal,64',Pukki T.,4 - 1,Buendia E.,,,,JORNADA 1,Second Half
31460,2279,PREMIER LEAGUE,09.08.2019 14:00,2019/2020,Liverpool,Norwich,away,list_1,yellow_card,65',Buendia E.,,,,,(Entrada temeraria),JORNADA 1,Second Half
31461,2279,PREMIER LEAGUE,09.08.2019 14:00,2019/2020,Liverpool,Norwich,away,list_1,substitution,70',Hernandez O.,,,Hernandez O.,Trybull T.,,JORNADA 1,Second Half


In [None]:
# Distinct values of the 'season' column of df2, as a plain list.
# The goal-statistics cell that follows assigns this same expression again.
unique_seasons = list(df2['season'].unique())

In [None]:
# Per-season, per-team goal statistics.
#
# Result: `goals_stats_teams`, a list with one dict per (season, team) holding
# home/away goal counts, their first-half/second-half split, and per-game
# averages. Teams are taken from the 'home_team' values seen in each season.
#
# Assumption carried over from the original calculation: the number of games
# is derived from the team count (each team hosts every other team once), not
# counted from the data - so averages are off for an incomplete season.
goals_stats_teams = []

unique_seasons = list(df2['season'].unique())


def _per_game(total, games):
    """Return total / games, or NaN when there are no games to average over."""
    return total / games if games > 0 else float('nan')


def _count_half(goal_rows, half_label):
    """Count the rows of `goal_rows` whose 'half_event' equals `half_label`."""
    return int((goal_rows['half_event'] == half_label).sum())


for individual_season in unique_seasons:
    season_rows = df2[df2['season'] == individual_season]
    unique_teams = list(season_rows['home_team'].unique())

    # Narrow to goal rows once per season instead of re-filtering the whole
    # frame for every statistic of every team.
    season_goals = season_rows[season_rows['event_type'] == 'goal']
    goals_by_home_side = season_goals[season_goals['team_side'] == 'home']
    goals_by_away_side = season_goals[season_goals['team_side'] == 'away']

    # Games per venue (home or away) and per season, from the team count.
    games_per_venue = len(unique_teams) - 1
    games_per_season = games_per_venue * 2

    for individual_team in unique_teams:
        # Goals the team scored as the home side / as the away side.
        home_goal_rows = goals_by_home_side[goals_by_home_side['home_team'] == individual_team]
        away_goal_rows = goals_by_away_side[goals_by_away_side['away_team'] == individual_team]

        home_goals_per_season = len(home_goal_rows)
        away_goals_per_season = len(away_goal_rows)
        average_goals_per_season = _per_game(home_goals_per_season + away_goals_per_season, games_per_season)

        # Split by half; rows without a 'half_event' label fall in neither bucket.
        first_half_home_season_goals_scored = _count_half(home_goal_rows, 'First Half')
        second_half_home_season_goals_scored = _count_half(home_goal_rows, 'Second Half')
        first_half_away_season_goals_scored = _count_half(away_goal_rows, 'First Half')
        second_half_away_season_goals_scored = _count_half(away_goal_rows, 'Second Half')

        # Season-wide per-game averages for each half.
        average_first_half_goals_scored = _per_game(
            first_half_home_season_goals_scored + first_half_away_season_goals_scored, games_per_season)
        average_second_half_goals_scored = _per_game(
            second_half_away_season_goals_scored + second_half_home_season_goals_scored, games_per_season)

        # Per-venue per-game averages for each half.
        average_home_game_first_half_goals_scored = _per_game(first_half_home_season_goals_scored, games_per_venue)
        average_home_game_second_half_goals_scored = _per_game(second_half_home_season_goals_scored, games_per_venue)
        average_away_game_first_half_goals_scored = _per_game(first_half_away_season_goals_scored, games_per_venue)
        average_away_game_second_half_goals_scored = _per_game(second_half_away_season_goals_scored, games_per_venue)

        temporal_dict = {
            'season': individual_season,
            'team': individual_team,
            'home_goals_season': home_goals_per_season,
            'away_goals_season': away_goals_per_season,
            'average_goals_season': average_goals_per_season,
            'first_half_season_goals_scored': first_half_home_season_goals_scored + first_half_away_season_goals_scored,
            'second_half_season_goals_scored': second_half_away_season_goals_scored + second_half_home_season_goals_scored,
            'average_season_first_half_goals_scored': average_first_half_goals_scored,
            'average_season_second_half_goals_scored': average_second_half_goals_scored,
            'first_half_home_season_goals_scored': first_half_home_season_goals_scored,
            'second_half_home_season_goals_scored': second_half_home_season_goals_scored,
            'average_home_game_first_half_goals_scored': average_home_game_first_half_goals_scored,
            'average_home_game_second_half_goals_scored': average_home_game_second_half_goals_scored,
            'first_half_away_season_goals_scored': first_half_away_season_goals_scored,
            'second_half_away_season_goals_scored': second_half_away_season_goals_scored,
            'average_away_game_first_half_goals_scored': average_away_game_first_half_goals_scored,
            'average_away_game_second_half_goals_scored': average_away_game_second_half_goals_scored,
        }

        goals_stats_teams.append(temporal_dict)

season: 2024/2025 , team: Bournemouth: home goals: 23, away goals: 35, average goals: 1.5263157894736843
first half all season goals scored: 21 second half all season goals scored: 37
average goals first half: 0.5526315789473685 average goals second half: 0.9736842105263158
home game first half goals scored: 10 home game second half goals scored 13
average home game first half goals: 0.5263157894736842 average home game second half goals: 0.6842105263157895
away game first half goals scored:11  away game second half goals scored:24
average away game first half goals: 0.5789473684210527 average away game second half goals: 1.263157894736842
season: 2024/2025 , team: Fulham: home goals: 27, away goals: 27, average goals: 1.4210526315789473
first half all season goals scored: 22 second half all season goals scored: 32
average goals first half: 0.5789473684210527 average goals second half: 0.8421052631578947
home game first half goals scored: 15 home game second half goals scored 12
averag

In [None]:
# NOTE(review): `x` looks like scratch state; it is kept in case another cell
# reads it - confirm and remove.
x=0

# Inspect one entry of the per-team statistics. This must be the cell's final
# statement: a trailing assignment leaves the cell without a displayed value.
# NOTE(review): index 3 is positional and depends on the order in which
# seasons/teams were appended.
goals_stats_teams[3]

{'season': '2024/2025',
 'team': 'Liverpool',
 'home_goals_season': 42,
 'away_goals_season': 44,
 'average_goals_season': 2.263157894736842,
 'first_half_season_goals_scored': 40,
 'second_half_season_goals_scored': 46,
 'average_season_first_half_goals_scored': 1.0526315789473684,
 'average_season_second_half_goals_scored': 1.2105263157894737,
 'first_half_home_season_goals_scored': 20,
 'second_half_home_season_goals_scored': 22,
 'average_home_game_first_half_goals_scored': 1.0526315789473684,
 'average_home_game_second_half_goals_scored': 1.1578947368421053,
 'first_half_away_season_goals_scored': 20,
 'second_half_away_season_goals_scored': 24,
 'average_away_game_first_half_goals_scored': 1.0526315789473684,
 'average_away_game_second_half_goals_scored': 1.263157894736842}