## 1. Setup

In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from pathlib import Path
import re

# Setup paths
def _find_root(start=None):
    p = start or Path.cwd()
    for _ in range(6):
        if (p / 'data').exists() or (p / '.git').exists() or (p / 'notebooks').exists():
            return p
        p = p.parent
    return Path.cwd()

ROOT = _find_root()
PLAYOFFS_DIR = ROOT / 'data' / 'playoffs'
PLAYOFFS_DIR.mkdir(parents=True, exist_ok=True)

print(f"‚úÖ Setup complete")
print(f"   ROOT: {ROOT}")
print(f"   PLAYOFFS_DIR: {PLAYOFFS_DIR}")

‚úÖ Setup complete
   ROOT: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project\notebooks
   PLAYOFFS_DIR: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project\notebooks\data\playoffs


## 2. Define Season Parameters

IFA website structure:
- `league_id=40` - Championship Playoff (◊§◊ú◊ô◊ô◊ê◊ï◊£ ◊¢◊ú◊ô◊ï◊ü)
- `season_id` varies by year

Need to find the season_id for each year from 2009/10 onwards.

In [8]:
# Season mapping - need to find correct season_id for each year
# These are examples, need to verify actual IDs
SEASON_MAPPING = {
    '2024/25': {'season_id': 11, 'league_id': 40},  # From your URL
    '2023/24': {'season_id': 10, 'league_id': 40},
    '2022/23': {'season_id': 9, 'league_id': 40},
    '2021/22': {'season_id': 8, 'league_id': 40},
    '2020/21': {'season_id': 7, 'league_id': 40},
    '2019/20': {'season_id': 6, 'league_id': 40},
    '2018/19': {'season_id': 5, 'league_id': 40},
    '2017/18': {'season_id': 4, 'league_id': 40},
    '2016/17': {'season_id': 3, 'league_id': 40},
    '2015/16': {'season_id': 2, 'league_id': 40},
    '2014/15': {'season_id': 1, 'league_id': 40},
    # Add more seasons as needed
}

print("üìã Season mapping defined:")
for season, params in SEASON_MAPPING.items():
    print(f"   {season}: season_id={params['season_id']}, league_id={params['league_id']}")

üìã Season mapping defined:
   2024/25: season_id=11, league_id=40
   2023/24: season_id=10, league_id=40
   2022/23: season_id=9, league_id=40
   2021/22: season_id=8, league_id=40
   2020/21: season_id=7, league_id=40
   2019/20: season_id=6, league_id=40
   2018/19: season_id=5, league_id=40
   2017/18: season_id=4, league_id=40
   2016/17: season_id=3, league_id=40
   2015/16: season_id=2, league_id=40
   2014/15: season_id=1, league_id=40


## 3. Test Single Season Scraping

In [10]:
def scrape_ifa_playoff_round(season_id, league_id=40):
    """
    Scrape playoff data from IFA website
    
    Args:
        season_id: Season ID from IFA website
        league_id: League ID (40 = Championship Playoff)
    """
    url = f"https://www.football.org.il/leagues/league/?league_id={league_id}&season_id={season_id}"
    
    print(f"üîç Attempting to scrape: {url}")
    
    try:
        # More complete headers to mimic real browser
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'he-IL,he;q=0.9,en-US;q=0.8,en;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Referer': 'https://www.football.org.il/',
        }
        
        # Add delay to be respectful
        time.sleep(2)
        
        response = requests.get(url, headers=headers, timeout=15)
        
        print(f"üì° Response status: {response.status_code}")
        
        if response.status_code == 403:
            print("‚ö†Ô∏è Website is blocking automated requests (403 Forbidden)")
            print("üí° Recommendations:")
            print("   1. Use browser Developer Tools to copy the page HTML manually")
            print("   2. Use Selenium with a real browser")
            print("   3. Enter data manually from the website")
            return None
        
        response.raise_for_status()
        
        soup = BeautifulSoup(response.content, 'html.parser')
        
        print(f"üìÑ Page title: {soup.title.string if soup.title else 'No title'}")
        
        # Look for common table/match structures
        tables = soup.find_all('table')
        print(f"üìã Found {len(tables)} tables")
        
        divs_with_class = soup.find_all('div', class_=True)
        print(f"üì¶ Found {len(divs_with_class)} divs with classes")
        
        # Try to find matches
        for table in tables[:3]:  # Check first 3 tables
            rows = table.find_all('tr')
            if rows:
                print(f"\nüîç Table with {len(rows)} rows found")
                print("Sample row HTML:")
                print(rows[0].prettify()[:500] if rows else "No rows")
        
        return soup
        
    except requests.RequestException as e:
        print(f"‚ùå Error: {e}")
        return None

# Test with 2024/25 season
print("=" * 80)
test_soup = scrape_ifa_playoff_round(season_id=11, league_id=40)

üîç Attempting to scrape: https://www.football.org.il/leagues/league/?league_id=40&season_id=11
üì° Response status: 403
‚ö†Ô∏è Website is blocking automated requests (403 Forbidden)
üí° Recommendations:
   1. Use browser Developer Tools to copy the page HTML manually
   2. Use Selenium with a real browser
   3. Enter data manually from the website
üì° Response status: 403
‚ö†Ô∏è Website is blocking automated requests (403 Forbidden)
üí° Recommendations:
   1. Use browser Developer Tools to copy the page HTML manually
   2. Use Selenium with a real browser
   3. Enter data manually from the website


In [11]:
# Let's try to find if there's an API endpoint
# Often modern websites load data via AJAX/API calls

def try_api_endpoints(season_id=11, league_id=40):
    """Try different API endpoint patterns"""
    
    base_patterns = [
        f"https://www.football.org.il/api/leagues/{league_id}/seasons/{season_id}/matches",
        f"https://www.football.org.il/api/league/{league_id}/season/{season_id}",
        f"https://www.football.org.il/wp-json/football/v1/league/{league_id}/season/{season_id}",
        f"https://www.football.org.il/leagues/get_schedule?league_id={league_id}&season_id={season_id}",
    ]
    
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'application/json',
        'Referer': f'https://www.football.org.il/leagues/league/?league_id={league_id}&season_id={season_id}',
    }
    
    for url in base_patterns:
        print(f"\nüîç Trying: {url}")
        try:
            time.sleep(1)
            response = requests.get(url, headers=headers, timeout=10)
            print(f"   Status: {response.status_code}")
            
            if response.status_code == 200:
                print(f"   ‚úÖ Success! Content type: {response.headers.get('content-type')}")
                print(f"   Content length: {len(response.content)} bytes")
                
                # Try to parse as JSON
                try:
                    data = response.json()
                    print(f"   üì¶ JSON data keys: {list(data.keys())[:10]}")
                    return data
                except:
                    print(f"   üìÑ HTML/Text content preview:")
                    print(response.text[:500])
            
        except Exception as e:
            print(f"   ‚ùå Error: {e}")
    
    return None

# Try to find API
api_data = try_api_endpoints(season_id=11, league_id=40)


üîç Trying: https://www.football.org.il/api/leagues/40/seasons/11/matches
   Status: 403

üîç Trying: https://www.football.org.il/api/league/40/season/11
   Status: 403

üîç Trying: https://www.football.org.il/wp-json/football/v1/league/40/season/11
   Status: 403

üîç Trying: https://www.football.org.il/leagues/get_schedule?league_id=40&season_id=11
   Status: 403


## ‚ö†Ô∏è ◊î◊û◊°◊ß◊†◊î: ◊î◊ê ◊™◊® ◊ó◊ï◊°◊ù ◊°◊ß◊®◊ô◊ô◊§◊ô◊†◊í ◊ê◊ï◊ò◊ï◊û◊ò◊ô

◊î◊ê◊™◊® ◊©◊ú ◊î◊î◊™◊ê◊ó◊ì◊ï◊™ ◊ó◊ï◊°◊ù ◊õ◊ú ◊ë◊ß◊©◊ï◊™ ◊ê◊ï◊ò◊ï◊û◊ò◊ô◊ï◊™ (403 Forbidden).

### ◊î◊§◊™◊®◊ï◊ü ◊î◊û◊¢◊©◊ô: ◊õ◊†◊ô◊°◊î ◊ô◊ì◊†◊ô◊™ ◊û◊ê◊ï◊®◊í◊†◊™

◊†◊¶◊ï◊® ◊õ◊ú◊ô ◊©◊ô◊¢◊ñ◊ï◊® ◊ú◊î◊ñ◊ô◊ü ◊ê◊™ ◊î◊†◊™◊ï◊†◊ô◊ù ◊ô◊ì◊†◊ô◊™ ◊ë◊¶◊ï◊®◊î ◊û◊î◊ô◊®◊î ◊ï◊ô◊¢◊ô◊ú◊î:

1. **◊ò◊ï◊§◊° ◊î◊ñ◊†◊î ◊§◊©◊ï◊ò** - ◊†◊õ◊†◊ô◊° ◊û◊ó◊ñ◊ï◊® ◊ê◊ó◊® ◊û◊ó◊ñ◊ï◊®
2. **◊ï◊ú◊ô◊ì◊¶◊ô◊î ◊ê◊ï◊ò◊ï◊û◊ò◊ô◊™** - ◊ú◊ï◊ï◊ì◊ê ◊©◊õ◊ú ◊ß◊ë◊ï◊¶◊î ◊û◊©◊ó◊ß◊™ ◊§◊¢◊ù ◊ê◊ó◊™
3. **◊©◊û◊ô◊®◊î ◊û◊ô◊ô◊ì◊ô◊™** - ◊õ◊ú ◊û◊ó◊ñ◊ï◊® ◊†◊©◊û◊® ◊ë◊†◊§◊®◊ì

In [12]:
# ◊õ◊ú◊ô ◊¢◊ñ◊® ◊ú◊î◊ñ◊†◊™ ◊†◊™◊ï◊†◊ô ◊§◊ú◊ô◊ô◊ê◊ï◊£ ◊ô◊ì◊†◊ô◊™
# Manual data entry helper for playoff data

def create_manual_entry_template(season_str='2024/25', num_teams=6, num_rounds=10):
    """
    Create a template CSV for manual data entry
    Format matches what we'll see on IFA website
    """
    
    print(f"üìù Creating manual entry template for {season_str}")
    print(f"   Teams: {num_teams}, Expected rounds: {num_rounds}")
    print(f"\nüåê Visit: https://www.football.org.il/leagues/league/?league_id=40&season_id=11")
    print(f"\nüìã Instructions:")
    print(f"   1. For each round (◊û◊ó◊ñ◊ï◊® 35-44), note:")
    print(f"      - Round number (◊û◊ó◊ñ◊ï◊®)")
    print(f"      - Date (◊™◊ê◊®◊ô◊ö)")
    print(f"      - Home team (◊û◊©◊ó◊ß)")
    print(f"      - Away team (◊û◊©◊ó◊ß)")
    print(f"      - Score (◊™◊ï◊¶◊ê◊î)")
    print(f"   2. Fill in the data below")
    print(f"   3. Run validation to check correctness")
    print(f"\n" + "="*80)
    
    # Create empty template
    template_data = {
        'season': [],
        'round': [],
        'date': [],
        'home_team': [],
        'away_team': [],
        'home_goals': [],
        'away_goals': [],
    }
    
    return pd.DataFrame(template_data)

def validate_playoff_data(df, num_teams=6):
    """
    Validate manually entered playoff data
    Check that each team plays exactly once per round
    """
    print("üîç Validating data...")
    
    issues = []
    
    for round_num in sorted(df['round'].unique()):
        round_df = df[df['round'] == round_num]
        
        # Get all teams in this round
        teams_in_round = []
        for _, match in round_df.iterrows():
            teams_in_round.append(match['home_team'])
            teams_in_round.append(match['away_team'])
        
        # Check for duplicates
        team_counts = pd.Series(teams_in_round).value_counts()
        
        print(f"\n  Round {round_num}: {len(round_df)} matches")
        
        duplicates = team_counts[team_counts > 1]
        if not duplicates.empty:
            issues.append(f"Round {round_num}: Teams playing more than once: {duplicates.to_dict()}")
            print(f"    ‚ö†Ô∏è Duplicates: {duplicates.to_dict()}")
        
        if len(set(teams_in_round)) != num_teams:
            issues.append(f"Round {round_num}: Expected {num_teams} teams, found {len(set(teams_in_round))}")
            print(f"    ‚ö†Ô∏è Expected {num_teams} teams, found {len(set(teams_in_round))}")
        else:
            print(f"    ‚úÖ All {num_teams} teams present, each once")
    
    if not issues:
        print(f"\n‚úÖ All validations passed!")
        return True
    else:
        print(f"\n‚ùå Found {len(issues)} issues:")
        for issue in issues:
            print(f"   - {issue}")
        return False

# Create template
template_df = create_manual_entry_template('2024/25')

üìù Creating manual entry template for 2024/25
   Teams: 6, Expected rounds: 10

üåê Visit: https://www.football.org.il/leagues/league/?league_id=40&season_id=11

üìã Instructions:
   1. For each round (◊û◊ó◊ñ◊ï◊® 35-44), note:
      - Round number (◊û◊ó◊ñ◊ï◊®)
      - Date (◊™◊ê◊®◊ô◊ö)
      - Home team (◊û◊©◊ó◊ß)
      - Away team (◊û◊©◊ó◊ß)
      - Score (◊™◊ï◊¶◊ê◊î)
   2. Fill in the data below
   3. Run validation to check correctness



## üí° ◊î◊§◊™◊®◊ï◊ü ◊î◊û◊¢◊©◊ô: ◊î◊ï◊°◊§◊™ ◊û◊ó◊ñ◊ï◊®◊ô◊ù ◊ú◊†◊™◊ï◊†◊ô◊ù ◊ß◊ô◊ô◊û◊ô◊ù

◊ë◊û◊ß◊ï◊ù ◊ú◊ê◊°◊ï◊£ ◊î◊õ◊ú ◊û◊ó◊ì◊©, **◊†◊©◊™◊û◊© ◊ë◊†◊™◊ï◊†◊ô◊ù ◊û◊ï◊ï◊ô◊ß◊ô◊§◊ì◊ô◊î ◊©◊õ◊ë◊® ◊ô◊© ◊ú◊†◊ï**
◊ï◊§◊©◊ï◊ò **◊†◊ï◊°◊ô◊£ ◊¢◊û◊ï◊ì◊™ ◊û◊ó◊ñ◊ï◊®** ◊¢◊ú ◊ë◊°◊ô◊° ◊î◊ê◊™◊® ◊©◊ú ◊î◊î◊™◊ê◊ó◊ì◊ï◊™.

### ◊î◊©◊ô◊ò◊î:
1. ◊†◊¶◊ô◊í ◊ê◊™ ◊î◊û◊©◊ó◊ß◊ô◊ù ◊î◊ß◊ô◊ô◊û◊ô◊ù ◊û◊û◊ï◊ï◊ô◊ß◊ô◊§◊ì◊ô◊î
2. ◊†◊ô◊õ◊†◊° ◊ú◊ê◊™◊® ◊î◊î◊™◊ê◊ó◊ì◊ï◊™ ◊ë◊õ◊ú ◊û◊ó◊ñ◊ï◊®
3. ◊†◊ñ◊î◊î ◊ê◊ô◊ñ◊î ◊û◊©◊ó◊ß◊ô◊ù ◊©◊ô◊ô◊õ◊ô◊ù ◊ú◊ê◊ô◊ñ◊î ◊û◊ó◊ñ◊ï◊®
4. ◊†◊ñ◊ô◊ü ◊®◊©◊ô◊û◊î ◊©◊ú ◊û◊°◊§◊®◊ô ◊û◊ó◊ñ◊ï◊®◊ô◊ù
5. ◊†◊©◊û◊ï◊® ◊ß◊ï◊ë◊• ◊ó◊ì◊© ◊¢◊ù ◊î◊¢◊û◊ï◊ì◊î `round`

In [14]:
# Load existing Wikipedia data and prepare for round assignment
season_to_process = '2024/25'
wiki_file = PLAYOFFS_DIR / f'playoffs_championship_{season_to_process.replace("/", "_")}_ligat_haal_wikipedia.csv'

if wiki_file.exists():
    df_wiki = pd.read_csv(wiki_file)
    
    print(f"üìä Loaded {season_to_process} playoff data: {len(df_wiki)} matches")
    print(f"\nüîó IFA Website: https://www.football.org.il/leagues/league/?league_id=40&season_id=11")
    print(f"\nüìã Matches from Wikipedia (in current order):")
    print("="*100)
    
    for idx, row in df_wiki.iterrows():
        # Handle different column names
        if 'home_team' in df_wiki.columns:
            home = row['home_team']
            away = row['away_team']
            hg = row['home_goals']
            ag = row['away_goals']
        else:
            home = row['home'] if 'home' in row else row.get('home_team', '')
            away = row['away'] if 'away' in row else row.get('away_team', '')
            if 'score' in row:
                hg, ag = row['score'].split(':')
            else:
                hg = row.get('home_goals', '')
                ag = row.get('away_goals', '')
        
        # Format output (handle int/string for goals)
        print(f"  [{idx:2d}] {str(home):25s} {hg}-{ag} {away}")
    
    print(f"\n" + "="*100)
    print(f"\nüí° Instructions:")
    print(f"   1. Open IFA website in browser")
    print(f"   2. Select round 35 (◊û◊ó◊ñ◊ï◊® 35) from dropdown")
    print(f"   3. Note which matches (by index [0], [1], etc.) are in round 35")
    print(f"   4. Repeat for rounds 36-44")
    print(f"   5. Create a list: round_numbers = [35, 35, 35, 36, 36, ...]")
    print(f"\nüìù Example:")
    print(f"   If matches [0],[1],[2] are in round 35, and [3],[4],[5] in round 36:")
    print(f"   round_numbers = [35, 35, 35, 36, 36, 36, ...]")
    
else:
    print(f"‚ùå File not found: {wiki_file}")

üìä Loaded 2024/25 playoff data: 30 matches

üîó IFA Website: https://www.football.org.il/leagues/league/?league_id=40&season_id=11

üìã Matches from Wikipedia (in current order):
  [ 0] Maccabi Tel Aviv          1-1 Hapoel Be'er Sheva
  [ 1] Maccabi Tel Aviv          1-1 Maccabi Haifa
  [ 2] Maccabi Tel Aviv          5-0 Beitar Jerusalem
  [ 3] Maccabi Tel Aviv          3-0 Hapoel Haifa
  [ 4] Maccabi Tel Aviv          4-1 Maccabi Netanya
  [ 5] Hapoel Be'er Sheva        1-3 Maccabi Tel Aviv
  [ 6] Hapoel Be'er Sheva        4-1 Maccabi Haifa
  [ 7] Hapoel Be'er Sheva        3-1 Beitar Jerusalem
  [ 8] Hapoel Be'er Sheva        5-0 Hapoel Haifa
  [ 9] Hapoel Be'er Sheva        2-0 Maccabi Netanya
  [10] Maccabi Haifa             0-3 Maccabi Tel Aviv
  [11] Maccabi Haifa             0-3 Hapoel Be'er Sheva
  [12] Maccabi Haifa             3-3 Beitar Jerusalem
  [13] Maccabi Haifa             1-5 Hapoel Haifa
  [14] Maccabi Haifa             1-0 Maccabi Netanya
  [15] Beitar Jerusalem 

In [16]:
# Add 'round' column to all playoff Wikipedia CSV files
# ◊î◊ï◊°◊§◊™ ◊¢◊û◊ï◊ì◊™ 'round' ◊ú◊õ◊ú ◊ß◊ë◊¶◊ô ◊î◊§◊ú◊ô◊ô◊ê◊ï◊£ ◊û◊ï◊ï◊ô◊ß◊ô◊§◊ì◊ô◊î

import glob

# Get all playoff championship files from Wikipedia
playoff_files = sorted(PLAYOFFS_DIR.glob('playoffs_championship_*_ligat_haal_wikipedia.csv'))

print(f"üìä Found {len(playoff_files)} playoff files")
print("="*100)

for file in playoff_files:
    # Skip 'all_seasons' file
    if 'all_seasons' in file.name:
        continue
    
    # Extract season from filename
    # Format: playoffs_championship_YYYY_YY_ligat_haal_wikipedia.csv
    season_part = file.name.replace('playoffs_championship_', '').replace('_ligat_haal_wikipedia.csv', '')
    season_str = season_part.replace('_', '/')
    
    # Load file
    df = pd.read_csv(file)
    
    # Check if 'round' column already exists
    if 'round' in df.columns:
        print(f"‚ö†Ô∏è  {season_str}: Already has 'round' column ({len(df)} matches)")
        continue
    
    # Add empty 'round' column (will be filled manually later)
    df.insert(2, 'round', None)  # Insert as 3rd column
    
    # Save back to same file
    df.to_csv(file, index=False)
    
    print(f"‚úÖ {season_str}: Added 'round' column ({len(df)} matches)")

print("="*100)
print(f"\n‚úÖ Done! Added 'round' column to all files.")
print(f"\nüìù Next steps:")
print(f"   1. For each season, visit IFA website")
print(f"   2. Note which matches belong to each round")
print(f"   3. Fill in the 'round' values in the CSV files")
print(f"   4. Use validation function to check correctness")

üìä Found 17 playoff files
‚úÖ 2009/10: Added 'round' column (15 matches)
‚úÖ 2010/11: Added 'round' column (15 matches)
‚úÖ 2011/12: Added 'round' column (28 matches)
‚úÖ 2012/13: Added 'round' column (30 matches)
‚úÖ 2013/14: Added 'round' column (30 matches)
‚úÖ 2014/15: Added 'round' column (30 matches)
‚úÖ 2015/16: Added 'round' column (30 matches)
‚úÖ 2016/17: Added 'round' column (30 matches)
‚úÖ 2017/18: Added 'round' column (30 matches)
‚úÖ 2018/19: Added 'round' column (30 matches)
‚úÖ 2019/20: Added 'round' column (30 matches)
‚úÖ 2020/21: Added 'round' column (30 matches)
‚úÖ 2021/22: Added 'round' column (30 matches)
‚úÖ 2022/23: Added 'round' column (30 matches)
‚úÖ 2023/24: Added 'round' column (30 matches)
‚úÖ 2024/25: Added 'round' column (30 matches)

‚úÖ Done! Added 'round' column to all files.

üìù Next steps:
   1. For each season, visit IFA website
   2. Note which matches belong to each round
   3. Fill in the 'round' values in the CSV files
   4. Use validatio

## ◊õ◊ú◊ô ◊¢◊ñ◊® ◊ú◊û◊ô◊ú◊ï◊ô ◊û◊°◊§◊®◊ô ◊û◊ó◊ñ◊ï◊®◊ô◊ù

◊¢◊õ◊©◊ô◊ï ◊©◊ô◊© ◊¢◊û◊ï◊ì◊™ `round` ◊ë◊õ◊ú ◊î◊ß◊ë◊¶◊ô◊ù, ◊†◊ï◊õ◊ú ◊ú◊û◊ú◊ê ◊ê◊ï◊™◊î ◊¢◊ï◊†◊î ◊ê◊ó◊® ◊¢◊ï◊†◊î.

### ◊û◊ë◊†◊î ◊î◊û◊ó◊ñ◊ï◊®◊ô◊ù:
- **2009/10, 2010/11:** ◊û◊ó◊ñ◊ï◊®◊ô◊ù 31-35 (5 ◊û◊ó◊ñ◊ï◊®◊ô◊ù)
- **2011/12:** ◊û◊ó◊ñ◊ï◊®◊ô◊ù 31-37 ◊ê◊ï 31-44 (◊™◊ú◊ï◊ô ◊ê◊ù ◊î◊ô◊ï ◊©◊™◊ô ◊î◊§◊¢◊û◊ô◊ù)
- **2012/13 ◊ï◊ê◊ô◊ú◊ö:** ◊û◊ó◊ñ◊ï◊®◊ô◊ù 27-36 (10 ◊û◊ó◊ñ◊ï◊®◊ô◊ù)

In [17]:
# Helper function to display and update a specific season
# ◊§◊ï◊†◊ß◊¶◊ô◊ô◊™ ◊¢◊ñ◊® ◊ú◊î◊¶◊í◊î ◊ï◊¢◊ì◊õ◊ï◊ü ◊¢◊ï◊†◊î ◊°◊§◊¶◊ô◊§◊ô◊™

def display_season_for_manual_entry(season_str):
    """Display matches for a season to help with manual round assignment"""
    
    file = PLAYOFFS_DIR / f'playoffs_championship_{season_str.replace("/", "_")}_ligat_haal_wikipedia.csv'
    
    if not file.exists():
        print(f"‚ùå File not found: {file}")
        return None
    
    df = pd.read_csv(file)
    
    # Determine expected round range
    if season_str in ['2009/10', '2010/11']:
        round_range = "31-35"
        num_rounds = 5
    elif season_str == '2011/12':
        # Need to check if it's 7 rounds or more
        round_range = "31-37 (or more)"
        num_rounds = 7
    else:
        round_range = "27-36"
        num_rounds = 10
    
    print(f"üìä Season {season_str}")
    print(f"   Expected rounds: {round_range} ({num_rounds} rounds)")
    print(f"   Total matches: {len(df)}")
    print(f"   Matches per round: ~{len(df) // num_rounds}")
    print(f"\nüîó IFA Website: Check the appropriate season_id")
    print("="*100)
    
    # Display all matches with index
    for idx, row in df.iterrows():
        # Handle different column naming
        if 'home_team' in df.columns:
            home = row['home_team']
            away = row['away_team']
            hg = row.get('home_goals', '')
            ag = row.get('away_goals', '')
        else:
            home = row.get('home', '')
            away = row.get('away', '')
            if 'score' in row and pd.notna(row['score']):
                parts = str(row['score']).split(':')
                hg, ag = parts[0], parts[1] if len(parts) > 1 else ''
            else:
                hg = row.get('home_goals', '')
                ag = row.get('away_goals', '')
        
        round_val = row.get('round', '')
        round_str = f"[R{int(round_val):2d}]" if pd.notna(round_val) else "[R??]"
        
        print(f"  [{idx:2d}] {round_str} {str(home):25s} {hg}-{ag} {away}")
    
    print("="*100)
    
    return df

def update_season_rounds(season_str, round_assignments):
    """
    Update round numbers for a specific season
    
    Args:
        season_str: Season (e.g., '2024/25')
        round_assignments: List of round numbers (one per match)
    """
    file = PLAYOFFS_DIR / f'playoffs_championship_{season_str.replace("/", "_")}_ligat_haal_wikipedia.csv'
    
    if not file.exists():
        print(f"‚ùå File not found: {file}")
        return False
    
    df = pd.read_csv(file)
    
    if len(round_assignments) != len(df):
        print(f"‚ùå Error: Need {len(df)} round numbers, got {len(round_assignments)}")
        return False
    
    # Update rounds
    df['round'] = round_assignments
    
    # Validate
    print(f"üîç Validating {season_str}...")
    is_valid = validate_playoff_data(df, num_teams=6)
    
    if is_valid:
        # Save
        df.to_csv(file, index=False)
        print(f"‚úÖ Saved: {file}")
        
        print(f"\nüìä Rounds distribution:")
        print(df['round'].value_counts().sort_index())
        return True
    else:
        print(f"‚ö†Ô∏è Validation failed - not saved")
        return False

# Example: Display 2024/25 for manual entry
print("üìù Example: Display season for manual round assignment\n")
df_example = display_season_for_manual_entry('2024/25')

üìù Example: Display season for manual round assignment

üìä Season 2024/25
   Expected rounds: 27-36 (10 rounds)
   Total matches: 30
   Matches per round: ~3

üîó IFA Website: Check the appropriate season_id
  [ 0] [R??] Maccabi Tel Aviv          1-1 Hapoel Be'er Sheva
  [ 1] [R??] Maccabi Tel Aviv          1-1 Maccabi Haifa
  [ 2] [R??] Maccabi Tel Aviv          5-0 Beitar Jerusalem
  [ 3] [R??] Maccabi Tel Aviv          3-0 Hapoel Haifa
  [ 4] [R??] Maccabi Tel Aviv          4-1 Maccabi Netanya
  [ 5] [R??] Hapoel Be'er Sheva        1-3 Maccabi Tel Aviv
  [ 6] [R??] Hapoel Be'er Sheva        4-1 Maccabi Haifa
  [ 7] [R??] Hapoel Be'er Sheva        3-1 Beitar Jerusalem
  [ 8] [R??] Hapoel Be'er Sheva        5-0 Hapoel Haifa
  [ 9] [R??] Hapoel Be'er Sheva        2-0 Maccabi Netanya
  [10] [R??] Maccabi Haifa             0-3 Maccabi Tel Aviv
  [11] [R??] Maccabi Haifa             0-3 Hapoel Be'er Sheva
  [12] [R??] Maccabi Haifa             3-3 Beitar Jerusalem
  [13] [R??] Maccab

In [18]:
# Display 2009/10 season for manual round assignment
print("\n" + "="*100)
print("Starting with 2009/10 season")
print("="*100 + "\n")

df_2009_10 = display_season_for_manual_entry('2009/10')


Starting with 2009/10 season

üìä Season 2009/10
   Expected rounds: 31-35 (5 rounds)
   Total matches: 15
   Matches per round: ~3

üîó IFA Website: Check the appropriate season_id
  [ 0] [R??] Beitar Jerusalem          0-2 Bnei Yehuda
  [ 1] [R??] Beitar Jerusalem          1-2 Hapoel Tel Aviv
  [ 2] [R??] Bnei Yehuda               1-1 Maccabi Haifa
  [ 3] [R??] Bnei Yehuda               0-0 Maccabi Tel Aviv
  [ 4] [R??] F.C. Ashdod               1-2 Beitar Jerusalem
  [ 5] [R??] F.C. Ashdod               2-3 Bnei Yehuda
  [ 6] [R??] Hapoel Tel Aviv           1-0 Bnei Yehuda
  [ 7] [R??] Hapoel Tel Aviv           4-0 F.C. Ashdod
  [ 8] [R??] Hapoel Tel Aviv           0-0 Maccabi Tel Aviv
  [ 9] [R??] Maccabi Haifa             2-1 Beitar Jerusalem
  [10] [R??] Maccabi Haifa             3-1 F.C. Ashdod
  [11] [R??] Maccabi Haifa             0-1 Hapoel Tel Aviv
  [12] [R??] Maccabi Tel Aviv          3-0 Beitar Jerusalem
  [13] [R??] Maccabi Tel Aviv          2-0 F.C. Ashdod
  [14] [R?

## 4. Inspect HTML Structure

Need to manually inspect the page source to identify:
- How matches are organized
- CSS classes or IDs used
- How to extract: round number, date, teams, scores

In [4]:
# After inspecting HTML, create proper parsing function
def parse_ifa_playoff_matches(soup, season_str):
    """
    Parse matches from IFA page HTML
    
    This function needs to be customized based on actual HTML structure
    """
    matches_data = []
    
    # TODO: Implement based on actual HTML structure
    # Example structure (needs adjustment):
    """
    for match_div in soup.find_all('div', class_='match-item'):
        round_num = match_div.find('span', class_='round').text
        date = match_div.find('span', class_='date').text
        home = match_div.find('span', class_='home-team').text
        away = match_div.find('span', class_='away-team').text
        score = match_div.find('span', class_='score').text
        
        # Parse score
        home_goals, away_goals = map(int, score.split('-'))
        
        matches_data.append({
            'season': season_str,
            'round': round_num,
            'date': date,
            'home_team': home.strip(),
            'away_team': away.strip(),
            'home_goals': home_goals,
            'away_goals': away_goals
        })
    """
    
    return pd.DataFrame(matches_data)

print("‚ö†Ô∏è Parse function template created - needs HTML structure inspection")

‚ö†Ô∏è Parse function template created - needs HTML structure inspection


## 5. Alternative Approach: Manual Data Entry Helper

If scraping is complex, we can create a helper to manually enter round numbers for existing data:

In [5]:
def add_rounds_to_playoff_file(season_str, round_assignments):
    """
    Add round numbers to existing playoff CSV file
    
    Args:
        season_str: Season string (e.g., '2024/25')
        round_assignments: List of round numbers (one per match)
    """
    # Load existing file
    input_file = PLAYOFFS_DIR / f'playoffs_championship_{season_str.replace("/", "_")}_ligat_haal_wikipedia.csv'
    output_file = PLAYOFFS_DIR / f'playoffs_championship_{season_str.replace("/", "_")}_ligat_haal_with_rounds.csv'
    
    if not input_file.exists():
        print(f"‚ùå File not found: {input_file}")
        return
    
    df = pd.read_csv(input_file)
    
    if len(round_assignments) != len(df):
        print(f"‚ùå Mismatch: {len(df)} matches but {len(round_assignments)} rounds provided")
        return
    
    df['round'] = round_assignments
    df.to_csv(output_file, index=False)
    
    print(f"‚úÖ Saved: {output_file}")
    print(f"\nüìä Matches per round:")
    print(df['round'].value_counts().sort_index())
    
    return df

# Example: For 2024/25, if you know the round order from IFA website
# You can create this list by looking at the website round by round
print("üìù To use this function, visit IFA website and note which matches belong to each round")
print("   Then create a list like: [35, 35, 35, 36, 36, 36, ...]")

üìù To use this function, visit IFA website and note which matches belong to each round
   Then create a list like: [35, 35, 35, 36, 36, 36, ...]


## 6. Quick Manual Entry for 2024/25

Based on IFA website inspection, manually enter the correct round numbers:

In [6]:
# Load 2024/25 playoff data
season_2425_file = PLAYOFFS_DIR / 'playoffs_championship_2024_25_ligat_haal_wikipedia.csv'
df_2425 = pd.read_csv(season_2425_file)

print(f"üìä 2024/25 Playoff Matches ({len(df_2425)} total):")
print("\nPlease visit: https://www.football.org.il/leagues/league/?league_id=40&season_id=11")
print("And for each round (◊û◊ó◊ñ◊ï◊® 35-44), note which matches belong to it.\n")

# Display matches with index for reference
df_2425['match_id'] = range(len(df_2425))
print("Match listing:")
for idx, row in df_2425.iterrows():
    if 'home_team' in row:
        home, away = row['home_team'], row['away_team']
        goals_h, goals_a = row['home_goals'], row['away_goals']
    else:
        home, away = row['home'], row['away']
        score_parts = row['score'].split(':')
        goals_h, goals_a = score_parts[0], score_parts[1]
    
    print(f"  [{idx:2d}] {home} {goals_h}-{goals_a} {away}")

üìä 2024/25 Playoff Matches (30 total):

Please visit: https://www.football.org.il/leagues/league/?league_id=40&season_id=11
And for each round (◊û◊ó◊ñ◊ï◊® 35-44), note which matches belong to it.

Match listing:
  [ 0] Maccabi Tel Aviv 1-1 Hapoel Be'er Sheva
  [ 1] Maccabi Tel Aviv 1-1 Maccabi Haifa
  [ 2] Maccabi Tel Aviv 5-0 Beitar Jerusalem
  [ 3] Maccabi Tel Aviv 3-0 Hapoel Haifa
  [ 4] Maccabi Tel Aviv 4-1 Maccabi Netanya
  [ 5] Hapoel Be'er Sheva 1-3 Maccabi Tel Aviv
  [ 6] Hapoel Be'er Sheva 4-1 Maccabi Haifa
  [ 7] Hapoel Be'er Sheva 3-1 Beitar Jerusalem
  [ 8] Hapoel Be'er Sheva 5-0 Hapoel Haifa
  [ 9] Hapoel Be'er Sheva 2-0 Maccabi Netanya
  [10] Maccabi Haifa 0-3 Maccabi Tel Aviv
  [11] Maccabi Haifa 0-3 Hapoel Be'er Sheva
  [12] Maccabi Haifa 3-3 Beitar Jerusalem
  [13] Maccabi Haifa 1-5 Hapoel Haifa
  [14] Maccabi Haifa 1-0 Maccabi Netanya
  [15] Beitar Jerusalem 3-1 Maccabi Tel Aviv
  [16] Beitar Jerusalem 1-1 Hapoel Be'er Sheva
  [17] Beitar Jerusalem 1-2 Maccabi Haifa

## 7. Next Steps

**Option A: Web Scraping (Recommended)**
1. Inspect IFA website HTML structure
2. Implement proper parsing in `parse_ifa_playoff_matches()`
3. Scrape all playoff seasons with round numbers

**Option B: Manual Entry (Quick Fix)**
1. For each season, visit IFA website
2. Note round numbers for each match
3. Use `add_rounds_to_playoff_file()` to add round column

**Option C: Hybrid**
1. Scrape what's available from IFA
2. Manually fill gaps for older seasons