In [38]:
# Optional: install requirements (recommended to use requirements.txt)
# If you need to install dependencies in the notebook environment, uncomment one of the lines below.
# It's better to run these once in your environment or use a virtualenv and install from requirements.txt.
# %pip install -r ../requirements.txt
# or (not recommended to run on every notebook execution):
# %pip install -q pandas requests python-dateutil python-dotenv


# Ligat Ha'al – Refactored for local/VSCode runs
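This notebook reads the `APISPORTS_KEY` secret from a `.env` (or `key.env`) file and stores all data under `data/` (see `requirements.txt` for the dependencies and `.gitignore` for the files kept out of version control).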

In [39]:
# Environment setup — load .env and prepare directories
from pathlib import Path
import os, time, requests
import pandas as pd
from dateutil import parser as dateparser
from dotenv import load_dotenv, find_dotenv

# Load variables from whichever .env file find_dotenv() discovers (if any).
load_dotenv(find_dotenv())

# Project layout. ROOT is the parent of the current working directory, so the
# notebook is expected to be started from a sub-folder of the repository
# (presumably notebooks/ — confirm for your setup).
ROOT = Path.cwd().parent
# Raw-data folder; can be overridden with the LIGAT_DATA_DIR environment variable.
DATA_DIR = Path(os.getenv("LIGAT_DATA_DIR", str(ROOT / "data" / "raw")))
INTERIM_DIR = DATA_DIR.parent / "interim"
PROCESSED_DIR = DATA_DIR.parent / "processed"
FIG_DIR = ROOT / "reports" / "figures"
for _dir in (DATA_DIR, INTERIM_DIR, PROCESSED_DIR, FIG_DIR):
    _dir.mkdir(parents=True, exist_ok=True)

# Load APISPORTS_KEY from the environment; if it is not set yet, try the
# candidate env files below, in order, and stop at the first one that provides it.
APISPORTS_KEY = os.getenv("APISPORTS_KEY")
if not APISPORTS_KEY:
    # Common places where key.env might be located (repo root, notebooks/, current working dir)
    candidates = [
        ROOT / "key.env",
        ROOT / "notebooks" / "key.env",
        Path.cwd() / "key.env",
        ROOT / ".env",
    ]
    for p in candidates:
        try:
            if p.exists():
                # override=True lets values from this file replace ones already loaded.
                load_dotenv(dotenv_path=str(p), override=True)
                APISPORTS_KEY = os.getenv("APISPORTS_KEY")
                if APISPORTS_KEY:
                    print(f"Loaded APISPORTS_KEY from: {p}")
                    break
        except Exception as exc:
            # Best effort: a broken or unreadable file must not stop the search,
            # but report it so the problem is not hidden from the user.
            print(f"Skipping unreadable env file {p}: {exc!r}")
            continue

if not APISPORTS_KEY:
    raise RuntimeError(
        "Missing APISPORTS_KEY. Create a 'key.env' or '.env' file containing the line:\nAPISPORTS_KEY=your_actual_api_key_here"
    )

# Base URL and authentication header used for every API request in this notebook.
BASE_URL = "https://v3.football.api-sports.io"
HEADERS  = {"x-apisports-key": APISPORTS_KEY}

print('ROOT:', ROOT)
print('DATA_DIR:', DATA_DIR)


ROOT: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project
DATA_DIR: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project\data\raw


In [40]:
# Sanity check: confirm DATA_DIR is writable by creating a small text file in it.
test_file = DATA_DIR / "test.txt"
test_file.write_text("שלום מהשותף הראשון :)", encoding="utf-8")

print("נוצר קובץ בדיקה:", test_file)


נוצר קובץ בדיקה: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project\data\raw\test.txt


In [41]:
# ====== ליגת העל בלבד (APISports) – הצגה + הורדה ======
# (Dependencies are centralized in the optional setup cell at the top)

import os, time
import pandas as pd
from dateutil import parser as dateparser

def api_get(path, params=None, pause=0.6):
    """Send a GET request to the API and return the decoded JSON payload.

    Args:
        path: Endpoint path appended to BASE_URL (for example "/leagues").
        params: Optional dict of query-string parameters.
        pause: Seconds to sleep after a successful call, before returning.

    Returns:
        The parsed JSON body of the response.

    Raises:
        RuntimeError: If the HTTP status is not 200, or if the JSON body
            carries a non-empty "errors" field.
    """
    r = requests.get(f"{BASE_URL}{path}", headers=HEADERS, params=params or {}, timeout=30)
    if r.status_code != 200:
        raise RuntimeError(f"API error {r.status_code}: {r.text[:300]}")
    payload = r.json()
    # The API can answer HTTP 200 while reporting a failure (bad key, quota,
    # plan limits) in the body's "errors" field; surface it instead of
    # handing an apparently successful, empty result to the caller.
    errors = payload.get("errors") if isinstance(payload, dict) else None
    if errors:
        raise RuntimeError(f"API error in response body: {str(errors)[:300]}")
    time.sleep(pause)
    return payload

# --- Step 1: list every competition found for Israel so you can see what exists ---
leagues = api_get("/leagues", {"country": "Israel"})
rows = []
for item in leagues.get("response", []):
    # "or {}" also covers keys that are present but set to null.
    lg = item.get("league") or {}
    cn = item.get("country") or {}
    if cn.get("name") == "Israel":
        rows.append({"id": lg.get("id"), "name": lg.get("name"), "type": lg.get("type")})
# Explicit columns keep the sort (and later column lookups) working even when
# the API returned no rows at all.
israel_leagues_df = (
    pd.DataFrame(rows, columns=["id", "name", "type"])
    .sort_values(by=["type", "name"])
    .reset_index(drop=True)
)
print("ליגות שנמצאו בישראל:")
display(israel_leagues_df)

# Known spellings of the Israeli top division (matched as lower-case substrings).
PREFERRED_NAMES = { 'ligat haal', "ligat ha'al", 'ligat ha’al', 'ligat ha`al', 'israeli premier league', 'premier league' }

def _normalize_league_name(name):
    """Lower-case and trim a league name and unify apostrophe look-alikes to "'".

    Non-string values (None, NaN) become an empty string.
    """
    if not isinstance(name, str):
        return ''
    normalized = name.lower().strip()
    for variant in ('\u2019', '\u2018', '`', '\u00b4', '\u02bc'):
        normalized = normalized.replace(variant, "'")
    return normalized

def choose_israeli_premier(df: pd.DataFrame):
    """Pick the top-division league from a table of leagues.

    Args:
        df: DataFrame with the columns "id", "name" and "type".

    Returns:
        A tuple (league_id, league_name). Only rows whose type is "league"
        (case-insensitive) are considered. The first row whose name contains
        one of PREFERRED_NAMES wins; otherwise the first row whose name
        contains "ligat"; otherwise the first remaining row. Returns
        (None, None) when no league row exists.
    """
    if df.empty or 'type' not in df.columns:
        return None, None
    # astype(str) keeps the filter working when the column holds non-strings.
    df = df[df['type'].astype(str).str.lower() == 'league'].copy()
    if df.empty:
        return None, None

    preferred = {_normalize_league_name(p) for p in PREFERRED_NAMES}
    entries = [
        (league_id, raw_name, _normalize_league_name(raw_name))
        for league_id, raw_name in zip(df['id'], df['name'])
    ]

    # Pass 1: a known spelling of the league name.
    for league_id, raw_name, norm_name in entries:
        if any(p in norm_name for p in preferred):
            return int(league_id), raw_name
    # Pass 2: looser match on the word "ligat".
    for league_id, raw_name, norm_name in entries:
        if 'ligat' in norm_name:
            return int(league_id), raw_name
    # Pass 3: fall back to the first league row.
    league_id, raw_name, _ = entries[0]
    return int(league_id), raw_name

# Pick the league to download and stop early if none was found.
LEAGUE_ID, LEAGUE_NAME = choose_israeli_premier(israel_leagues_df)
assert LEAGUE_ID is not None, 'לא נמצא מזהה ליגת העל.'
print(f'נבחרה ליגה: {LEAGUE_NAME} (ID={LEAGUE_ID})')

# Season to download, identified by its starting year (2022 -> "2022/23").
SEASON_YEAR = 2022
season_label = f'{SEASON_YEAR}/{str(SEASON_YEAR+1)[-2:]}'
fx = api_get('/fixtures', {'league': LEAGUE_ID, 'season': SEASON_YEAR, 'timezone': 'UTC'})

# Flatten every fixture of the response into one row.
rows = []
for item in fx.get('response', []):
    # "or {}" also covers keys that are present but set to null.
    fixture = item.get('fixture') or {}
    league  = item.get('league') or {}
    teams   = item.get('teams') or {}
    goals   = item.get('goals') or {}
    dt = fixture.get('date')
    try:
        # Keep only the calendar date (YYYY-MM-DD) of the kickoff timestamp.
        dt = dateparser.parse(dt).strftime('%Y-%m-%d') if dt else None
    except (ValueError, OverflowError, TypeError):
        # Unparseable date: store it as missing. Catching only parsing errors
        # keeps Ctrl-C / kernel interrupts working.
        dt = None
    rows.append({
        'season': season_label,
        'date': dt,
        'round': league.get('round'),
        'stage': league.get('name'),
        'home_team': (teams.get('home') or {}).get('name'),
        'away_team': (teams.get('away') or {}).get('name'),
        'home_goals': goals.get('home'),
        'away_goals': goals.get('away'),
        'venue': (fixture.get('venue') or {}).get('name'),
        'referee': fixture.get('referee'),
        'fixture_id': fixture.get('id'),
        'league_id': league.get('id'),
        'league_name': league.get('name'),
    })

# Explicit columns keep the CSV header intact even when no fixtures came back.
df = pd.DataFrame(rows, columns=[
    'season', 'date', 'round', 'stage', 'home_team', 'away_team',
    'home_goals', 'away_goals', 'venue', 'referee',
    'fixture_id', 'league_id', 'league_name',
])
csv_path = DATA_DIR / f"matches_{SEASON_YEAR}_{str(SEASON_YEAR+1)[-2:]}_ligat_haal.csv"
# utf-8-sig writes a byte-order mark at the start of the file.
df.to_csv(csv_path, index=False, encoding='utf-8-sig')
print(f'Saved: {csv_path} | rows: {len(df)}')
display(df.head(10))


ליגות שנמצאו בישראל:


Unnamed: 0,id,name,type
0,384,State Cup,Cup
1,659,Super Cup,Cup
2,385,Toto Cup Ligat Al,Cup
3,496,Liga Alef,League
4,382,Liga Leumit,League
5,383,Ligat Ha'al,League


נבחרה ליגה: Ligat Ha'al (ID=383)
Saved: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project\data\raw\matches_2022_23_ligat_haal.csv | rows: 240


Unnamed: 0,season,date,round,stage,home_team,away_team,home_goals,away_goals,venue,referee,fixture_id,league_id,league_name
0,2022/23,2022-08-20,Regular Season - 1,Ligat Ha'al,Hapoel Haifa,Hapoel Tel Aviv,2,0,Sammy Ofer Stadium,O. Grinfeeld,865835,383,Ligat Ha'al
1,2022/23,2022-08-20,Regular Season - 1,Ligat Ha'al,Hapoel Katamon,Hapoel Hadera,1,1,HaMoshava Stadium,A. Shiloach,865840,383,Ligat Ha'al
2,2022/23,2022-08-20,Regular Season - 1,Ligat Ha'al,Maccabi Netanya,Beitar Jerusalem,4,1,Netanya Stadium,R. Reinshreiber,865837,383,Ligat Ha'al
3,2022/23,2022-08-21,Regular Season - 1,Ligat Ha'al,Maccabi Tel Aviv,Maccabi Bnei Raina,5,0,Bloomfield Stadium,I. Frid,865841,383,Ligat Ha'al
4,2022/23,2022-08-22,Regular Season - 1,Ligat Ha'al,Sektzia Nes Tziona,Ironi Kiryat Shmona,0,2,HaMoshava Stadium,Y. Mizrahi,865839,383,Ligat Ha'al
5,2022/23,2022-08-27,Regular Season - 2,Ligat Ha'al,Ironi Kiryat Shmona,Hapoel Katamon,1,1,Kiryat-Shmona Municipal Stadium,O. Na'al,865844,383,Ligat Ha'al
6,2022/23,2022-08-27,Regular Season - 2,Ligat Ha'al,Hapoel Tel Aviv,Bnei Sakhnin,0,2,Bloomfield Stadium,R. Reinshreiber,865848,383,Ligat Ha'al
7,2022/23,2022-08-27,Regular Season - 2,Ligat Ha'al,Maccabi Haifa,Maccabi Netanya,4,1,Sammy Ofer Stadium,S. Levi,865847,383,Ligat Ha'al
8,2022/23,2022-08-27,Regular Season - 2,Ligat Ha'al,Ashdod,Sektzia Nes Tziona,1,0,Yud-Alef Stadium,O. Asulin,865845,383,Ligat Ha'al
9,2022/23,2022-08-28,Regular Season - 2,Ligat Ha'al,Maccabi Bnei Raina,Hapoel Haifa,1,1,Green Stadium,S. Ben Avraham,865842,383,Ligat Ha'al


In [42]:
# === העשרת הטבלה + ניקוי עמודות מיותרות ===
import re
import pandas as pd

# Input (raw matches) and output (enriched matches) locations.
# Point in_path at your own file if it is named differently.
in_path = DATA_DIR / "matches_2022_23_ligat_haal.csv"
out_path = INTERIM_DIR / "matches_2022_23_enriched.csv"

# Read the raw matches, failing with a clear message when the file is absent.
if in_path.exists():
    df = pd.read_csv(in_path)
else:
    raise FileNotFoundError(f"Input matches file not found: {in_path}")

# --- Helper columns ---
# 1) Numeric starting year of the season (currently disabled):
# df["season_year"] = df["season"].str.slice(0,4).astype(int)

# 2) מספר מחזור ו-phase
def parse_round(r):
    """Split a round label into (phase, round_number).

    Examples of labels: "Regular Season - 1", "Championship Round - 5".

    Args:
        r: The round label; a missing value (None/NaN) is allowed.

    Returns:
        A tuple (phase, round_num). phase is "regular", "championship" or
        "relegation" when that word occurs in the label (case-insensitive),
        otherwise None. round_num is the first number that follows one of
        those words, otherwise None. A missing label gives (None, None).
    """
    if pd.isna(r):
        return (None, None)

    label = str(r)
    lowered = label.lower()
    # First phase keyword found in the label, checked in this fixed order.
    phase = next(
        (name for name in ("regular", "championship", "relegation") if name in lowered),
        None,
    )

    match = re.search(r"(Regular|Championship|Relegation).*?(\d+)", label, flags=re.I)
    round_num = None if match is None else int(match.group(2))
    return (phase, round_num)

# Split every round label into its phase and its round number.
parsed_rounds = df["round"].apply(parse_round).tolist()
df["phase"] = [parsed[0] for parsed in parsed_rounds]
df["round_num"] = [parsed[1] for parsed in parsed_rounds]

# 3) Goal difference, result and points
df["goal_diff"] = df["home_goals"] - df["away_goals"]
# H = home win, A = away win, D = draw. A fixture with missing goals has a NaN
# goal difference; its result stays missing instead of being counted as a draw.
df["result"] = df["goal_diff"].apply(
    lambda x: None if pd.isna(x) else ("H" if x > 0 else ("A" if x < 0 else "D"))
)
# Rows without a result get missing points as well.
df["home_points"] = df["result"].map({"H":3, "D":1, "A":0})
df["away_points"] = df["result"].map({"H":0, "D":1, "A":3})

# 4) One-sided match flag (absolute goal difference of 3 or more);
#    rows with a missing goal difference get 0.
df["one_sided"] = (df["goal_diff"].abs() >= 3).astype(int)

# 5) Drop the columns that are not needed downstream
drop_cols = ["league_id","league_name","fixture_id"]
df = df.drop(columns=[c for c in drop_cols if c in df.columns])

# 6) Convenient column order; names absent from the frame are skipped
cols = [
    "season","season_year","date","phase","round_num","stage",
    "home_team","away_team","home_goals","away_goals","goal_diff","result",
    "home_points","away_points","one_sided","venue","referee"
]
df = df[[c for c in cols if c in df.columns]]

df.to_csv(out_path, index=False, encoding="utf-8-sig")
print("נשמר:", out_path, "| שורות:", len(df))
df.head(10)


נשמר: c:\Users\nitib\dev-lab\ligat_haal_project\ligat_haal_project\data\interim\matches_2022_23_enriched.csv | שורות: 240


Unnamed: 0,season,date,phase,round_num,stage,home_team,away_team,home_goals,away_goals,goal_diff,result,home_points,away_points,one_sided,venue,referee
0,2022/23,2022-08-20,regular,1,Ligat Ha'al,Hapoel Haifa,Hapoel Tel Aviv,2,0,2,H,3,0,0,Sammy Ofer Stadium,O. Grinfeeld
1,2022/23,2022-08-20,regular,1,Ligat Ha'al,Hapoel Katamon,Hapoel Hadera,1,1,0,D,1,1,0,HaMoshava Stadium,A. Shiloach
2,2022/23,2022-08-20,regular,1,Ligat Ha'al,Maccabi Netanya,Beitar Jerusalem,4,1,3,H,3,0,1,Netanya Stadium,R. Reinshreiber
3,2022/23,2022-08-21,regular,1,Ligat Ha'al,Maccabi Tel Aviv,Maccabi Bnei Raina,5,0,5,H,3,0,1,Bloomfield Stadium,I. Frid
4,2022/23,2022-08-22,regular,1,Ligat Ha'al,Sektzia Nes Tziona,Ironi Kiryat Shmona,0,2,-2,A,0,3,0,HaMoshava Stadium,Y. Mizrahi
5,2022/23,2022-08-27,regular,2,Ligat Ha'al,Ironi Kiryat Shmona,Hapoel Katamon,1,1,0,D,1,1,0,Kiryat-Shmona Municipal Stadium,O. Na'al
6,2022/23,2022-08-27,regular,2,Ligat Ha'al,Hapoel Tel Aviv,Bnei Sakhnin,0,2,-2,A,0,3,0,Bloomfield Stadium,R. Reinshreiber
7,2022/23,2022-08-27,regular,2,Ligat Ha'al,Maccabi Haifa,Maccabi Netanya,4,1,3,H,3,0,1,Sammy Ofer Stadium,S. Levi
8,2022/23,2022-08-27,regular,2,Ligat Ha'al,Ashdod,Sektzia Nes Tziona,1,0,1,H,3,0,0,Yud-Alef Stadium,O. Asulin
9,2022/23,2022-08-28,regular,2,Ligat Ha'al,Maccabi Bnei Raina,Hapoel Haifa,1,1,0,D,1,1,0,Green Stadium,S. Ben Avraham


In [None]:
# sync_test.py
# Test file for checking synchronisation between ... (the original comment is
# cut off here; presumably between the two collaborators — confirm).

def sync_check():
    """Print a confirmation message showing that the sync test ran."""
    message = "✅ Git sync test successful! — שלום משני הצדדים 😎"
    print(message)

if __name__ == "__main__":
    sync_check()


✅ Git sync test successful! — שלום משני הצדדים 😎
