<a href="https://colab.research.google.com/github/Emtatos/Football/blob/main/123TipsUK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ==============================================
# Exempel på komplett kod i Colab med bättre struktur
# ==============================================

# --------------------------
# 0) Installation & imports
# --------------------------
!pip install ipywidgets scikit-learn --quiet
from google.colab import output
output.enable_custom_widget_manager()  # För att aktivera widgets i Colab

import pandas as pd
import numpy as np
import ipywidgets as widgets
from ipywidgets import Dropdown, Button, VBox, HBox, Output, Layout
from IPython.display import display

from datetime import datetime
from collections import defaultdict, deque
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV


# ------------------------------------------
# 1) Funktioner för datahämtning och städning
# ------------------------------------------
def download_and_read_data():
    """
    Download the league CSV files from football-data.co.uk and combine them.

    Fetches divisions E0, E1 and E2 for seasons 2023-24 and 2024-25 (six
    files). A file that already exists in the working directory is not
    downloaded again. Each file is read into a DataFrame, tagged with a
    "League" column holding its division code, and all frames are
    concatenated with a fresh 0..n-1 index.

    Returns:
        pandas.DataFrame: every row of the six files plus the "League" column.

    Raises:
        urllib.error.URLError: if a missing file cannot be downloaded.
    """
    # Function-scope imports keep this block self-contained; plain Python
    # replaces the former `!wget` shell magic so the function also works
    # outside IPython.
    import os
    import urllib.request

    base_url = "https://www.football-data.co.uk/mmz4281"
    seasons = ("2324", "2425")
    leagues = ("E0", "E1", "E2")

    frames = []
    for league in leagues:
        for season in seasons:
            csv_file = f"{league}_{season}.csv"
            if not os.path.exists(csv_file):
                # Download under a temporary name and rename afterwards, so an
                # interrupted transfer never leaves a truncated file that a
                # later run would mistake for a cached download.
                tmp_file = csv_file + ".part"
                urllib.request.urlretrieve(f"{base_url}/{season}/{league}.csv", tmp_file)
                os.replace(tmp_file, csv_file)

            df_tmp = pd.read_csv(csv_file)
            df_tmp["League"] = league
            frames.append(df_tmp)

    return pd.concat(frames, ignore_index=True)


def clean_and_prepare_data(df):
    """
    Parse dates, drop unusable rows, sort chronologically and add label columns.

    The input frame is not modified; a cleaned copy is returned.

    Steps:
      * "Date" is parsed day-first; unparseable values become NaT.
      * "ResultLabel" is derived from "FTR" (H -> 0, D -> 1, A -> 2).
      * Rows lacking a date, goals, result, league or a recognised result
        code are dropped.
      * Rows are sorted by date with a stable sort, so matches played on the
        same day keep their original relative order.
      * "League" becomes categorical and "LeagueCode" holds its integer code.

    Args:
        df: raw match DataFrame with at least the columns "Date", "FTHG",
            "FTAG", "FTR" and "League".

    Returns:
        pandas.DataFrame: cleaned frame with a fresh 0..n-1 index and the
        extra columns "ResultLabel" (int) and "LeagueCode".
    """
    # Work on a copy so the caller's DataFrame keeps its original "Date" strings.
    df = df.copy()

    # Convert dates (DD/MM/YY -> datetime)
    df["Date"] = pd.to_datetime(df["Date"], dayfirst=True, errors="coerce")

    # Numeric representation of H/D/A; any other code maps to NaN and is
    # dropped below instead of reaching the classifier as a NaN label.
    mapping = {"H": 0, "D": 1, "A": 2}
    df["ResultLabel"] = df["FTR"].map(mapping)

    # Remove rows without the required information
    df = df.dropna(subset=["Date", "FTHG", "FTAG", "FTR", "League", "ResultLabel"])
    df = df.sort_values("Date", kind="mergesort").reset_index(drop=True)
    df["ResultLabel"] = df["ResultLabel"].astype(int)

    # Make League categorical and expose its integer code
    df["League"] = df["League"].astype("category")
    df["LeagueCode"] = df["League"].cat.codes

    return df


# ----------------------------------------
# 2) Funktion för 5-matchers formberäkning
# ----------------------------------------
def calculate_5match_form(df_input):
    """
    Add rolling five-match form features to every match row.

    Rows are processed in the order given (callers pass date-sorted data).
    For each match the features describe the state BEFORE that match:

      * Home* columns: mean over the home team's last (up to) five HOME
        matches of points, goal difference, goals scored and goals conceded.
      * Away* columns: the same for the away team's last five AWAY matches.

    A team with no earlier match on that side gets 0.0 for all four values.

    Values are collected positionally and assigned as whole columns, so the
    result does not depend on the frame's index (duplicate index labels are
    safe) and no per-cell DataFrame writes are needed.

    Args:
        df_input: DataFrame with columns "HomeTeam", "AwayTeam", "FTHG",
            "FTAG" and "FTR". It is not modified.

    Returns:
        pandas.DataFrame: copy of the input with eight extra float columns
        (HomePoints5, HomeGD5, HomeGoalsFor5, HomeGoalsAgainst5 and the
        matching Away* columns).
    """
    df = df_input.copy()

    stats = ("Points5", "GD5", "GoalsFor5", "GoalsAgainst5")

    def _new_window():
        # One bounded history per statistic; the deque discards the oldest
        # entry automatically once five matches are stored.
        return {stat: deque(maxlen=5) for stat in stats}

    home_history = defaultdict(_new_window)
    away_history = defaultdict(_new_window)
    features = {f"{side}{stat}": [] for side in ("Home", "Away") for stat in stats}

    def _record_form(side, window):
        # Snapshot the form as it stood before the current match.
        for stat in stats:
            past = window[stat]
            features[f"{side}{stat}"].append(float(np.mean(past)) if past else 0.0)

    match_rows = zip(df["HomeTeam"], df["AwayTeam"], df["FTHG"], df["FTAG"], df["FTR"])
    for home_team, away_team, fthg, ftag, ftr in match_rows:
        home_window = home_history[home_team]
        away_window = away_history[away_team]

        _record_form("Home", home_window)
        _record_form("Away", away_window)

        # Add the current match to the histories only after the snapshot,
        # so a match never contributes to its own features.
        home_pts = 3 if ftr == "H" else 1 if ftr == "D" else 0
        away_pts = 3 if ftr == "A" else 1 if ftr == "D" else 0

        for stat, value in zip(stats, (home_pts, fthg - ftag, fthg, ftag)):
            home_window[stat].append(value)
        for stat, value in zip(stats, (away_pts, ftag - fthg, ftag, fthg)):
            away_window[stat].append(value)

    for name, values in features.items():
        df[name] = np.asarray(values, dtype=float)

    return df


# ------------------------------------------------
# 3) Träning av basmodell + hyperparametertuning
# ------------------------------------------------
def train_model_with_tuning(X_train, y_train, X_test, y_test):
    """
    Train a baseline random forest, then tune hyperparameters and return the best model.

    First a fixed-parameter RandomForestClassifier is fitted and its accuracy
    on the test set is printed as a quick baseline. Then a RandomizedSearchCV
    (20 sampled candidates, 3-fold cross-validation on the training data,
    accuracy scoring) searches over tree count, depth and split/leaf sizes.
    The winning parameters and their test accuracy are printed.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels, used only for the
            printed accuracy figures.

    Returns:
        The best estimator found by the search, fitted on the training data.
    """
    print("Tränar enkel RandomForest...")
    model_eval = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    model_eval.fit(X_train, y_train)
    y_pred_test = model_eval.predict(X_test)
    acc_test = accuracy_score(y_test, y_pred_test)
    print(f"  Enkelt test-accuracy: {acc_test:.2%}")

    print("\nStartar hyperparametertuning med RandomizedSearchCV... (kan ta en stund)\n")
    param_distributions = {
        'n_estimators': [100, 200, 300, 400],
        'max_depth': [5, 10, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    rf = RandomForestClassifier(random_state=42)
    random_search = RandomizedSearchCV(
        estimator=rf,
        param_distributions=param_distributions,
        n_iter=20,
        cv=3,
        n_jobs=-1,
        verbose=0,  # set to 1 or 2 for more progress output
        scoring='accuracy',
        random_state=42
    )
    random_search.fit(X_train, y_train)

    # With the default refit=True the search has already refitted the best
    # candidate on the whole training set, so no second fit is needed here.
    best_rf = random_search.best_estimator_
    y_pred_best = best_rf.predict(X_test)
    best_acc = accuracy_score(y_test, y_pred_best)

    print(f"  Bästa param: {random_search.best_params_}")
    print(f"  Förbättrad testnoggrannhet: {best_acc:.2%}")

    return best_rf


# --------------------------------------------
# 4) Skapa "slutgiltig" modell och form-värden
# --------------------------------------------
def compute_final_form_and_model(df_all, best_rf=None, FEATURES=None):
    """
    Compute five-match form for the whole data set and fit the final model on it.

    Args:
        df_all: cleaned match DataFrame; must contain "ResultLabel" and the
            columns needed by calculate_5match_form.
        best_rf: optional estimator to reuse. When given, this same object is
            refitted in place on all data; otherwise a new untuned
            RandomForestClassifier is created.
        FEATURES: list of feature column names to train on. Required; the
            None default exists only to keep the signature, and is rejected.

    Returns:
        tuple: (model_final, df_all_form) - the fitted model and the
        DataFrame with the form columns added.

    Raises:
        ValueError: if FEATURES is None or empty.
    """
    # Fail early with a clear message instead of an obscure indexing error.
    if FEATURES is None or len(FEATURES) == 0:
        raise ValueError("FEATURES must be a non-empty list of feature column names")

    df_all_form = calculate_5match_form(df_all)
    X_final = df_all_form[FEATURES]
    y_final = df_all_form["ResultLabel"]

    if best_rf is not None:
        print("\n--- Återanvänder best_rf som slutlig modell ---")
        model_final = best_rf
    else:
        print("\n--- Tränar en ny 'slutlig' random forest på all data (ingen tuning) ---")
        model_final = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

    model_final.fit(X_final, y_final)

    return model_final, df_all_form


# -------------------------------------------
# 5) Bygg "just nu"-form per lag (5 senaste)
# -------------------------------------------
def build_current_form_dicts(df_all):
    """
    Build the up-to-date five-match form of every team, for predicting new matches.

    The matches are walked in date order. For each team the last (up to)
    five home matches and last five away matches are kept, and their mean
    points, goal difference, goals scored and goals conceded are returned.
    A team without any match on a side gets 0 for that side.

    Returns:
        tuple of nine dicts keyed by team name: home points, home goal
        difference, home goals for, home goals against, then the same four
        for away matches, and finally team_league_map with the LeagueCode of
        the team's most recent match.
    """
    chronological = df_all.sort_values("Date").reset_index(drop=True)

    stat_keys = ("pts", "gd", "gf", "ga")
    home_windows = {key: defaultdict(lambda: deque([], maxlen=5)) for key in stat_keys}
    away_windows = {key: defaultdict(lambda: deque([], maxlen=5)) for key in stat_keys}

    # Later matches overwrite earlier ones, so after the walk each team maps
    # to the league code of its latest match (covers promotion/relegation).
    team_league_map = {}

    for _, match in chronological.iterrows():
        home_team = match["HomeTeam"]
        away_team = match["AwayTeam"]

        team_league_map[home_team] = match["LeagueCode"]
        team_league_map[away_team] = match["LeagueCode"]

        home_goals = match["FTHG"]
        away_goals = match["FTAG"]
        result = match["FTR"]

        if result == "H":
            home_points, away_points = 3, 0
        elif result == "D":
            home_points, away_points = 1, 1
        elif result == "A":
            home_points, away_points = 0, 3
        else:
            home_points, away_points = 0, 0

        home_values = (home_points, home_goals - away_goals, home_goals, away_goals)
        away_values = (away_points, away_goals - home_goals, away_goals, home_goals)

        for key, value in zip(stat_keys, home_values):
            home_windows[key][home_team].append(value)
        for key, value in zip(stat_keys, away_values):
            away_windows[key][away_team].append(value)

    all_teams = set([*home_windows["pts"].keys(), *away_windows["pts"].keys()])

    def window_means(per_team):
        # Mean of the stored window, or 0 when the team never played on this side.
        averages = {}
        for team in all_teams:
            averages[team] = np.mean(per_team[team]) if len(per_team[team]) > 0 else 0
        return averages

    home_form = [window_means(home_windows[key]) for key in stat_keys]
    away_form = [window_means(away_windows[key]) for key in stat_keys]

    return (*home_form, *away_form, team_league_map)


# ----------------------------------------------------------
# 6) Bygga interaktiv UI: rullgardiner + prediktioner + logik
# ----------------------------------------------------------
def build_ui(model_final,
             FEATURES,
             final_home_points_5, final_home_gd_5, final_home_gf_5, final_home_ga_5,
             final_away_points_5, final_away_gd_5, final_away_gf_5, final_away_ga_5,
             team_league_map,
             league_code_to_name=None):
    """
    Build the interactive ipywidgets interface for tipping a set of matches.

    The interface contains:
      - a dropdown for the number of matches (1-13),
      - a dropdown for the number of half hedges (double-outcome tips),
      - a button that creates one home/away dropdown pair per match,
      - a button that predicts every match with a single outcome,
      - a button that predicts every match and gives the most uncertain
        ones a two-outcome tip.

    Args:
        model_final: fitted classifier exposing predict_proba; its classes
            are expected to be 0 (home win), 1 (draw) and 2 (away win).
        FEATURES: feature column names, in the order home points, home GD,
            home goals for, home goals against, the same four for the away
            side, then the league code.
        final_home_* / final_away_*: dicts mapping team name to its current
            five-match form value.
        team_league_map: dict mapping team name to league code.
        league_code_to_name: optional dict mapping league code to the label
            shown next to a team; defaults to {0: 'E0', 1: 'E1', 2: 'E2'}.

    Returns:
        ipywidgets.VBox: the assembled interface, ready for display().
    """
    # Created per call instead of as a shared mutable default argument.
    if league_code_to_name is None:
        league_code_to_name = {0: 'E0', 1: 'E1', 2: 'E2'}

    # Class index -> coupon symbol, and the display order of the symbols
    inv_map = {0: "1", 1: "X", 2: "2"}
    symbol_order = {"1": 0, "X": 1, "2": 2}
    separator = "————————————————"

    # 1) Dropdown options as (display string, internal value):
    #    "Arsenal (E0)" -> "Arsenal"
    teams_list = []
    for t in sorted(final_home_points_5):
        l_code = team_league_map.get(t, 0)
        l_name = league_code_to_name.get(l_code, "E0")
        teams_list.append((f"{t} ({l_name})", t))

    # 2) Create the widgets
    match_dropdown = Dropdown(
        options=[(str(i), i) for i in range(1, 14)],
        value=3,
        description='Antal matcher:',
        layout=Layout(width='150px')
    )

    halvgarde_dropdown = Dropdown(
        options=[(str(i), i) for i in range(0, match_dropdown.value + 1)],
        value=0,
        description='Halvgarderingar:',
        layout=Layout(width='150px')
    )

    generate_button = Button(description="Skapa rullgardiner", button_style="info")
    predict_without_button = Button(description="Tippar alla! utan halvgarder", button_style="warning")
    predict_with_button = Button(description="Tippar alla! med halvgarder", button_style="success")
    output_area = Output()
    matches_box = VBox([])

    # 3) Keep the half-hedge choices in step with the number of matches
    def update_halvgarde_options(change):
        max_val = match_dropdown.value
        # Remember the current choice first: replacing the options makes the
        # widget fall back to its first entry, which would otherwise throw
        # the user's selection away.
        previous = halvgarde_dropdown.value
        halvgarde_dropdown.options = [(str(i), i) for i in range(0, max_val + 1)]
        halvgarde_dropdown.value = 0 if previous is None else min(previous, max_val)

    match_dropdown.observe(update_halvgarde_options, names='value')

    # 4) Create n pairs of dropdowns (home/away)
    def build_match_rows(n):
        rows = []
        dd_layout_home = Layout(width='45%', margin='0px 2px 0px 0px')
        dd_layout_away = Layout(width='45%', margin='0px')
        row_layout = Layout(display='flex', flex_flow='row',
                            justify_content='flex-start', align_items='center',
                            width='100%')
        for i in range(n):
            home_dd = Dropdown(options=teams_list,
                               description=f"{i+1}",
                               layout=dd_layout_home,
                               style={'description_width': '20px'})
            away_dd = Dropdown(options=teams_list,
                               description="-",
                               layout=dd_layout_away,
                               style={'description_width': '20px'})
            rows.append(HBox([home_dd, away_dd], layout=row_layout))
        return rows

    # 5) Handler for the "Skapa rullgardiner" button
    def generate_match_dropdowns(_):
        with output_area:
            output_area.clear_output()
            n = match_dropdown.value
            matches_box.children = build_match_rows(n)
            print(f"Skapat {n} rullgardins-par för matcher!\n")

    generate_button.on_click(generate_match_dropdowns)

    # --- Helpers shared by both prediction handlers ---

    def selected_matches():
        """Yield (number, home label, away label, home team, away team) per row."""
        for idx, row_box in enumerate(matches_box.children, start=1):
            home_dd, away_dd = row_box.children
            # .label is the display text ("Arsenal (E0)"), .value the team name
            yield idx, home_dd.label, away_dd.label, home_dd.value, away_dd.value

    def pair_error(home_val, away_val):
        """Return a short error text for an unusable pairing, else None."""
        if home_val == away_val:
            return "Samma lag"
        if (home_val not in final_home_points_5) or (away_val not in final_home_points_5):
            return "Saknas data"
        return None

    def outcome_probabilities(home_val, away_val):
        """Return the probabilities for [home win, draw, away win]."""
        # The league code of the home team is used for the match.
        league_code = team_league_map.get(home_val, 0)
        row_df = pd.DataFrame([[
            final_home_points_5[home_val],
            final_home_gd_5[home_val],
            final_home_gf_5[home_val],
            final_home_ga_5[home_val],
            final_away_points_5[away_val],
            final_away_gd_5[away_val],
            final_away_gf_5[away_val],
            final_away_ga_5[away_val],
            league_code
        ]], columns=FEATURES)
        raw = model_final.predict_proba(row_df)[0]
        # predict_proba columns follow the model's classes_; place each value
        # by its class label so a class absent from training shows as 0
        # instead of shifting the other columns.
        classes = getattr(model_final, "classes_", range(len(raw)))
        by_class = {int(cls): p for cls, p in zip(classes, raw)}
        return np.array([by_class.get(k, 0.0) for k in sorted(inv_map)])

    def print_prediction(idx, home_disp, away_disp, tip, proba):
        pH, pD, pA = proba
        print(f"{idx}) {home_disp} - {away_disp} ⇒ [{tip}]")
        print(f"[1]{pH*100:.2f}%, [X]{pD*100:.2f}%, [2]{pA*100:.2f}%")
        print(separator)

    # 6) Prediction without half hedges
    def predict_without_halvgarder(_):
        with output_area:
            output_area.clear_output()
            print("=== Tipsrad utan halvgarder ===")
            for idx, home_disp, away_disp, home_val, away_val in selected_matches():
                error = pair_error(home_val, away_val)
                if error is not None:
                    print(f"{idx}) {home_disp} - {away_disp} [Fel: {error}]")
                    print(separator)
                    continue

                proba = outcome_probabilities(home_val, away_val)
                predicted = inv_map[int(np.argmax(proba))]
                print_prediction(idx, home_disp, away_disp, predicted, proba)
            print("===============")

    # 7) Prediction with half hedges
    def measure_uncertainty(prob):
        """
        Return the gap between the highest and second-highest probability.
        The smaller the gap, the more uncertain the match.
        """
        sorted_probs = np.sort(prob)  # ascending
        return sorted_probs[-1] - sorted_probs[-2]

    def predict_with_halvgarder(_):
        with output_area:
            output_area.clear_output()
            num_half = halvgarde_dropdown.value or 0

            print("=== Tipsrad med halvgarder ===")
            # (number, home label, away label, probabilities or None if invalid)
            matches_info = []
            for idx, home_disp, away_disp, home_val, away_val in selected_matches():
                if pair_error(home_val, away_val) is None:
                    proba = outcome_probabilities(home_val, away_val)
                else:
                    proba = None
                matches_info.append((idx, home_disp, away_disp, proba))

            # Rank the valid matches from most to least uncertain and hedge
            # the first num_half of them.
            valid_matches = [m for m in matches_info if m[3] is not None]
            sorted_by_uncertainty = sorted(valid_matches, key=lambda m: measure_uncertainty(m[3]))
            half_hedge_set = {m[0] for m in sorted_by_uncertainty[:num_half]}

            for idx, home_disp, away_disp, proba in matches_info:
                if proba is None:
                    print(f"{idx}) {home_disp} - {away_disp} [Fel: Samma lag/saknas data?]")
                    print(separator)
                    continue

                if idx in half_hedge_set:
                    # Take the two most likely outcomes and show them in
                    # coupon order (1 before X before 2), e.g. "1X" or "12".
                    top_two = sorted(enumerate(proba), key=lambda x: x[1], reverse=True)[:2]
                    outcome_syms = [inv_map[cls_idx] for cls_idx, p in top_two]
                    tip = "".join(sorted(outcome_syms, key=lambda s: symbol_order[s]))
                else:
                    tip = inv_map[int(np.argmax(proba))]

                print_prediction(idx, home_disp, away_disp, tip, proba)
            print("===============")

    predict_without_button.on_click(predict_without_halvgarder)
    predict_with_button.on_click(predict_with_halvgarder)

    # 8) Put everything in a VBox and return it
    ui_box = VBox([
        match_dropdown,
        halvgarde_dropdown,
        generate_button,
        HBox([predict_without_button, predict_with_button]),
        matches_box,
        output_area
    ])
    return ui_box


# --------------------------
# Huvuddel: Kör flödet
# --------------------------

# A) Load and clean the data
df_all_raw = download_and_read_data()
df_all_clean = clean_and_prepare_data(df_all_raw)

print("Data laddad och städad!")
print(f"Antal matcher: {len(df_all_clean)}")
print(f"Datumintervall: {df_all_clean['Date'].min().date()} -- {df_all_clean['Date'].max().date()}")

# B) Time-based train/test split (last 3 months = test)
max_date = df_all_clean["Date"].max()
cutoff_date = max_date - pd.DateOffset(months=3)
df_train_raw = df_all_clean[df_all_clean["Date"] < cutoff_date].copy()
df_test_raw  = df_all_clean[df_all_clean["Date"] >= cutoff_date].copy()

# Compute the rolling form once over the full chronological history and split
# afterwards. Form only looks at earlier matches, so the training rows get the
# same values as before, while the test rows now see the matches played before
# the cutoff instead of starting from an empty (all-zero) history.
df_form_for_split = calculate_5match_form(df_all_clean)
df_train_form = df_form_for_split[df_form_for_split["Date"] < cutoff_date].copy()
df_test_form  = df_form_for_split[df_form_for_split["Date"] >= cutoff_date].copy()

FEATURES = [
    "HomePoints5", "HomeGD5", "HomeGoalsFor5", "HomeGoalsAgainst5",
    "AwayPoints5", "AwayGD5", "AwayGoalsFor5", "AwayGoalsAgainst5",
    "LeagueCode"
]

X_train = df_train_form[FEATURES]
y_train = df_train_form["ResultLabel"]
X_test  = df_test_form[FEATURES]
y_test  = df_test_form["ResultLabel"]

print(f"\nTrain-set: {len(X_train)} matcher, Test-set (senaste 3 mån): {len(X_test)} matcher.")

# C) Train (with hyperparameter tuning) + fit the final model on ALL data
if len(X_train) > 0 and len(X_test) > 0:
    best_rf_model = train_model_with_tuning(X_train, y_train, X_test, y_test)
else:
    best_rf_model = None  # no tuning when either split is empty

model_final, df_all_form = compute_final_form_and_model(df_all_clean, best_rf_model, FEATURES)

# D) Build the "right now" five-match form dicts + the UI
(fhp5, fhd5, fhgf5, fhga5,
 fap5, fad5, fagf5, faga5,
 team_league_map) = build_current_form_dicts(df_all_clean)

ui = build_ui(model_final,
              FEATURES,
              fhp5, fhd5, fhgf5, fhga5,
              fap5, fad5, fagf5, faga5,
              team_league_map)

display(ui)

print("\n\n=== Instruktioner ===")
print("1) Välj antal matcher.")
print("2) Välj antal halvgarderingar.")
print("3) Klicka på 'Skapa rullgardiner' => Dropdowns för varje match dyker upp.")
print("4) Välj hemmalag respektive bortalag (du ser nu även ligan i parantes).")
print("5) Klicka på 'Tippar alla! utan halvgarder' eller 'Tippar alla! med halvgarder'.")
print("   - Utdata med sannolikheter och förslag på tips (1, X, 2).")
print("\nAll kod finns i denna cell, men uppdelad i funktioner för överskådlighet.")

=== Tipsrad med halvgarder ===
1) Man City (E0) - Chelsea (E0) ⇒ [12]
[1]43.64%, [X]23.83%, [2]32.53%
————————————————
2) Liverpool (E0) - Ipswich (E0) ⇒ [12]
[1]50.90%, [X]24.44%, [2]24.66%
————————————————
3) Wolves (E0) - Arsenal (E0) ⇒ [X2]
[1]24.47%, [X]27.83%, [2]47.70%
————————————————
4) Brighton (E0) - Everton (E0) ⇒ [12]
[1]43.22%, [X]25.62%, [2]31.15%
————————————————
5) Southampton (E0) - Newcastle (E0) ⇒ [2]
[1]25.23%, [X]21.08%, [2]53.69%
————————————————
6) Bournemouth (E0) - Nott'm Forest (E0) ⇒ [12]
[1]38.06%, [X]23.70%, [2]38.24%
————————————————
7) Bristol City (E1) - Blackburn (E1) ⇒ [1]
[1]61.36%, [X]18.94%, [2]19.70%
————————————————
8) Cardiff (E1) - Derby (E1) ⇒ [12]
[1]42.37%, [X]26.34%, [2]31.29%
————————————————
9) Coventry (E1) - Watford (E1) ⇒ [1X]
[1]51.42%, [X]24.31%, [2]24.27%
————————————————
10) Preston (E1) - Middlesbrough (E1) ⇒ [12]
[1]42.64%, [X]26.08%, [2]31.28%
————————————————
11) QPR (E1) - Sheffield Weds (E1) ⇒ [1]
[1]58.50%, [X]21.03%, [2]20.