<a href="https://colab.research.google.com/github/Emtatos/Football/blob/main/Klar_Skrell_Med_halvgardering_av_13UK_Football.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
# ============================================
# CELL 1: DATAHÄMTNING, STÄDNING, MODELLTRÄNING
# ============================================

# 1A. Aktivera ipywidgets och installera scikit-learn
!pip install ipywidgets scikit-learn --quiet
from google.colab import output
output.enable_custom_widget_manager()  # Aktiverar widget-stöd i Colab

# 1B. Hämta data för E0, E1, E2 (2023–24 och 2024–25)
!wget -nc https://www.football-data.co.uk/mmz4281/2324/E0.csv -O E0_2324.csv
!wget -nc https://www.football-data.co.uk/mmz4281/2425/E0.csv -O E0_2425.csv
!wget -nc https://www.football-data.co.uk/mmz4281/2324/E1.csv -O E1_2324.csv
!wget -nc https://www.football-data.co.uk/mmz4281/2425/E1.csv -O E1_2425.csv
!wget -nc https://www.football-data.co.uk/mmz4281/2324/E2.csv -O E2_2324.csv
!wget -nc https://www.football-data.co.uk/mmz4281/2425/E2.csv -O E2_2425.csv

# 1C. Importera nödvändiga bibliotek
import pandas as pd
import numpy as np
from collections import defaultdict, deque
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 1D. Hjälpfunktion för att läsa in data och sätta ligakod
def read_and_label(file, league_label):
    df = pd.read_csv(file)
    df["League"] = league_label
    return df

# 1E. Läs in och slå ihop data från E0, E1, E2
df_e0_2324 = read_and_label("E0_2324.csv", "E0")
df_e0_2425 = read_and_label("E0_2425.csv", "E0")
df_e1_2324 = read_and_label("E1_2324.csv", "E1")
df_e1_2425 = read_and_label("E1_2425.csv", "E1")
df_e2_2324 = read_and_label("E2_2324.csv", "E2")
df_e2_2425 = read_and_label("E2_2425.csv", "E2")

df_all = pd.concat([
    df_e0_2324, df_e0_2425,
    df_e1_2324, df_e1_2425,
    df_e2_2324, df_e2_2425
], ignore_index=True)

# 1F. Datumhantering och städning
df_all["Date"] = pd.to_datetime(df_all["Date"], dayfirst=True, errors="coerce")
df_all = df_all.dropna(subset=["Date", "FTHG", "FTAG", "FTR", "League"])
df_all = df_all.sort_values("Date").reset_index(drop=True)

print("Efter städning:")
print(f"Antal matcher totalt: {len(df_all)}")

# Tydlig utskrift av datumintervall
start_date = df_all["Date"].min()
end_date   = df_all["Date"].max()
print(f"Tidigaste match: {start_date.date()} | Senaste match: {end_date.date()}")

mapping = {"H": 0, "D": 1, "A": 2}
df_all["ResultLabel"] = df_all["FTR"].map(mapping)

df_all["League"] = df_all["League"].astype("category")
df_all["LeagueCode"] = df_all["League"].cat.codes

# 1G. Funktion för att beräkna "5 senaste"-form
def calculate_5match_form(df_input):
    df = df_input.copy()
    df["HomePoints5"] = df["HomeGD5"] = df["HomeGoalsFor5"] = df["HomeGoalsAgainst5"] = np.nan
    df["AwayPoints5"] = df["AwayGD5"] = df["AwayGoalsFor5"] = df["AwayGoalsAgainst5"] = np.nan

    home_pts_dict = defaultdict(lambda: deque([], maxlen=5))
    home_gd_dict = defaultdict(lambda: deque([], maxlen=5))
    home_gf_dict = defaultdict(lambda: deque([], maxlen=5))
    home_ga_dict = defaultdict(lambda: deque([], maxlen=5))

    away_pts_dict = defaultdict(lambda: deque([], maxlen=5))
    away_gd_dict = defaultdict(lambda: deque([], maxlen=5))
    away_gf_dict = defaultdict(lambda: deque([], maxlen=5))
    away_ga_dict = defaultdict(lambda: deque([], maxlen=5))

    for i, row in df.iterrows():
        home_team = row["HomeTeam"]
        away_team = row["AwayTeam"]
        fthg = row["FTHG"]
        ftag = row["FTAG"]
        ftr = row["FTR"]

        if len(home_pts_dict[home_team]) > 0:
            df.at[i, "HomePoints5"]       = np.mean(home_pts_dict[home_team])
            df.at[i, "HomeGD5"]           = np.mean(home_gd_dict[home_team])
            df.at[i, "HomeGoalsFor5"]     = np.mean(home_gf_dict[home_team])
            df.at[i, "HomeGoalsAgainst5"] = np.mean(home_ga_dict[home_team])
        else:
            df.at[i, "HomePoints5"]       = 0
            df.at[i, "HomeGD5"]           = 0
            df.at[i, "HomeGoalsFor5"]     = 0
            df.at[i, "HomeGoalsAgainst5"] = 0

        if len(away_pts_dict[away_team]) > 0:
            df.at[i, "AwayPoints5"]       = np.mean(away_pts_dict[away_team])
            df.at[i, "AwayGD5"]           = np.mean(away_gd_dict[away_team])
            df.at[i, "AwayGoalsFor5"]     = np.mean(away_gf_dict[away_team])
            df.at[i, "AwayGoalsAgainst5"] = np.mean(away_ga_dict[away_team])
        else:
            df.at[i, "AwayPoints5"]       = 0
            df.at[i, "AwayGD5"]           = 0
            df.at[i, "AwayGoalsFor5"]     = 0
            df.at[i, "AwayGoalsAgainst5"] = 0

        home_pts = 3 if ftr == "H" else 1 if ftr == "D" else 0
        away_pts = 3 if ftr == "A" else 1 if ftr == "D" else 0
        home_gd = fthg - ftag
        away_gd = ftag - fthg

        home_pts_dict[home_team].append(home_pts)
        home_gd_dict[home_team].append(home_gd)
        home_gf_dict[home_team].append(fthg)
        home_ga_dict[home_team].append(ftag)

        away_pts_dict[away_team].append(away_pts)
        away_gd_dict[away_team].append(away_gd)
        away_gf_dict[away_team].append(ftag)
        away_ga_dict[away_team].append(fthg)

    return df

# 1H. Tidsbaserad train/test-split (3 månader)
max_date = df_all["Date"].max()
cutoff_date = max_date - pd.DateOffset(months=3)
print(f"\nCutoff_date: {cutoff_date.date()}")

df_train_raw = df_all[df_all["Date"] < cutoff_date].copy()
df_test_raw = df_all[df_all["Date"] >= cutoff_date].copy()

df_train_form = calculate_5match_form(df_train_raw)
df_test_form  = calculate_5match_form(df_test_raw)

FEATURES = [
    "HomePoints5", "HomeGD5", "HomeGoalsFor5", "HomeGoalsAgainst5",
    "AwayPoints5", "AwayGD5", "AwayGoalsFor5", "AwayGoalsAgainst5",
    "LeagueCode"
]

X_train = df_train_form[FEATURES]
y_train = df_train_form["ResultLabel"]
X_test  = df_test_form[FEATURES]
y_test  = df_test_form["ResultLabel"]

if len(X_train) > 0 and len(X_test) > 0:
    model_eval = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    model_eval.fit(X_train, y_train)
    y_pred_test = model_eval.predict(X_test)
    from sklearn.metrics import accuracy_score
    acc_test = accuracy_score(y_test, y_pred_test)
    print(f"Test-accuracy (senaste 3 månaderna): {acc_test:.2%}")
else:
    print("Otillräcklig data för utvärdering av senaste 3 månaderna.")

# 1I. Träna slutgiltig modell på ALL data
df_all_form = calculate_5match_form(df_all)
X_final = df_all_form[FEATURES]
y_final = df_all_form["ResultLabel"]

model_final = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
model_final.fit(X_final, y_final)

# 1J. Förbered "just nu"-form och andra nödvändiga variabler
from collections import defaultdict, deque

home_pts2 = defaultdict(lambda: deque([], maxlen=5))
home_gd2  = defaultdict(lambda: deque([], maxlen=5))
home_gf2  = defaultdict(lambda: deque([], maxlen=5))
home_ga2  = defaultdict(lambda: deque([], maxlen=5))

away_pts2 = defaultdict(lambda: deque([], maxlen=5))
away_gd2  = defaultdict(lambda: deque([], maxlen=5))
away_gf2  = defaultdict(lambda: deque([], maxlen=5))
away_ga2  = defaultdict(lambda: deque([], maxlen=5))

team_league_map = {}
df_all_sorted = df_all.sort_values("Date").reset_index(drop=True)

for i, row in df_all_sorted[::-1].iterrows():
    if row["HomeTeam"] not in team_league_map:
        team_league_map[row["HomeTeam"]] = row["LeagueCode"]
    if row["AwayTeam"] not in team_league_map:
        team_league_map[row["AwayTeam"]] = row["LeagueCode"]

for i, row in df_all_sorted.iterrows():
    home_team = row["HomeTeam"]
    away_team = row["AwayTeam"]
    fthg = row["FTHG"]
    ftag = row["FTAG"]
    ftr = row["FTR"]

    hp = 3 if ftr == "H" else 1 if ftr == "D" else 0
    ap = 3 if ftr == "A" else 1 if ftr == "D" else 0

    hg = fthg - ftag
    ag = ftag - fthg

    home_pts2[home_team].append(hp)
    home_gd2[home_team].append(hg)
    home_gf2[home_team].append(fthg)
    home_ga2[home_team].append(ftag)

    away_pts2[away_team].append(ap)
    away_gd2[away_team].append(ag)
    away_gf2[away_team].append(ftag)
    away_ga2[away_team].append(fthg)

all_teams = set(list(home_pts2.keys()) + list(away_pts2.keys()))

final_home_points_5 = {team: np.mean(home_pts2[team]) if len(home_pts2[team])>0 else 0 for team in all_teams}
final_home_gd_5     = {team: np.mean(home_gd2[team]) if len(home_gd2[team])>0 else 0 for team in all_teams}
final_home_gf_5     = {team: np.mean(home_gf2[team]) if len(home_gf2[team])>0 else 0 for team in all_teams}
final_home_ga_5     = {team: np.mean(home_ga2[team]) if len(home_ga2[team])>0 else 0 for team in all_teams}

final_away_points_5 = {team: np.mean(away_pts2[team]) if len(away_pts2[team])>0 else 0 for team in all_teams}
final_away_gd_5     = {team: np.mean(away_gd2[team])  if len(away_gd2[team])>0  else 0 for team in all_teams}
final_away_gf_5     = {team: np.mean(away_gf2[team])  if len(away_gf2[team])>0  else 0 for team in all_teams}
final_away_ga_5     = {team: np.mean(away_ga2[team])  if len(away_ga2[team])>0  else 0 for team in all_teams}

inv_map = {0: "1", 1: "X", 2: "2"}

print("\nCELL 1 klart! Modell och data är redo för vidare användning.")

File ‘E0_2324.csv’ already there; not retrieving.
File ‘E0_2425.csv’ already there; not retrieving.
File ‘E1_2324.csv’ already there; not retrieving.
File ‘E1_2425.csv’ already there; not retrieving.
File ‘E2_2324.csv’ already there; not retrieving.
File ‘E2_2425.csv’ already there; not retrieving.
Efter städning:
Antal matcher totalt: 2293
Tidigaste match: 2023-08-04 | Senaste match: 2025-01-16

Cutoff_date: 2024-10-16
Test-accuracy (senaste 3 månaderna): 41.36%

CELL 1 klart! Modell och data är redo för vidare användning.


In [28]:
import ipywidgets as widgets
from ipywidgets import Dropdown, Button, VBox, HBox, Output, Layout
from IPython.display import display
import pandas as pd

teams_list = sorted(all_teams)

# Dropdown för antal matcher (1–13)
match_dropdown = Dropdown(
    options=[(str(i), i) for i in range(1, 14)],
    value=3,
    description='Antal matcher:',
    layout=Layout(width='150px')
)

# Dropdown för antal halvgarder med ändrad etikett
halvgarde_dropdown = Dropdown(
    options=[(str(i), i) for i in range(0, match_dropdown.value + 1)],
    value=0,
    description='Halvgarderingar:',
    layout=Layout(width='150px')
)

generate_button = Button(description="Skapa rullgardiner", button_style="info")
predict_without_button = Button(description="Tippar alla! utan halvgarder", button_style="warning")
predict_with_button = Button(description="Tippar alla! med halvgarder", button_style="success")
output_area = Output()
matches_box = VBox([])

def update_halvgarde_options(change):
    max_val = match_dropdown.value
    halvgarde_dropdown.options = [(str(i), i) for i in range(0, max_val + 1)]
    if halvgarde_dropdown.value > max_val:
        halvgarde_dropdown.value = max_val

match_dropdown.observe(update_halvgarde_options, names='value')

def build_match_rows(n):
    rows = []
    dd_layout_home = Layout(width='45%', margin='0px 2px 0px 0px')
    dd_layout_away = Layout(width='45%', margin='0px')
    row_layout = Layout(display='flex', flex_flow='row', justify_content='flex-start', align_items='center', width='100%')
    for i in range(n):
        home_dd = Dropdown(options=teams_list, description=f"{i+1}", layout=dd_layout_home, style={'description_width': '20px'})
        away_dd = Dropdown(options=teams_list, description="-", layout=dd_layout_away, style={'description_width': '20px'})
        row_box = HBox([home_dd, away_dd], layout=row_layout)
        rows.append(row_box)
    return rows

def generate_match_dropdowns(_):
    with output_area:
        output_area.clear_output()
        n = match_dropdown.value
        new_rows = build_match_rows(n)
        matches_box.children = new_rows
        print(f"Skapat {n} rullgardins-par för matcher!\n")

generate_button.on_click(generate_match_dropdowns)

def predict_without_halvgarder(_):
    with output_area:
        output_area.clear_output()
        print("=== Tipsrad utan halvgarder ===")
        for idx, row_box in enumerate(matches_box.children, start=1):
            home_dd, away_dd = row_box.children
            home_val = home_dd.value
            away_val = away_dd.value

            if home_val == away_val:
                print(f"{idx}) {home_val} - {away_val} [Fel: Samma lag]")
                print("————————————————")
                continue
            if (home_val not in final_home_points_5) or (away_val not in final_away_points_5):
                print(f"{idx}) {home_val} - {away_val} [Fel: Saknas data]")
                print("————————————————")
                continue

            league_code = team_league_map.get(home_val, 0)
            row_data = [[
                final_home_points_5[home_val],
                final_home_gd_5[home_val],
                final_home_gf_5[home_val],
                final_home_ga_5[home_val],
                final_away_points_5[away_val],
                final_away_gd_5[away_val],
                final_away_gf_5[away_val],
                final_away_ga_5[away_val],
                league_code
            ]]
            row_df = pd.DataFrame(row_data, columns=FEATURES)
            proba = model_final.predict_proba(row_df)[0]
            max_idx = np.argmax(proba)
            predicted = inv_map[max_idx]

            print(f"{idx}) {home_val} - {away_val} [{predicted}]")
            print(f"[1]{proba[0]*100:.2f}%, [X]{proba[1]*100:.2f}%, [2]{proba[2]*100:.2f}%")
            print("————————————————")
        print("===============")

def predict_with_halvgarder(_):
    with output_area:
        output_area.clear_output()
        # Hämta antalet halvgarder från dropdown
        num_half = halvgarde_dropdown.value

        print("=== Tipsrad med halvgarder ===")
        matches_info = []
        for idx, row_box in enumerate(matches_box.children, start=1):
            home_dd, away_dd = row_box.children
            home_val = home_dd.value
            away_val = away_dd.value

            if home_val == away_val or (home_val not in final_home_points_5) or (away_val not in final_away_points_5):
                proba = [0, 0, 0]
                predicted = "Fel"
            else:
                league_code = team_league_map.get(home_val, 0)
                row_data = [[
                    final_home_points_5[home_val],
                    final_home_gd_5[home_val],
                    final_home_gf_5[home_val],
                    final_home_ga_5[home_val],
                    final_away_points_5[away_val],
                    final_away_gd_5[away_val],
                    final_away_gf_5[away_val],
                    final_away_ga_5[away_val],
                    league_code
                ]]
                row_df = pd.DataFrame(row_data, columns=FEATURES)
                proba = model_final.predict_proba(row_df)[0]
                max_idx = np.argmax(proba)
                predicted = inv_map[max_idx]

            matches_info.append({
                'idx': idx,
                'home': home_val,
                'away': away_val,
                'proba': proba,
                'predicted': predicted
            })

        if num_half > 0:
            sorted_by_uncertainty = sorted(
                [m for m in matches_info if m['predicted'] != "Fel"],
                key=lambda x: abs(x['proba'][0]-x['proba'][1])
            )
            half_hedge_set = set(item['idx'] for item in sorted_by_uncertainty[:num_half])
        else:
            half_hedge_set = set()

        for info in matches_info:
            idx = info['idx']
            home_val = info['home']
            away_val = info['away']
            proba = info['proba']
            predicted = info['predicted']

            if predicted == "Fel":
                print(f"{idx}) {home_val} - {away_val} [Fel]")
                print("————————————————")
            else:
                if idx in half_hedge_set:
                    outcomes = sorted(enumerate(proba), key=lambda x: x[1], reverse=True)[:2]
                    outcome_symbols = [inv_map[o[0]] for o in outcomes]
                    desired_order = {"1": 0, "X": 1, "2": 2}
                    combined_outcome = "".join(sorted(outcome_symbols, key=lambda x: desired_order[x]))
                    print(f"{idx}) {home_val} - {away_val} [{combined_outcome}]")
                    print(f"[1]{proba[0]*100:.2f}%, [X]{proba[1]*100:.2f}%, [2]{proba[2]*100:.2f}%")
                else:
                    print(f"{idx}) {home_val} - {away_val} [{predicted}]")
                    print(f"[1]{proba[0]*100:.2f}%, [X]{proba[1]*100:.2f}%, [2]{proba[2]*100:.2f}%")
                print("————————————————")
        print("===============")

predict_without_button.on_click(predict_without_halvgarder)
predict_with_button.on_click(predict_with_halvgarder)

ui_box = VBox([
    match_dropdown,
    halvgarde_dropdown,
    generate_button,
    HBox([predict_without_button, predict_with_button]),
    matches_box,
    output_area
])
display(ui_box)

print("1) Välj antal matcher med dropdownen.")
print("2) Välj antal halvgarderingar med den andra dropdownen.")
print("3) Klicka 'Skapa rullgardiner'.")
print("4) Justera val av lag för varje match.")
print("5) Klicka 'Tippar alla! utan halvgarder' för att se resultat utan halvgarderingar.")
print("6) Klicka 'Tippar alla! med halvgarder' för att se resultat med halvgarderingar.")

VBox(children=(Dropdown(description='Antal matcher:', index=2, layout=Layout(width='150px'), options=(('1', 1)…

1) Välj antal matcher med dropdownen.
2) Välj antal halvgarderingar med den andra dropdownen.
3) Klicka 'Skapa rullgardiner'.
4) Justera val av lag för varje match.
5) Klicka 'Tippar alla! utan halvgarder' för att se resultat utan halvgarderingar.
6) Klicka 'Tippar alla! med halvgarder' för att se resultat med halvgarderingar.


In [29]:
# ============================================
# CELL 3: uppdatera cell 2 knappar innan cell 4 Hyperparameter-tuning med RandomizedSearchCV för RandomForest
# ============================================

from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Definiera ett parameterutrymme för RandomizedSearch
param_distributions = {
    'n_estimators': [100, 200, 300, 400],
    'max_depth': [5, 10, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Skapa en grundläggande RandomForest-modell
rf = RandomForestClassifier(random_state=42)

# Skapa RandomizedSearchCV-objektet
random_search = RandomizedSearchCV(estimator=rf,
                                   param_distributions=param_distributions,
                                   n_iter=20,        # Testa 20 slumpmässiga kombinationer
                                   cv=3,             # 3-faldig korsvalidering
                                   n_jobs=-1,        # Använd alla processorkärnor
                                   verbose=1,
                                   scoring='accuracy',
                                   random_state=42)

# Utför hyperparameter-tuning
print("Startar RandomizedSearchCV...")
random_search.fit(X_train, y_train)

# Skriv ut de bästa hittade parametrarna
print("Bästa parametrar:", random_search.best_params_)

# Hämta den bästa modellen och träna den
best_rf = random_search.best_estimator_
best_rf.fit(X_train, y_train)

# Utvärdera den bästa modellen på testdatan
y_pred = best_rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Uppdaterad testnoggrannhet: {test_accuracy:.2%}")

# Uppdatera den globala modellen automatiskt
model_final = best_rf
print("Model_final har uppdaterats automatiskt med de bästa parametrarna.")

Startar RandomizedSearchCV...
Fitting 3 folds for each of 20 candidates, totalling 60 fits
Bästa parametrar: {'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_depth': 5}
Uppdaterad testnoggrannhet: 41.94%
Model_final har uppdaterats automatiskt med de bästa parametrarna.


In [30]:
# ============================================
# CELL 4: Exempel på (1) tredje kategori, (2) bredare trösklar, (3) "slight vs clear" favorit/underdog
# ============================================

import pandas as pd
from collections import defaultdict

# A. Statsdict för historiska roller
#    Varje lag och roll:
#      - home_clear_fav / home_slight_fav / home_no_fav / home_slight_ud / home_clear_ud
#        + vinst/förlust
#      - away_clear_fav / away_slight_fav / away_no_fav / away_slight_ud / away_clear_ud
#        + vinst/förlust
stats_dict = defaultdict(lambda: {
    "home_clear_fav_count": 0, "home_clear_fav_wins": 0,
    "home_slight_fav_count": 0, "home_slight_fav_wins": 0,
    "home_no_fav_count": 0,    "home_no_fav_wins": 0,
    "home_slight_ud_count": 0, "home_slight_ud_wins": 0,
    "home_clear_ud_count": 0,  "home_clear_ud_wins": 0,

    "away_clear_fav_count": 0, "away_clear_fav_wins": 0,
    "away_slight_fav_count": 0, "away_slight_fav_wins": 0,
    "away_no_fav_count": 0,    "away_no_fav_wins": 0,
    "away_slight_ud_count": 0, "away_slight_ud_wins": 0,
    "away_clear_ud_count": 0,  "away_clear_ud_wins": 0
})

# B. Funktion för att bestämma "clear/slight/no" favorit eller underdog baserat på prob
def classify_probability(prob):
    # 3. definiera thresholds:
    # >0.60 => clear favorite
    # >0.50 => slight favorite
    # >0.45 => no favorite (ingen tydlig favorit)
    # >0.40 => slight underdog
    # annars => clear underdog
    if prob > 0.60:
        return "clear_fav"
    elif prob > 0.50:
        return "slight_fav"
    elif prob > 0.45:
        return "no_fav"
    elif prob > 0.40:
        return "slight_ud"
    else:
        return "clear_ud"

# Säkerställ att predicted_prob finns i df_all_form
if "predicted_prob" not in df_all_form.columns:
    print("Beräknar sannolikheter [p(H), p(D), p(A)] för alla matcher i df_all_form ...")
    df_features = df_all_form[FEATURES]
    all_probs = model_final.predict_proba(df_features)
    df_all_form["predicted_prob"] = list(all_probs)

# Skapa HomeTeamWinProb, AwayTeamWinProb om de saknas
if "HomeTeamWinProb" not in df_all_form.columns or "AwayTeamWinProb" not in df_all_form.columns:
    print("Skapar kolumner HomeTeamWinProb och AwayTeamWinProb...")
    def extract_probs(row):
        # row["predicted_prob"] = [p(H), p(D), p(A)]
        probs = row["predicted_prob"]
        return pd.Series({
            "HomeTeamWinProb": probs[0],
            "AwayTeamWinProb": probs[2]
        })
    df_tmp = df_all_form.apply(extract_probs, axis=1)
    df_all_form["HomeTeamWinProb"] = df_tmp["HomeTeamWinProb"]
    df_all_form["AwayTeamWinProb"] = df_tmp["AwayTeamWinProb"]

# C. Loopar över historiken i df_all_form => definiera lagets roll (clear_fav / slight_fav / no_fav / slight_ud / clear_ud)
for i, row in df_all_form.iterrows():
    home_team = row["HomeTeam"]
    away_team = row["AwayTeam"]
    ftr = row["FTR"]  # "H", "D", "A"
    home_prob = row["HomeTeamWinProb"]
    away_prob = row["AwayTeamWinProb"]

    # Klassificera hemmalagets roll
    home_role = classify_probability(home_prob)
    # Öka count
    stats_dict[home_team][f"home_{home_role}_count"] += 1

    # Klassificera bortalagets roll
    away_role = classify_probability(away_prob)
    stats_dict[away_team][f"away_{away_role}_count"] += 1

    # Räkna vinster om match ej oavgjord
    if ftr == "H":
        # Hemmalaget vann
        stats_dict[home_team][f"home_{home_role}_wins"] += 1
    elif ftr == "A":
        # Bortalaget vann
        stats_dict[away_team][f"away_{away_role}_wins"] += 1
    else:
        # "D": Oavgjort => ingen vinst
        pass

print("\n=== Resultat för var och en av de valda matcherna ===\n")

# D. För varje vald match i matches_box => "just nu"-klassning + historik
for row_box in matches_box.children:
    home_dd, away_dd = row_box.children
    home_team = home_dd.value
    away_team = away_dd.value

    # Bygg 1-rads DataFrame med "just nu"-form => p(H), p(D), p(A)
    league_code_home = team_league_map.get(home_team, 0)
    row_df = pd.DataFrame([[
        final_home_points_5[home_team],
        final_home_gd_5[home_team],
        final_home_gf_5[home_team],
        final_home_ga_5[home_team],

        final_away_points_5[away_team],
        final_away_gd_5[away_team],
        final_away_gf_5[away_team],
        final_away_ga_5[away_team],

        league_code_home
    ]], columns=FEATURES)

    proba = model_final.predict_proba(row_df)[0]  # [p(H), p(D), p(A)]
    pH, pD, pA = proba
    max_idx = proba.argmax()
    predicted_result = ["1","X","2"][max_idx]

    print(f"{home_team} - {away_team} [{predicted_result}]")
    print(f"[1]{pH*100:.2f}%, [X]{pD*100:.2f}%, [2]{pA*100:.2f}%\n")

    # Hemmalagets roll "just nu"
    home_current_role = classify_probability(pH)
    # Bortalagets roll "just nu"
    away_current_role = classify_probability(pA)

    # Hämta historiska stats
    home_stats = stats_dict[home_team]
    away_stats = stats_dict[away_team]

    # Hämta relevant nyckel => "home_{role}_count", "home_{role}_wins"
    home_count_key = f"home_{home_current_role}_count"
    home_wins_key  = f"home_{home_current_role}_wins"

    if home_current_role.startswith("no_fav"):
        # no favorite => mer neutral text
        print(f"{home_team} (hemma, 'no favorite'-roll):")
    elif home_current_role.startswith("slight_fav"):
        print(f"{home_team} (hemma, 'slight favorite'):")
    elif home_current_role.startswith("clear_fav"):
        print(f"{home_team} (hemma, 'clear favorite'):")
    elif home_current_role.startswith("slight_ud"):
        print(f"{home_team} (hemma, 'slight underdog'):")
    elif home_current_role.startswith("clear_ud"):
        print(f"{home_team} (hemma, 'clear underdog'):")

    h_count = home_stats[home_count_key]
    h_wins  = home_stats[home_wins_key]
    h_win_pct = (h_wins / h_count)*100 if h_count>0 else 0
    # Förluster => h_count - h_wins - (oavgjorda?), men vi struntar i oavgjort i ren vinst/förlust-räkning
    # Kod förenklad => vi definierar "losses" = h_count - h_wins
    h_losses = h_count - h_wins
    h_loss_pct = (h_losses / h_count)*100 if h_count>0 else 0

    print(f"  Totalt scenario: {h_count} matcher")
    print(f"  Vinster:  {h_wins}  ({h_win_pct:.1f}%)")
    print(f"  Förluster:{h_losses}  ({h_loss_pct:.1f}%)\n")

    # Bortalaget
    away_count_key = f"away_{away_current_role}_count"
    away_wins_key  = f"away_{away_current_role}_wins"

    if away_current_role.startswith("no_fav"):
        print(f"{away_team} (borta, 'no favorite'):")
    elif away_current_role.startswith("slight_fav"):
        print(f"{away_team} (borta, 'slight favorite'):")
    elif away_current_role.startswith("clear_fav"):
        print(f"{away_team} (borta, 'clear favorite'):")
    elif away_current_role.startswith("slight_ud"):
        print(f"{away_team} (borta, 'slight underdog'):")
    elif away_current_role.startswith("clear_ud"):
        print(f"{away_team} (borta, 'clear underdog'):")

    a_count = away_stats[away_count_key]
    a_wins  = away_stats[away_wins_key]
    a_win_pct = (a_wins / a_count)*100 if a_count>0 else 0
    a_losses = a_count - a_wins
    a_loss_pct = (a_losses / a_count)*100 if a_count>0 else 0

    print(f"  Totalt scenario: {a_count} matcher")
    print(f"  Vinster:  {a_wins}  ({a_win_pct:.1f}%)")
    print(f"  Förluster:{a_losses}  ({a_loss_pct:.1f}%)\n")

print("=== Klart! ===")

# Skriv ut datumperiod
start_date = df_all_form["Date"].min()
end_date   = df_all_form["Date"].max()
print(f"Period för data: {start_date.date()} -- {end_date.date()}")
print("Cell 4 klar med triple-kategori + threshold-lager + clear/slight roller.")

Beräknar sannolikheter [p(H), p(D), p(A)] för alla matcher i df_all_form ...
Skapar kolumner HomeTeamWinProb och AwayTeamWinProb...

=== Resultat för var och en av de valda matcherna ===

Arsenal - Aston Villa [1]
[1]54.98%, [X]21.28%, [2]23.74%

Arsenal (hemma, 'slight favorite'):
  Totalt scenario: 12 matcher
  Vinster:  9  (75.0%)
  Förluster:3  (25.0%)

Aston Villa (borta, 'clear underdog'):
  Totalt scenario: 25 matcher
  Vinster:  9  (36.0%)
  Förluster:16  (64.0%)

Barnsley - Birmingham [2]
[1]33.97%, [X]25.63%, [2]40.40%

Barnsley (hemma, 'clear underdog'):
  Totalt scenario: 15 matcher
  Vinster:  3  (20.0%)
  Förluster:12  (80.0%)

Birmingham (borta, 'slight underdog'):
  Totalt scenario: 0 matcher
  Vinster:  0  (0.0%)
  Förluster:0  (0.0%)

Blackburn - Blackpool [1]
[1]44.58%, [X]22.17%, [2]33.25%

Blackburn (hemma, 'slight underdog'):
  Totalt scenario: 4 matcher
  Vinster:  1  (25.0%)
  Förluster:3  (75.0%)

Blackpool (borta, 'clear underdog'):
  Totalt scenario: 35 match