In [None]:
# Imports & grundinställningar

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)
from sklearn.base import clone
from sklearn.feature_selection import RFE
from xgboost import XGBClassifier
from scipy.stats import ttest_rel
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

from scipy.optimize import differential_evolution
from IPython.display import display

# Seed handed to every stochastic component below (splits, models, SMOTE).
RANDOM_STATE = 42
# The dataset CSV files are read from "../data" relative to the working directory.
DATA_DIR = os.path.join(os.pardir, "data")



# Huvudfunktion för att utvärdera en modell

def evaluate_model(model, X_test, y_test):
    """
    Score an already fitted binary classifier on held-out data.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict`` and either ``predict_proba`` or
        ``decision_function``.
    X_test, y_test
        Test features and the corresponding true labels.

    Returns
    -------
    dict
        ``accuracy``, ``precision``, ``recall``, ``f1`` and ``auc`` scores,
        plus the raw ``y_pred`` and ``y_proba`` arrays.
    """
    y_pred = model.predict(X_test)

    if not hasattr(model, "predict_proba"):
        # No probability output (e.g. an SVC without predict_proba):
        # min-max rescale the decision scores into [0, 1]. The small epsilon
        # keeps the division defined when all scores are equal.
        margins = model.decision_function(X_test)
        lowest = margins.min()
        span = margins.max() - margins.min() + 1e-9
        y_proba = (margins - lowest) / span
    else:
        # Column 1 holds the score of the positive class.
        y_proba = model.predict_proba(X_test)[:, 1]

    return {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, zero_division=0),
        "recall": recall_score(y_test, y_pred, zero_division=0),
        "f1": f1_score(y_test, y_pred, zero_division=0),
        "auc": roc_auc_score(y_test, y_proba),
        "y_pred": y_pred,
        "y_proba": y_proba,
    }


# === Del 3: Dataset-konfiguration & förberedelse ===

# Generic fallback names for the label column, tried in this order.
POSSIBLE_TARGETS_GENERAL = [
    "defects", "Defects", "defect",
    "bug", "bugs",
    "problems", "problem",
    "class", "target",
]

# Dataset-specific candidate names (takes precedence over the general list).
# Every dataset gets its own list object; KC2 additionally tries a few
# "problems"/"bug" spellings before the common "defects" names.
POSSIBLE_TARGETS_BY_DATASET = {
    name: ["defects", "Defects"]
    for name in ("JM1", "KC1", "KC2", "PC1", "CM1")
}
POSSIBLE_TARGETS_BY_DATASET["KC2"] = ["problems", "bug", "bugs", "defects", "Defects"]

# Registry of available datasets: CSV file name plus a nominal target name.
# NOTE(review): the loaders visible in this notebook resolve the label column
# through the candidate lists above and only read "filename" from this registry.
DATASETS = {
    name: {"filename": f"{name.lower()}.csv", "target": "defects"}
    for name in ("JM1", "KC1", "KC2", "PC1", "CM1")
}

def load_and_prepare_dataset(dataset_name):
    """
    Load one registered dataset and return a scaled, stratified train/test split.

    Steps: read the CSV, locate the label column among the candidate names,
    coerce boolean/text labels to 0/1, split 80/20 (stratified on the label)
    and standardise the features with statistics from the training part only.

    Parameters
    ----------
    dataset_name : str
        Key into ``DATASETS``.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``; the feature
        matrices are the scaler's output, the labels are pandas Series.

    Raises
    ------
    KeyError
        If ``dataset_name`` is not registered in ``DATASETS``.
    ValueError
        If none of the candidate label columns exists in the file.
    """
    csv_path = os.path.join(DATA_DIR, DATASETS[dataset_name]["filename"])
    df = pd.read_csv(csv_path)

    # Dataset-specific candidates win; otherwise use the general list.
    candidate_targets = POSSIBLE_TARGETS_BY_DATASET.get(dataset_name, POSSIBLE_TARGETS_GENERAL)

    # First candidate that is an actual column, or None.
    target_col = next((name for name in candidate_targets if name in df.columns), None)
    if target_col is None:
        raise ValueError(
            f"Kunde inte hitta target-kolumn i {dataset_name}.\n"
            f"Försökte med: {candidate_targets}\n"
            f"Filen har kolumner: {list(df.columns)}"
        )

    print(f"Använder target-kolumn '{target_col}' för dataset {dataset_name}.")

    y = df[target_col]
    X = df.drop(columns=[target_col])

    # Make sure the label is 0/1.
    if y.dtype == "bool":
        y = y.astype(int)
    elif y.dtype == "object":
        positive_words = ("yes", "true", "defective", "bug", "bugs", "problem", "problems", "1")
        label_map = {word: 1 for word in positive_words}
        label_map["0"] = 0
        lowered = y.astype(str).str.lower()
        # Any text not listed in the map ends up as 0.
        y = lowered.map(label_map).fillna(0).astype(int)

    print(f"{dataset_name}: shape={df.shape}")
    print("Klassfördelning (hela datan):")
    print(y.value_counts(), "\n")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
    )

    # Fit the scaler on the training part only, then reuse it for the test part.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("Train klassfördelning:")
    print(y_train.value_counts())
    print("\nTest klassfördelning:")
    print(y_test.value_counts(), "\n")

    return X_train_scaled, X_test_scaled, y_train, y_test

# === Del 3.5: Feature selection med RFE ===

def apply_rfe(base_model, X_train, y_train, X_test, n_features_to_select=12):
    """
    Select features with recursive feature elimination (RFE).

    The selector is fitted on the training data only, using a clone of
    ``base_model`` so the caller's estimator is left untouched, and the same
    column selection is then applied to the test data.

    Parameters
    ----------
    base_model : estimator
        Model used by RFE to rank the features.
    X_train, y_train
        Training features and labels used to fit the selector.
    X_test
        Test features, reduced to the columns chosen on the training data.
    n_features_to_select : int, default 12
        Number of features to keep.

    Returns
    -------
    tuple
        ``(X_train_rfe, X_test_rfe, selector)``; the fitted selector is
        returned so the chosen features can be inspected.
    """
    selector = RFE(
        estimator=clone(base_model),
        n_features_to_select=n_features_to_select,
        step=1,
    )

    # Fit on TRAIN only, then reduce both matrices with that one selection.
    selector.fit(X_train, y_train)
    X_train_rfe = selector.transform(X_train)
    X_test_rfe = selector.transform(X_test)

    total_features = X_train.shape[1]
    print(f"RFE: behåller {n_features_to_select} features av totalt {total_features}")
    print("RFE support (True = behåll feature):")
    print(selector.support_)
    print("RFE ranking (1 = viktigast):")
    print(selector.ranking_, "\n")

    return X_train_rfe, X_test_rfe, selector

# === Del 4: Modeller (alla basmodeller) ===

def get_base_models():
    """
    Build fresh, unfitted instances of every model under test.

    Returns
    -------
    dict
        Maps the display name (``"LogisticRegression"``, ``"RandomForest"``,
        ``"XGBoost"``, ``"ANN"``, ``"SVC"``, ``"Voting"``) to its estimator.
        The soft-voting ensemble is built from the same logistic regression,
        random forest and XGBoost instances that are returned individually.
    """
    logistic = LogisticRegression(max_iter=1000, n_jobs=-1)

    forest = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE, n_jobs=-1)

    boosted = XGBClassifier(
        n_estimators=300,
        learning_rate=0.1,
        max_depth=5,
        subsample=0.8,
        colsample_bytree=0.8,
        eval_metric="logloss",
        random_state=RANDOM_STATE,
        n_jobs=-1,
    )

    neural_net = MLPClassifier(
        hidden_layer_sizes=(64, 32),
        activation="relu",
        max_iter=200,
        random_state=RANDOM_STATE,
    )

    # probability=True so the SVC can deliver scores for the AUC.
    support_vector = SVC(kernel="rbf", probability=True, random_state=RANDOM_STATE)

    # Soft voting combines the members' predicted probabilities.
    ensemble = VotingClassifier(
        estimators=[("logreg", logistic), ("rf", forest), ("xgb", boosted)],
        voting="soft",
    )

    return {
        "LogisticRegression": logistic,
        "RandomForest": forest,
        "XGBoost": boosted,
        "ANN": neural_net,
        "SVC": support_vector,
        "Voting": ensemble,
    }


# === Del 5: SMOTE-varianter ===

def apply_basic_smote(X_train, y_train):
    """
    Oversample the training data with SMOTE using its default parameters.

    Prints the class distribution after resampling and returns the
    resampled ``(X, y)`` pair.
    """
    X_res, y_res = SMOTE(random_state=RANDOM_STATE).fit_resample(X_train, y_train)
    class_counts = pd.Series(y_res).value_counts()
    print("Efter basic SMOTE:")
    print(class_counts, "\n")
    return X_res, y_res


def smote_grid_search(model, X_train, y_train):
    """
    Tune SMOTE with a small grid search (inspired by the SMOTUNED idea).

    SMOTE and the classifier are chained in a pipeline and the grid is
    scored with F1 in 3-fold cross-validation.

    Parameters
    ----------
    model : estimator
        Classifier placed after SMOTE in the pipeline.
    X_train, y_train
        Training data used for the search.

    Returns
    -------
    Pipeline
        The best SMOTE + classifier pipeline found by the search
        (``GridSearchCV.best_estimator_``).
    """
    # Pipeline parameters are addressed as "<step name>__<parameter>".
    search_space = {
        "smote__k_neighbors": [3, 5, 7],
        "smote__sampling_strategy": [0.5, 0.75, 1.0],
    }
    smote_then_clf = Pipeline(steps=[
        ("smote", SMOTE(random_state=RANDOM_STATE)),
        ("clf", model),
    ])

    search = GridSearchCV(
        smote_then_clf,
        search_space,
        scoring="f1",
        cv=3,
        n_jobs=-1,
        verbose=0,
    )
    search.fit(X_train, y_train)

    print("GRID-SMOTE – bästa parametrar:", search.best_params_)
    return search.best_estimator_


def smotuned_de(model, X_train, y_train):
    """
    Simplified SMOTUNED: tune SMOTE with differential evolution, then fit the model.

    Differential evolution searches ``k_neighbors`` (integer in [2, 15]) and
    ``sampling_strategy`` (float in [0.2, 1.0]) for the setting that maximises
    the mean F1 of a 3-fold stratified cross-validation on the training data.

    SMOTE is fitted on the training part of each fold only and F1 is measured
    on the untouched validation part, so no synthetic sample (and no sample
    derived from a validation row) ends up in the data used for scoring.
    The search is seeded with ``RANDOM_STATE`` so repeated runs give the same
    result.

    Parameters
    ----------
    model : estimator
        Unfitted classifier; it is cloned for every fit and never modified.
    X_train, y_train
        Training features and labels (array-like).

    Returns
    -------
    estimator
        A clone of ``model`` fitted on ``X_train``/``y_train`` resampled with
        the best SMOTE setting found.

    Raises
    ------
    ValueError
        Propagated from the final ``fit_resample`` if even the best setting
        cannot be applied to the full training data.
    """
    k_bounds = (2, 15)          # k_neighbors
    sampling_bounds = (0.2, 1.0)  # sampling_strategy

    def decode(params):
        """Map a raw DE vector to a valid (k_neighbors, sampling_strategy) pair."""
        k = int(round(params[0]))
        k = max(k_bounds[0], min(k, k_bounds[1]))
        sampling = float(params[1])
        sampling = max(sampling_bounds[0], min(sampling, sampling_bounds[1]))
        return k, sampling

    # Plain arrays make the fold indices positional regardless of whether the
    # caller passed numpy arrays or pandas objects with an arbitrary index.
    X_arr = np.asarray(X_train)
    y_arr = np.asarray(y_train)

    # The folds are fixed, so every candidate is scored on identical splits.
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)
    folds = list(cv.split(X_arr, y_arr))

    def objective(params):
        k, sampling = decode(params)
        scores = []

        for train_idx, val_idx in folds:
            smote = SMOTE(
                k_neighbors=k,
                sampling_strategy=sampling,
                random_state=RANDOM_STATE,
            )
            try:
                # Oversample the training part of the fold only.
                X_res, y_res = smote.fit_resample(X_arr[train_idx], y_arr[train_idx])
            except ValueError:
                # This candidate cannot be applied to the fold (for example more
                # neighbours requested than minority samples, or a target ratio
                # below the existing one). Give it the worst possible score
                # instead of aborting the whole search.
                return 0.0

            fold_model = clone(model)
            fold_model.fit(X_res, y_res)
            y_pred = fold_model.predict(X_arr[val_idx])
            scores.append(f1_score(y_arr[val_idx], y_pred, zero_division=0))

        # differential_evolution minimises, so return the negated mean F1.
        return -float(np.mean(scores))

    result = differential_evolution(
        objective,
        [k_bounds, sampling_bounds],
        maxiter=15,
        popsize=10,
        tol=0.01,
        polish=True,
        disp=False,
        seed=RANDOM_STATE,  # reproducible search
    )

    best_k, best_sampling = decode(result.x)

    print("SMOTUNED-DE – bästa parametrar:")
    print("k_neighbors:", best_k)
    print("sampling_strategy:", best_sampling)

    # Refit on the complete training data with the winning SMOTE setting.
    best_smote = SMOTE(
        k_neighbors=best_k,
        sampling_strategy=best_sampling,
        random_state=RANDOM_STATE,
    )
    X_res_best, y_res_best = best_smote.fit_resample(X_train, y_train)

    final_model = clone(model)
    final_model.fit(X_res_best, y_res_best)

    return final_model

def train_with_smote_mode(base_model, X_train, y_train, smote_mode):
    """
    Fit a clone of ``base_model`` using the requested SMOTE strategy.

    Parameters
    ----------
    base_model : estimator
        Unfitted model; it is cloned, so the caller's instance stays untouched.
    X_train, y_train
        Training features and labels.
    smote_mode : str
        ``"NONE"``, ``"BASIC"``, ``"GRID"`` or ``"SMOTUNED-DE"``. Any other
        value prints a notice and is handled like ``"NONE"``.

    Returns
    -------
    tuple
        ``(fitted_model, smote_label)`` where ``smote_label`` names the mode
        that was actually used.
    """
    estimator = clone(base_model)

    # The tuning helpers fit on their own and hand back a ready estimator.
    if smote_mode == "GRID":
        return smote_grid_search(estimator, X_train, y_train), "GRID"
    if smote_mode == "SMOTUNED-DE":
        return smotuned_de(estimator, X_train, y_train), "SMOTUNED-DE"

    if smote_mode == "BASIC":
        X_smote, y_smote = apply_basic_smote(X_train, y_train)
        estimator.fit(X_smote, y_smote)
        return estimator, "BASIC"

    # Plain training: either requested explicitly or as fallback for unknown modes.
    if smote_mode == "NONE":
        print("Ingen SMOTE används.\n")
    else:
        print("Ogiltigt SMOTE-läge, använder NONE.")
    estimator.fit(X_train, y_train)
    return estimator, "NONE"

def cross_val_f1_scores(dataset_name, model_name, smote_mode="NONE", use_rfe=False, n_features_to_select=12, n_splits=3):
    """
    Run stratified k-fold cross-validation and collect the F1 score of each fold.

    The train and test parts returned by ``load_and_prepare_dataset`` are
    stacked so the cross-validation covers the whole dataset. Inside every
    fold the features are re-standardised with statistics from that fold's
    training part, optional RFE is fitted on the training part, and the model
    is trained with the requested SMOTE mode.

    Parameters
    ----------
    dataset_name : str
        Key into ``DATASETS``.
    model_name : str
        Key of the model in ``get_base_models()``.
    smote_mode : str, default "NONE"
        SMOTE strategy passed to ``train_with_smote_mode``.
    use_rfe : bool, default False
        Whether to run RFE inside each fold.
    n_features_to_select : int, default 12
        Number of features RFE keeps when ``use_rfe`` is true.
    n_splits : int, default 3
        Number of folds.

    Returns
    -------
    numpy.ndarray
        One F1 score per fold, in fold order.
    """
    X_train_scaled, X_test_scaled, y_train, y_test = load_and_prepare_dataset(dataset_name)
    # Merge train + test so the cross-validation uses the complete dataset.
    X_all = np.vstack([X_train_scaled, X_test_scaled])
    y_all = np.concatenate([y_train.values, y_test.values])

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)
    base_model = get_base_models()[model_name]

    f1_scores = []

    for fold_idx, (train_idx, val_idx) in enumerate(cv.split(X_all, y_all), start=1):
        y_tr, y_val = y_all[train_idx], y_all[val_idx]

        # The loader standardised with statistics of its own 80% split, which
        # overlaps the validation folds used here. Re-standardise with the
        # fold's training rows only. For non-constant features a second
        # standardisation cancels the earlier per-feature scaling, so this
        # matches scaling the raw features inside the fold.
        fold_scaler = StandardScaler().fit(X_all[train_idx])
        X_tr = fold_scaler.transform(X_all[train_idx])
        X_val = fold_scaler.transform(X_all[val_idx])

        # Optional RFE, fitted per fold on the training part.
        if use_rfe:
            X_tr, X_val, _ = apply_rfe(
                base_model=base_model,
                X_train=X_tr,
                y_train=y_tr,
                X_test=X_val,
                n_features_to_select=n_features_to_select,
            )

        used_model, smote_label = train_with_smote_mode(
            base_model=base_model,
            X_train=X_tr,
            y_train=y_tr,
            smote_mode=smote_mode,
        )

        y_pred = used_model.predict(X_val)
        f1 = f1_score(y_val, y_pred, zero_division=0)
        f1_scores.append(f1)

        print(f"[{dataset_name}] Fold {fold_idx}: F1 = {f1:.4f} (SMOTE={smote_label})")

    return np.array(f1_scores)
   
# === Del 6: jämförelsefunktion för EN modell + EN dataset ===

def compare_smote_variants(dataset_name, model_name, use_rfe=False, n_features_to_select=12):
    """
    Compare all SMOTE strategies for ONE dataset and ONE model.

    The same train/test split and the same model type are evaluated with
    no SMOTE, basic SMOTE, GRID-SMOTE and SMOTUNED-DE.

    Parameters
    ----------
    dataset_name : str
        Key into ``DATASETS``.
    model_name : str
        Key of the model in ``get_base_models()``.
    use_rfe : bool, default False
        Whether to reduce the features with RFE before training.
    n_features_to_select : int, default 12
        Number of features RFE keeps when ``use_rfe`` is true.

    Returns
    -------
    tuple
        ``(df, pivot_f1)``: one row of metrics per SMOTE mode, and a pivot
        table of F1 with the SMOTE modes as columns.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the available models. The check runs
        before any data is loaded, with or without RFE.
    """
    # Validate the model name first, so a typo fails immediately and with the
    # same error regardless of use_rfe.
    base_models = get_base_models()
    if model_name not in base_models:
        raise ValueError(f"Modell '{model_name}' finns inte. Tillgängliga: {list(base_models.keys())}")
    base_model = base_models[model_name]

    # 1) Load and scale the data.
    X_train_scaled, X_test_scaled, y_train, y_test = load_and_prepare_dataset(dataset_name)

    # 1b) Optional feature selection with RFE, ranked by the model type under
    #     evaluation (apply_rfe works on a clone, base_model stays unfitted).
    if use_rfe:
        print(f"\n>>> Kör RFE med {n_features_to_select} features för {dataset_name} / {model_name} <<<\n")
        X_train_used, X_test_used, _ = apply_rfe(
            base_model=base_model,
            X_train=X_train_scaled,
            y_train=y_train,
            X_test=X_test_scaled,
            n_features_to_select=n_features_to_select,
        )
    else:
        X_train_used = X_train_scaled
        X_test_used = X_test_scaled

    results = []

    # 2) Train and evaluate once per SMOTE mode.
    smote_modes = ["NONE", "BASIC", "GRID", "SMOTUNED-DE"]

    for mode in smote_modes:
        print(f"\n--- Tränar {model_name} på {dataset_name} med SMOTE-läge: {mode} ---")

        used_model, smote_label = train_with_smote_mode(
            base_model=base_model,
            X_train=X_train_used,   # RFE-reduced or original features
            y_train=y_train,
            smote_mode=mode,
        )

        res = evaluate_model(used_model, X_test_used, y_test)

        results.append({
            "dataset": dataset_name,
            "model": model_name,
            "smote_mode": smote_label,
            "accuracy": res["accuracy"],
            "precision": res["precision"],
            "recall": res["recall"],
            "f1": res["f1"],
            "auc": res["auc"],
        })

    df = pd.DataFrame(results)
    print(f"\n=== Jämförelse SMOTE-varianter – dataset: {dataset_name}, modell: {model_name} ===")
    display(df)

    pivot_f1 = df.pivot_table(
        index=["dataset", "model"],
        columns="smote_mode",
        values="f1"
    )
    print("\nF1 per SMOTE-läge:")
    display(pivot_f1)

    return df, pivot_f1

def _read_features_and_labels(dataset_name):
    """Read one registered dataset and return ``(X, y)`` with boolean/text labels coerced to 0/1."""
    info = DATASETS[dataset_name]
    df = pd.read_csv(os.path.join(DATA_DIR, info["filename"]))

    # Dataset-specific candidate names win; otherwise use the general list.
    candidate_targets = POSSIBLE_TARGETS_BY_DATASET.get(
        dataset_name,
        POSSIBLE_TARGETS_GENERAL
    )
    target_col = next((cand for cand in candidate_targets if cand in df.columns), None)
    if target_col is None:
        raise ValueError(f"Hittade ingen target-kolumn i {dataset_name}")

    X = df.drop(columns=[target_col])
    y = df[target_col]

    if y.dtype == "bool":
        y = y.astype(int)
    elif y.dtype == "object":
        # Text labels: the listed spellings mean "defective"; anything not in
        # the map ends up as 0.
        y = y.astype(str).str.lower().map({
            "yes": 1, "true": 1, "defective": 1, "bug": 1, "bugs": 1,
            "problem": 1, "problems": 1, "1": 1, "0": 0,
        }).fillna(0).astype(int)

    return X, y


def cross_project_experiment(
    train_dataset,
    test_dataset,
    model_name,
    smote_mode="SMOTUNED-DE",
    use_rfe=False,
    n_features_to_select=12,
):
    """
    Train on one dataset and test on another (cross-project defect prediction).

    - Only the feature columns present under the same name in BOTH datasets
      are kept.
    - The scaler is fitted on the TRAIN dataset and the same transformation is
      applied to the TEST dataset.
    - Optionally reduces the features with RFE and trains with the chosen
      SMOTE mode.

    Parameters
    ----------
    train_dataset, test_dataset : str
        Keys into ``DATASETS`` for the training and the test project.
    model_name : str
        Key of the model in ``get_base_models()``.
    smote_mode : str, default "SMOTUNED-DE"
        SMOTE strategy passed to ``train_with_smote_mode``.
    use_rfe : bool, default False
        Whether to run RFE (fitted on the training project).
    n_features_to_select : int, default 12
        Number of features RFE keeps when ``use_rfe`` is true.

    Returns
    -------
    dict
        The result of ``evaluate_model`` on the test project.

    Raises
    ------
    ValueError
        If a dataset has no recognisable label column, or the two datasets
        share no feature column.
    """
    # ===== 1-2) Read TRAIN, then TEST =====
    X_train, y_train = _read_features_and_labels(train_dataset)
    X_test, y_test = _read_features_and_labels(test_dataset)

    print(f"\n=== {train_dataset} → {test_dataset} ===")
    print("Train klassfördelning:")
    print(y_train.value_counts())
    print("\nTest klassfördelning:")
    print(y_test.value_counts(), "\n")

    # ===== 3) Align features: keep only the shared columns =====
    common_features = sorted(set(X_train.columns) & set(X_test.columns))

    if len(common_features) == 0:
        raise ValueError("Inga gemensamma features mellan train och test!")

    if len(common_features) < X_train.shape[1] or len(common_features) < X_test.shape[1]:
        dropped_train = set(X_train.columns) - set(common_features)
        dropped_test = set(X_test.columns) - set(common_features)
        print(f"Gemensamma features: {len(common_features)}")
        if dropped_train:
            print("Features som bara fanns i TRAIN och togs bort:", dropped_train)
        if dropped_test:
            print("Features som bara fanns i TEST och togs bort:", dropped_test)
        print()

    X_train = X_train[common_features].copy()
    X_test = X_test[common_features].copy()

    # ===== 4) Scale (statistics from TRAIN only) =====
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    base_model = get_base_models()[model_name]

    # ===== 5) Optional RFE (works on a clone, base_model stays unfitted) =====
    if use_rfe:
        print(f">>> Cross-project RFE: behåller {n_features_to_select} features <<<")
        X_train_scaled, X_test_scaled, _ = apply_rfe(
            base_model=base_model,
            X_train=X_train_scaled,
            y_train=y_train,
            X_test=X_test_scaled,
            n_features_to_select=n_features_to_select,
        )

    # ===== 6) Model + SMOTE mode =====
    used_model, smote_label = train_with_smote_mode(
        base_model=base_model,
        X_train=X_train_scaled,
        y_train=y_train,
        smote_mode=smote_mode,
    )

    # ===== 7) Evaluate on TEST =====
    res = evaluate_model(used_model, X_test_scaled, y_test)

    print(f"Resultat – {train_dataset} → {test_dataset} – {model_name} – SMOTE: {smote_label}")
    for k, v in res.items():
        # The prediction arrays are returned but not printed.
        if k in ["y_pred", "y_proba"]:
            continue
        print(f"{k}: {v:.4f}")

    return res



# === Del 7: Meny för att köra experiment ===

def run_experiments_menu():
    """
    Interactive console menu for running experiments.

    Asks (via ``input``) for a dataset, a model and a SMOTE mode, trains and
    evaluates every selected dataset/model combination and shows the metrics.

    - Dataset and model can be given by number, by name, or as ``ALL``;
      invalid input falls back to the first entry.
    - SMOTE mode ``4`` compares all SMOTE variants through
      ``compare_smote_variants`` and requires exactly one dataset and one
      model; otherwise basic SMOTE is used instead.
    - Any other unknown SMOTE choice means no SMOTE.

    Returns
    -------
    pandas.DataFrame
        One row of metrics per run (or the comparison table for mode ``4``).
    """

    def _resolve(choice, names, bad_index_msg, bad_name_msg):
        """Translate raw user input into ``(selected names, ALL chosen?)``."""
        if choice.upper() == "ALL":
            return names, True
        if choice.isdigit():
            position = int(choice) - 1
            if 0 <= position < len(names):
                return [names[position]], False
            print(bad_index_msg)
            return [names[0]], False
        # Otherwise assume the name was typed directly, e.g. JM1.
        if choice in names:
            return [choice], False
        print(bad_name_msg)
        return [names[0]], False

    # --- choose dataset(s) ---
    dataset_names = list(DATASETS.keys())
    print("Tillgängliga dataset:")
    for number, name in enumerate(dataset_names, start=1):
        print(f"{number} = {name}")
    print("ALL = alla dataset")

    dataset_choice = input("Välj dataset (t.ex. 1, 2, 3 eller JM1/KC1/ALL): ").strip().upper()
    datasets_to_run, all_datasets_selected = _resolve(
        dataset_choice,
        dataset_names,
        "Ogiltigt sifferval, använder första datasetet.",
        "Ogiltigt namn, använder första datasetet.",
    )

    # --- choose model(s) ---
    model_names = list(get_base_models().keys())
    print("\nTillgängliga modeller:")
    for number, name in enumerate(model_names, start=1):
        print(f"{number} = {name}")
    print("ALL = alla modeller")

    # Model names are matched case-sensitively, so the input is not upper-cased.
    model_choice = input("Välj modell (t.ex. 1, 2 eller RandomForest/ALL): ").strip()
    model_names_to_run, all_models_selected = _resolve(
        model_choice,
        model_names,
        "Ogiltigt sifferval, använder första modellen.",
        "Ogiltigt modellnamn, använder första modellen.",
    )

    # --- choose SMOTE mode ---
    for menu_line in (
        "\nSMOTE-lägen:",
        "0 = Ingen SMOTE",
        "1 = Basic SMOTE (standardparametrar)",
        "2 = GRID-SMOTE (enkel tuning)",
        "3 = SMOTUNED-DE (evolutionär tuning)",
        "4 = Jämför ALLA SMOTE-varianter för vald dataset + modell",
    ):
        print(menu_line)
    smote_mode_input = input("Välj 0 / 1 / 2 / 3 / 4: ").strip()

    # Special case: mode 4 delegates to compare_smote_variants.
    if smote_mode_input == "4":
        single_combination = len(datasets_to_run) == 1 and len(model_names_to_run) == 1
        if single_combination:
            df_compare, _ = compare_smote_variants(datasets_to_run[0], model_names_to_run[0])
            return df_compare
        print("\n⚠ SMOTE-läge 4 kräver EXAKT ett dataset och en modell (inte ALL).")
        print("Byter till läge 1 (Basic SMOTE) istället.\n")
        smote_mode_input = "1"  # fall back to BASIC

    # Menu choice -> internal mode string; anything unknown means NONE.
    selected_smote_mode = {
        "0": "NONE",
        "1": "BASIC",
        "2": "GRID",
        "3": "SMOTUNED-DE",
    }.get(smote_mode_input, "NONE")

    metric_names = ("accuracy", "precision", "recall", "f1", "auc")
    all_results = []

    for ds in datasets_to_run:
        print("\n==============================")
        print(f"Kör dataset: {ds}")
        print("==============================\n")

        X_train_scaled, X_test_scaled, y_train, y_test = load_and_prepare_dataset(ds)

        for model_name in model_names_to_run:
            # Fresh instances for every run.
            base_model = get_base_models()[model_name]

            print(f"\n--- Modell: {model_name} ---")

            used_model, smote_label = train_with_smote_mode(
                base_model=base_model,
                X_train=X_train_scaled,
                y_train=y_train,
                smote_mode=selected_smote_mode,
            )

            # Evaluate and print every scalar metric (not the prediction arrays).
            eval_results = evaluate_model(used_model, X_test_scaled, y_test)
            print(f"Resultat – {ds} – {model_name} – SMOTE-läge {smote_label}")
            for key, value in eval_results.items():
                if key not in ("y_pred", "y_proba"):
                    print(f"{key}: {value:.4f}")

            row = {"dataset": ds, "model": model_name, "smote_mode": smote_label}
            row.update({metric: eval_results[metric] for metric in metric_names})
            all_results.append(row)

    results_df = pd.DataFrame(all_results)
    print("\n=== Sammanfattning av alla körningar ===")
    display(results_df)

    # For an ALL + ALL run, also show a pivot table of F1.
    if all_datasets_selected and all_models_selected and not results_df.empty:
        pivot_f1 = results_df.pivot_table(
            index=["dataset", "model"],
            columns="smote_mode",
            values="f1",
        )
        print("\n=== F1 per dataset/modell och SMOTE-läge ===")
        display(pivot_f1)

    return results_df



Kod för parat t-test och 3-faldig korsvalidering (CV)

In [None]:
# JM1 – baseline (logistic regression, no SMOTE) vs ensemble (random forest, SMOTUNED-DE).
# Both runs use the same 3 stratified folds, so the fold scores are paired.
f1_lr = cross_val_f1_scores("JM1", "LogisticRegression", smote_mode="NONE", use_rfe=False, n_splits=3)

f1_rf = cross_val_f1_scores("JM1", "RandomForest", smote_mode="SMOTUNED-DE", use_rfe=False, n_splits=3)

print("LR F1-scores:", f1_lr)
print("RF F1-scores:", f1_rf)

# Paired t-test on the per-fold F1 scores.
t_stat, p_val = ttest_rel(f1_lr, f1_rf)
print(f"Paired t-test: t = {t_stat:.4f}, p = {p_val:.4f}")


Kod för cross-project-experiment, dvs. modellen tränas på KC1 och testas på KC2

In [None]:
# Cross-project run: train on KC1, test on KC2 (start without RFE).
res_cpdp = cross_project_experiment(
    "KC1",
    "KC2",
    "RandomForest",
    smote_mode="SMOTUNED-DE",
    use_rfe=False,
)


Kod för SMOTE-varianter utan RFE

In [None]:
# Experiment 1: JM1 + RandomForest without RFE (no feature selection)
df_no_rfe, pivot_no_rfe = compare_smote_variants(
    dataset_name="JM1",
    model_name="RandomForest",
    use_rfe=False,
)

df_no_rfe


Kod för SMOTE-varianter med RFE

In [None]:
# Experiment 2: JM1 + RandomForest with RFE enabled, keeping 12 features
df_rfe, pivot_rfe = compare_smote_variants(
    dataset_name="JM1",
    model_name="RandomForest",
    use_rfe=True,
    n_features_to_select=12,
)

df_rfe


Kod för tabelljämförelse av dataset + modell med/utan RFE

In [None]:
# Compare JM1 + RandomForest with and without RFE, one row per SMOTE mode.
# Requires df_no_rfe and df_rfe from the two experiment cells above.

df_no_rfe_tagged = df_no_rfe.copy().set_index("smote_mode")
df_rfe_tagged = df_rfe.copy().set_index("smote_mode")

comparison = pd.DataFrame({
    "F1_no_rfe": df_no_rfe_tagged["f1"],
    "F1_rfe": df_rfe_tagged["f1"],
    "Recall_no_rfe": df_no_rfe_tagged["recall"],
    "Recall_rfe": df_rfe_tagged["recall"],
}).assign(
    # Positive deltas mean RFE improved the metric.
    delta_F1=lambda table: table["F1_rfe"] - table["F1_no_rfe"],
    delta_recall=lambda table: table["Recall_rfe"] - table["Recall_no_rfe"],
)

comparison


För att köra via menyn

In [None]:
# Start the interactive menu: it prompts via input() for dataset, model and
# SMOTE mode and returns a DataFrame with the collected metrics.
results_df = run_experiments_menu()


Så här tolkar du resultattabellen:
Varje rad = samma modell + samma dataset, men olika SMOTE-upplägg:
NONE: ingen SMOTE (original obalanserad träning).
BASIC: standard-SMOTE.
GRID: SMOTE med grid-tunade parametrar.
SMOTUNED-DE: SMOTE med DE-optimerade parametrar.

Kolumnerna:
accuracy → andel rätt totalt.
→ kan vara missvisande på obalanserad data, för att en modell kan få hög accuracy genom att nästan alltid gissa 0.
precision (för klass 1) → “när modellen säger defekt, hur ofta har den rätt?”
recall (för klass 1) → “hur stor andel av alla verkliga defekter hittar modellen?”
f1 → balans mellan precision & recall (bra huvudmått).
auc → hur bra modellen rangordnar defekter vs icke-defekter (threshold-oberoende).