In [1]:
import os

import mlflow
import mlflow.sklearn
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split


# MLflow config
# The tracking server defaults to the local instance this notebook was written
# against; set MLFLOW_TRACKING_URI to point at another server without editing
# the notebook.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://127.0.0.1:5000"))
mlflow.set_experiment("creditcard_fraud_detection_2")

# Load data
# CREDITCARD_DATA_PATH overrides the machine-specific default location, so the
# notebook can run on a machine that does not have this exact directory layout.
data_path = os.environ.get(
    "CREDITCARD_DATA_PATH",
    r"C:\Job_og_eksamensbevis\Github\projekter\RF_project\data\creditcard_preprocessed.csv",
)
df = pd.read_csv(data_path)

# 'Class' is the target column; every other column is used as a feature.
X = df.drop('Class', axis=1)
y = df['Class']

# Hold out 20% of the rows as a test set, stratified on the target so both
# parts keep the same class ratio. The remaining 80% is used for CV below.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

# Shuffled 5-fold stratified splitter with a fixed seed for repeatable folds.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold validation scores, appended to by the cross-validation loop.
precisions = []
recalls = []
f1s = []

# === CV training + MLflow logging ===
# For every stratified fold: oversample the training part with SMOTE, fit a
# random forest on the resampled data, score it on the untouched validation
# part and record the result as its own MLflow run.
for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_val, y_train_val), 1):
    X_train, X_val = X_train_val.iloc[train_idx], X_train_val.iloc[val_idx]
    y_train, y_val = y_train_val.iloc[train_idx], y_train_val.iloc[val_idx]

    # SMOTE is fitted on the training part of the fold only; the validation
    # rows are never resampled, so the fold scores reflect the real class ratio.
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    rf = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        n_jobs=-1
    )

    rf.fit(X_train_resampled, y_train_resampled)
    y_val_pred = rf.predict(X_val)

    precision = precision_score(y_val, y_val_pred)
    recall = recall_score(y_val, y_val_pred)
    f1 = f1_score(y_val, y_val_pred)

    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)

    # MLflow logging for fold
    with mlflow.start_run(run_name=f"Fold_{fold}"):
        # Read the logged values back from the fitted objects so they cannot
        # drift from what was actually used if the configuration above changes.
        mlflow.log_param("n_estimators", rf.n_estimators)
        mlflow.log_param("sampler", type(smote).__name__)
        mlflow.log_param("fold", fold)

        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)

        mlflow.sklearn.log_model(rf, f"rf_model_fold_{fold}")

        print(f"\nFold {fold}:")
        print("Confusion Matrix:")
        print(confusion_matrix(y_val, y_val_pred))
        print(f"Precision: {precision:.4f}")
        print(f"Recall:    {recall:.4f}")
        print(f"F1-score:  {f1:.4f}")
        print("Classification report:")
        print(classification_report(y_val, y_val_pred))

# Summary: the fold count in the header is taken from the collected scores, so
# it stays correct if the number of splits is changed.
print(f"\n=== Gennemsnitlig performance over {len(f1s)} folds ===")
print(f"Gns. Precision: {sum(precisions)/len(precisions):.4f}")
print(f"Gns. Recall:    {sum(recalls)/len(recalls):.4f}")
print(f"Gns. F1-score:  {sum(f1s)/len(f1s):.4f}")



# === Final model ===
# Refit on the full train+validation portion (oversampled with SMOTE) and
# evaluate once on the held-out test set, which is left unresampled.

smote = SMOTE(random_state=42)
X_train_val_resampled, y_train_val_resampled = smote.fit_resample(X_train_val, y_train_val)

final_rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
final_rf.fit(X_train_val_resampled, y_train_val_resampled)

y_test_pred = final_rf.predict(X_test)

precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print("\n=== Endelig evaluering på test-sæt ===")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-score:  {f1:.4f}")
print("Classification report:")
print(classification_report(y_test, y_test_pred))


with mlflow.start_run(run_name="Final_model_training"):
    # Logged values are read from the fitted objects so the run metadata
    # always matches the model that was actually trained.
    mlflow.log_param("n_estimators", final_rf.n_estimators)
    mlflow.log_param("sampler", type(smote).__name__)
    mlflow.log_param("model_type", "RandomForest")

    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)

    # The model is logged as a run artifact ONLY - it is not registered in the
    # MLflow Model Registry (no registered model name is passed).
    mlflow.sklearn.log_model(
        sk_model=final_rf,
        artifact_path="model"
    )




Fold 1:
Confusion Matrix:
[[45479    12]
 [   15    63]]
Precision: 0.8400
Recall:    0.8077
F1-score:  0.8235
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     45491
           1       0.84      0.81      0.82        78

    accuracy                           1.00     45569
   macro avg       0.92      0.90      0.91     45569
weighted avg       1.00      1.00      1.00     45569

🏃 View run Fold_1 at: http://127.0.0.1:5000/#/experiments/3/runs/877d64d926d442179fae731f9392ae07
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3





Fold 2:
Confusion Matrix:
[[45484     6]
 [   21    58]]
Precision: 0.9062
Recall:    0.7342
F1-score:  0.8112
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     45490
           1       0.91      0.73      0.81        79

    accuracy                           1.00     45569
   macro avg       0.95      0.87      0.91     45569
weighted avg       1.00      1.00      1.00     45569

🏃 View run Fold_2 at: http://127.0.0.1:5000/#/experiments/3/runs/625259e0f0aa4ec5991fc6bda29539cd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3





Fold 3:
Confusion Matrix:
[[45485     5]
 [   20    59]]
Precision: 0.9219
Recall:    0.7468
F1-score:  0.8252
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     45490
           1       0.92      0.75      0.83        79

    accuracy                           1.00     45569
   macro avg       0.96      0.87      0.91     45569
weighted avg       1.00      1.00      1.00     45569

🏃 View run Fold_3 at: http://127.0.0.1:5000/#/experiments/3/runs/77d44fb655984e1fa0e544022478957a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3





Fold 4:
Confusion Matrix:
[[45477    13]
 [    8    71]]
Precision: 0.8452
Recall:    0.8987
F1-score:  0.8712
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     45490
           1       0.85      0.90      0.87        79

    accuracy                           1.00     45569
   macro avg       0.92      0.95      0.94     45569
weighted avg       1.00      1.00      1.00     45569

🏃 View run Fold_4 at: http://127.0.0.1:5000/#/experiments/3/runs/d8ed041ddb5b47ac9b991de94e0f9268
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3





Fold 5:
Confusion Matrix:
[[45484     6]
 [   14    65]]
Precision: 0.9155
Recall:    0.8228
F1-score:  0.8667
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     45490
           1       0.92      0.82      0.87        79

    accuracy                           1.00     45569
   macro avg       0.96      0.91      0.93     45569
weighted avg       1.00      1.00      1.00     45569

🏃 View run Fold_5 at: http://127.0.0.1:5000/#/experiments/3/runs/10a54930d27a4a01ad09bf06d3c7d3a8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3

=== Gennemsnitlig performance over 5 folds ===
Gns. Precision: 0.8858
Gns. Recall:    0.8020
Gns. F1-score:  0.8395

=== Endelig evaluering på test-sæt ===
Confusion Matrix:
[[56853    11]
 [   18    80]]
Precision: 0.8791
Recall:    0.8163
F1-score:  0.8466
Classification report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.0



🏃 View run Final_model_training at: http://127.0.0.1:5000/#/experiments/3/runs/e835ced6b4dd41e98d2ddb6b18e3220c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3


In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
import mlflow.sklearn
import pandas as pd
import numpy as np

# === Load the trained final model from the MLflow artifacts ===
# Run id of the "Final_model_training" run, as shown in the run URL printed by
# the training cell. It must be a string; replace it after retraining.
run_id = "e835ced6b4dd41e98d2ddb6b18e3220c"
# "model" is the artifact path the final model was logged under.
model_uri = f"runs:/{run_id}/model"
model = mlflow.sklearn.load_model(model_uri)

# === FastAPI instance ===
app = FastAPI(title="Credit Card Fraud Detection API")

# === Pydantic model for the request body ===
# One transaction to score. Each field is a required float; the /predict
# endpoint turns the fields into a single-row DataFrame for the model.
# NOTE(review): presumably this field list must match the feature columns the
# model was trained on (everything in the CSV except 'Class') - verify against
# the preprocessed dataset.
class Transaction(BaseModel):
    # Every model feature is declared here as a float field.
    V1: float
    V2: float
    V3: float
    V4: float
    V5: float
    V6: float
    V7: float
    V8: float
    V9: float
    V10: float
    V11: float
    V12: float
    V13: float
    V14: float
    V15: float
    V16: float
    V17: float
    V18: float
    V19: float
    V20: float
    V21: float
    V22: float
    V23: float
    V24: float
    V25: float
    V26: float
    V27: float
    V28: float
    Amount: float

@app.get("/")
def root():
    # Landing endpoint: returns a static message pointing callers to /predict.
    greeting = {"message": "API kører – send POST-anmodninger til /predict"}
    return greeting

@app.post("/predict")
def predict(transaction: Transaction):
    """Score a single transaction with the loaded model.

    Returns a JSON object with the predicted class label (`prediction`) and
    the model's probability for the second class (`probability_of_fraud`),
    rounded to four decimals.
    """
    # Pydantic v2 renamed .dict() to .model_dump(); prefer the new name when it
    # exists and fall back to .dict() so the endpoint also works on Pydantic v1.
    if hasattr(transaction, "model_dump"):
        payload = transaction.model_dump()
    else:
        payload = transaction.dict()

    # Convert to a single-row DataFrame
    input_df = pd.DataFrame([payload])

    # scikit-learn rejects input whose column names are not in the same order
    # as during fit. Select the columns in the order recorded on the fitted
    # model so the order of the schema fields does not matter. A feature the
    # schema does not provide raises KeyError here rather than being filled in.
    trained_columns = getattr(model, "feature_names_in_", None)
    if trained_columns is not None:
        input_df = input_df[list(trained_columns)]

    # Predict
    prediction = model.predict(input_df)[0]
    # Index 1 is the second entry of model.classes_ - assumed to be the fraud
    # label (Class == 1); TODO confirm against the training data.
    probability = model.predict_proba(input_df)[0][1]

    return {
        "prediction": int(prediction),
        "probability_of_fraud": round(float(probability), 4)
    }

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Display the runs known to MLflow as the cell's output.
# NOTE(review): called without arguments, so which experiment is queried
# depends on the tracking URI / experiment set earlier in this kernel - confirm.
mlflow.search_runs()
