In [8]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    classification_report,
    confusion_matrix,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [9]:
# Location of the PCA-reduced dataset.
# Set the FINAL_PCA_CSV environment variable to run this notebook on another
# machine; when it is unset (or empty) the original local path is used.
DATA_PATH = os.environ.get("FINAL_PCA_CSV") or r'C:\Users\anne\Desktop\Daki\s2\projekter\P2\Final_PCA.csv'

# Column layout expected in the CSV
FEATURE_COLUMNS = ["PC1", "PC2"]
TARGET_COLUMN = "Label"

df = pd.read_csv(DATA_PATH)

# Fail early, with a message that names the file, if the layout is not as expected
missing_columns = [col for col in FEATURE_COLUMNS + [TARGET_COLUMN] if col not in df.columns]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing expected column(s): {missing_columns}")

# Split into features and target (plain NumPy arrays)
X = df[FEATURE_COLUMNS].values
y = df[TARGET_COLUMN].values

# Train/test split: 20% held out, fixed seed for reproducibility,
# stratified on the label so both splits keep the same class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [10]:
# Final model with best hyperparameters
# (the tuning that produced these values is not part of this cell)

# Class counts in the training split. Their ratio (negatives / positives) is
# passed as scale_pos_weight so the positive class (label 1) is up-weighted
# relative to the negative class (label 0).
n_negative = (y_train == 0).sum()
n_positive = (y_train == 1).sum()

model = XGBClassifier(
    n_estimators=300,
    max_depth=2,
    learning_rate=0.10,
    subsample=0.9,
    colsample_bytree=0.9,
    scale_pos_weight=n_negative / n_positive,
    # `use_label_encoder` is intentionally not passed: the installed XGBoost
    # ignores it and only emits a "Parameters ... are not used" warning.
    eval_metric='logloss',
    random_state=42
)

# Fit & predict
model.fit(X_train, y_train)
y_pred  = model.predict(X_test)               # hard class predictions
y_proba = model.predict_proba(X_test)[:, 1]   # probability of the positive class (column 1)

# Evaluation on the held-out test split
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['False','True']))

# ROC AUC is computed from the probabilities, not from the thresholded predictions
print(f"ROC AUC: {roc_auc_score(y_test, y_proba):.3f}\n")

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Parameters: { "use_label_encoder" } are not used.



Classification Report:
              precision    recall  f1-score   support

       False       1.00      0.93      0.96      3120
        True       0.26      0.96      0.40        80

    accuracy                           0.93      3200
   macro avg       0.63      0.95      0.68      3200
weighted avg       0.98      0.93      0.95      3200

ROC AUC: 0.985

Confusion Matrix:
[[2896  224]
 [   3   77]]


In [11]:
# Logistic-regression model with its final hyperparameters
# (per the original note these come from a GridSearchCV run, which is not
# shown in this part of the notebook)
best_params = dict(
    penalty='l2',
    C=1.0,
    class_weight='balanced',
    solver='liblinear',
    random_state=42,
)

# Build the estimator, then train it on the training split
model = LogisticRegression(**best_params)
model.fit(X_train, y_train)

# Predictions for the held-out split: hard labels and positive-class probability
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Compute every metric first ...
lr_report = classification_report(y_test, y_pred, target_names=['False','True'])
lr_auc = roc_auc_score(y_test, y_proba)
lr_confusion = confusion_matrix(y_test, y_pred)

# ... then print them in the same order and layout as the other model cell
print("Classification Report:")
print(lr_report)

print(f"ROC AUC: {lr_auc:.3f}\n")

print("Confusion Matrix:")
print(lr_confusion)

Classification Report:
              precision    recall  f1-score   support

       False       1.00      0.69      0.82      3120
        True       0.07      0.93      0.13        80

    accuracy                           0.70      3200
   macro avg       0.53      0.81      0.48      3200
weighted avg       0.97      0.70      0.80      3200

ROC AUC: 0.826

Confusion Matrix:
[[2167  953]
 [   6   74]]
