In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import xgboost as xgb

# 1. Generate imbalanced dataset
X, y = make_classification(
    n_samples=5000, n_features=20, n_informative=5, n_redundant=2,
    weights=[0.9, 0.1], flip_y=0.01, class_sep=1.0, random_state=42
)

# 2. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# 3. Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Logistic Regression ---
logreg = LogisticRegression(class_weight='balanced', solver='liblinear')
logreg.fit(X_train_scaled, y_train)
y_pred_logreg = logreg.predict(X_test_scaled)
y_proba_logreg = logreg.predict_proba(X_test_scaled)[:, 1]

# --- Random Forest ---
rf = RandomForestClassifier(class_weight='balanced', n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)
y_proba_rf = rf.predict_proba(X_test_scaled)[:, 1]

# --- Support Vector Machine ---
svm = SVC(probability=True, kernel='rbf', class_weight='balanced')
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)
y_proba_svm = svm.predict_proba(X_test_scaled)[:, 1]



# --- XGBoost ---
xgb_model = xgb.XGBClassifier(
    scale_pos_weight=9,  # 90:10 class ratio
    n_estimators=100,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
xgb_model.fit(X_train_scaled, y_train)
y_pred_xgb = xgb_model.predict(X_test_scaled)
y_proba_xgb = xgb_model.predict_proba(X_test_scaled)[:, 1]

# --- Evaluation Function ---
def evaluate_model(name, y_test, y_pred, y_proba):
    print(f"\n🔹 {name}")
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))
    print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba):.4f}")

# --- Evaluate All Models ---
evaluate_model("Logistic Regression", y_test, y_pred_logreg, y_proba_logreg)
evaluate_model("Random Forest", y_test, y_pred_rf, y_proba_rf)
evaluate_model("SVM", y_test, y_pred_svm, y_proba_svm)

evaluate_model("XGBoost", y_test, y_pred_xgb, y_proba_xgb)


Parameters: { "use_label_encoder" } are not used.




🔹 Logistic Regression
Confusion Matrix:
 [[729 168]
 [ 15  88]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9798    0.8127    0.8885       897
           1     0.3438    0.8544    0.4903       103

    accuracy                         0.8170      1000
   macro avg     0.6618    0.8335    0.6894      1000
weighted avg     0.9143    0.8170    0.8475      1000

ROC AUC Score: 0.8848

🔹 Random Forest
Confusion Matrix:
 [[895   2]
 [ 54  49]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9431    0.9978    0.9697       897
           1     0.9608    0.4757    0.6364       103

    accuracy                         0.9440      1000
   macro avg     0.9519    0.7367    0.8030      1000
weighted avg     0.9449    0.9440    0.9353      1000

ROC AUC Score: 0.9590

🔹 SVM
Confusion Matrix:
 [[852  45]
 [ 24  79]]

Classification Report:
               precision    recall  f1-score   support


In [2]:
models=[(
    'Logistic Regression',
    {'class_weight':'balanced', 'solver':'liblinear'},
    LogisticRegression(class_weight='balanced', solver='liblinear'),
    (X_train_scaled, y_train),
     (X_test_scaled,y_test)
     
    ),
    (
        ' Random Forest',
        {'class_weight':'balanced','n_estimators':100,'random_state':42},
         RandomForestClassifier(class_weight='balanced', n_estimators=100, random_state=42),
        (X_train_scaled, y_train),
          (X_test_scaled,y_test)
         ),
        (
            ' Support Vector Machine',
            {'probability':True, 'kernel':'rbf', 'class_weight':'balanced'},
            SVC(probability=True, kernel='rbf', class_weight='balanced'),
          (X_train_scaled, y_train),
          (X_test_scaled,y_test)
        ),
       (
           'XGBoost',
            {'scale_pos_weight':9,  'n_estimators':100,'use_label_encoder':False,'eval_metric':'logloss','random_state':42},
          xgb.XGBClassifier(scale_pos_weight=9,n_estimators=100,use_label_encoder=False,eval_metric='logloss',random_state=42),  
           (X_train_scaled, y_train),
          (X_test_scaled,y_test)
           )

]

In [3]:
reports=[]
for model_name,params,model,train_set,test_set in models:
    X_train=train_set[0]
    y_train=train_set[1]
    X_test=test_set[0]
    y_test=test_set[1]
    model.fit(X_train,y_train)
    y_pred=model.predict(X_test)
    report=classification_report(y_test,y_pred,output_dict=True)
    reports.append(report)
    
    
print(reports)


    

Parameters: { "use_label_encoder" } are not used.



[{'0': {'precision': 0.9798387096774194, 'recall': 0.8127090301003345, 'f1-score': 0.8884826325411335, 'support': 897.0}, '1': {'precision': 0.34375, 'recall': 0.8543689320388349, 'f1-score': 0.49025069637883006, 'support': 103.0}, 'accuracy': 0.817, 'macro avg': {'precision': 0.6617943548387097, 'recall': 0.8335389810695847, 'f1-score': 0.6893666644599817, 'support': 1000.0}, 'weighted avg': {'precision': 0.9143215725806452, 'recall': 0.817, 'f1-score': 0.8474647431164162, 'support': 1000.0}}, {'0': {'precision': 0.9430979978925185, 'recall': 0.9977703455964325, 'f1-score': 0.9696641386782232, 'support': 897.0}, '1': {'precision': 0.9607843137254902, 'recall': 0.47572815533980584, 'f1-score': 0.6363636363636364, 'support': 103.0}, 'accuracy': 0.944, 'macro avg': {'precision': 0.9519411558090043, 'recall': 0.7367492504681192, 'f1-score': 0.8030138875209298, 'support': 1000.0}, 'weighted avg': {'precision': 0.9449196884233146, 'recall': 0.944, 'f1-score': 0.9353341869398207, 'support': 

In [5]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

In [6]:
report


{'0': {'precision': 0.9641304347826087,
  'recall': 0.9888517279821628,
  'f1-score': 0.9763346175013758,
  'support': 897.0},
 '1': {'precision': 0.875,
  'recall': 0.6796116504854369,
  'f1-score': 0.7650273224043715,
  'support': 103.0},
 'accuracy': 0.957,
 'macro avg': {'precision': 0.9195652173913044,
  'recall': 0.8342316892337999,
  'f1-score': 0.8706809699528737,
  'support': 1000.0},
 'weighted avg': {'precision': 0.9549500000000001,
  'recall': 0.957,
  'f1-score': 0.9545699661063844,
  'support': 1000.0}}

In [9]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

# Initialize mlflow
mlflow.set_tracking_uri('http://127.0.0.1:5000/')
mlflow.set_experiment('Anamoly')

for i, element in enumerate(models):
    model_name = element[0]
    params = element[1]
    model = element[2]
    report = reports[i]

    with mlflow.start_run(run_name=model_name):
        # Log model name
        mlflow.log_param('model', model_name)

        # ✅ Log metrics correctly using commas, not equals
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('recall_0', report['0']['recall'])
        mlflow.log_metric('recall_1', report['1']['recall'])
        mlflow.log_metric('precision_0', report['0']['precision'])
        mlflow.log_metric('precision_1', report['1']['precision'])
        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])

        # ✅ Check if model name contains "xgb" (case-insensitive)
        if "xgb" in model_name.lower():
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")

            

        
        
    


    





2025/04/05 11:52:31 INFO mlflow.tracking.fluent: Experiment with name 'Anamoly' does not exist. Creating a new experiment.


🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/500420123018658551/runs/2c601eb74d4e4c6f8840b4ddf2cb3450
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/500420123018658551




🏃 View run  Random Forest at: http://127.0.0.1:5000/#/experiments/500420123018658551/runs/96dd2c69f9bf4d7d8e07467fce56eee4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/500420123018658551




🏃 View run  Support Vector Machine at: http://127.0.0.1:5000/#/experiments/500420123018658551/runs/b22616c25e354511aaa5bada13329b72
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/500420123018658551




🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/500420123018658551/runs/54579c72f50f453abe3647fe408df1bc
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/500420123018658551
