In [None]:
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the RFMS dataset into a DataFrame.
# `pd` is pandas, imported with the other dependencies in the notebook's import cell,
# so this cell also works after Restart Kernel -> Run All.
data = pd.read_csv('rfms_default_estimator.csv')

In [None]:
# Step 1: Split the Data
# Features are every column except the label; the target is the label encoded
# as 1 ('Good') / 0 ('Bad').
X = data.drop(columns=['User_Label'])
y = data['User_Label'].map({'Good': 1, 'Bad': 0})  # Convert labels to binary

# Series.map yields NaN for any value absent from the mapping (including missing
# labels). Stop here with a clear message instead of handing NaN targets to the
# estimators further down.
unmapped_labels = data.loc[y.isna(), 'User_Label'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected User_Label values (expected 'Good' or 'Bad'): {list(unmapped_labels)}"
    )

In [None]:
# Hold out 30% of the rows as the test set; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, random_state=42, test_size=0.3)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Step 2: Train the Models

# Model 1: Logistic Regression with default hyperparameters.
# fit() returns the fitted estimator, so construction and training are chained.
log_reg = LogisticRegression().fit(X_train, y_train)
# Hard class predictions on the held-out rows.
y_pred_log_reg = log_reg.predict(X_test)

In [None]:
# Model 2: Random Forest with 100 trees, seeded for reproducibility.
# fit() returns the fitted estimator, so construction and training are chained.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
# Hard class predictions on the held-out rows.
y_pred_rf = rf.predict(X_test)

In [None]:
# Step 3: Evaluate the Models
def evaluate_model(y_true, y_pred, y_score=None):
    """Compute standard binary-classification metrics for one model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Hard class predictions (the output of ``predict``).
    y_score : array-like, optional
        Continuous scores for the positive class, e.g.
        ``model.predict_proba(X)[:, 1]``. ROC AUC is a ranking metric and
        should be computed from these scores. When omitted, AUC falls back to
        ``y_pred`` so existing two-argument callers keep their old result.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, auc)`` in that order.
    """
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    # Feeding hard 0/1 predictions to roc_auc_score collapses the ROC curve to a
    # single operating point; prefer the continuous scores when they are given.
    auc = roc_auc_score(y_true, y_pred if y_score is None else y_score)
    return accuracy, precision, recall, f1, auc

# The targets are encoded as 0/1, so column 1 of predict_proba is the estimated
# probability of class 1 ('Good').
log_reg_metrics = evaluate_model(y_test, y_pred_log_reg, log_reg.predict_proba(X_test)[:, 1])
rf_metrics = evaluate_model(y_test, y_pred_rf, rf.predict_proba(X_test)[:, 1])

In [None]:
# Display results: one line per model, each showing the
# (accuracy, precision, recall, f1, auc) tuple returned by evaluate_model.
for heading, scores in (
    ("Logistic Regression Metrics: ", log_reg_metrics),
    ("Random Forest Metrics: ", rf_metrics),
):
    print(heading, scores)

In [None]:
# Save the trained models for future use in API serving.
# `pickle` is imported with the other dependencies in the notebook's import cell.
# NOTE: unpickling can execute arbitrary code, so the serving side must only load
# artifacts it trusts, ideally under the same scikit-learn version used here.
models_to_save = {
    'logistic_regression_model.pkl': log_reg,
    'random_forest_model.pkl': rf,
}
for model_path, model in models_to_save.items():
    # The context manager closes the file even if serialization fails.
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)