In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.exceptions import ConvergenceWarning 
import warnings

In [3]:
# Read the modelling table from a CSV path that is relative to the notebook's
# working directory. Presumably this file was produced by an earlier encoding
# step (judging by its name) — verify against the upstream notebook.
encoded_df = pd.read_csv(filepath_or_buffer="../data/encoded_df.csv")

In [None]:
# Separate the feature matrix from the target column 'y'.
X = encoded_df.drop('y', axis=1)
y = encoded_df['y']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified. If 'y' is imbalanced, consider
# passing stratify=y (this would change the reported metrics) — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Take explicit copies before writing scaled values back. The frames returned
# by the split are derived from X, and assigning into them directly can raise
# pandas' SettingWithCopyWarning. Copies also make this cell safe to re-run.
X_train = X_train.copy()
X_test = X_test.copy()

# Only scale the candidate numeric columns that actually exist in the data.
potential_num_cols = ['age', 'balance', 'duration', 'campaign', 'previous', 'days_since_last_contact']
num_cols_to_scale = [col for col in potential_num_cols if col in X_train.columns]

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
if num_cols_to_scale:
    X_train[num_cols_to_scale] = scaler.fit_transform(X_train[num_cols_to_scale])
    X_test[num_cols_to_scale] = scaler.transform(X_test[num_cols_to_scale])
# else: none of the candidate columns are present; StandardScaler would raise
# on a zero-column frame, so scaling is skipped and `scaler` stays unfitted.

log_reg = LogisticRegression(random_state=42, max_iter=5000)

# The verbose ConvergenceWarning is still suppressed during fitting ...
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning)
    log_reg.fit(X_train, y_train)

# ... but a solver that used up its whole iteration budget is reported in one
# line instead of being silently ignored, since the metrics may then be unreliable.
if (log_reg.n_iter_ >= log_reg.max_iter).any():
    print(f"Note: LogisticRegression reached max_iter={log_reg.max_iter}; the model may not have converged.")

# Prediction
y_pred = log_reg.predict(X_test)

In [None]:
# Banner for the evaluation output.
print("Logistic Regression Model Evaluation:")
print("=" * 35)

# Accuracy of the test-set predictions, shown to four decimals.
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy Score: {test_accuracy:.4f}")

# Remaining sections: each metric is computed right before it is printed,
# so the output order matches the headings.
for section_title, metric_fn in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(section_title)
    print(metric_fn(y_test, y_pred))