# Import libraries


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

# Load the dataset

In [2]:
data_train = pd.read_csv('/content/train.csv')
data_test = pd.read_csv('/content/test.csv')

# Separate features (X) and target variable (y)


In [3]:
X_train = data_train.drop('Class', axis=1)
y_train = data_train['Class']

In [4]:
X_test = data_test.drop('Class', axis=1)
y_test = data_test['Class']

# 1. Logistic Regression without Handling Imbalance


In [5]:
print("Logistic Regression without Handling Imbalance:")
logreg_model = LogisticRegression(random_state=42)
logreg_model.fit(X_train, y_train)

y_pred = logreg_model.predict(X_train)
print("Confusion Matrix (Train):\n", confusion_matrix(y_train, y_pred))
print("Classification Report (Train):\n", classification_report(y_train, y_pred))

y_pred_test = logreg_model.predict(X_test)
print("Confusion Matrix (Test):\n", confusion_matrix(y_test, y_pred_test))
print("Classification Report (Test):\n", classification_report(y_test, y_pred_test))

Logistic Regression without Handling Imbalance:
Confusion Matrix (Train):
 [[2090  140]
 [ 132 1318]]
Classification Report (Train):
               precision    recall  f1-score   support

           0       0.94      0.94      0.94      2230
           1       0.90      0.91      0.91      1450

    accuracy                           0.93      3680
   macro avg       0.92      0.92      0.92      3680
weighted avg       0.93      0.93      0.93      3680

Confusion Matrix (Test):
 [[524  34]
 [ 43 320]]
Classification Report (Test):
               precision    recall  f1-score   support

           0       0.92      0.94      0.93       558
           1       0.90      0.88      0.89       363

    accuracy                           0.92       921
   macro avg       0.91      0.91      0.91       921
weighted avg       0.92      0.92      0.92       921



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# 2. Logistic Regression with Oversampling


In [6]:
print("\nLogistic Regression with Oversampling:")
oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled_over, y_resampled_over = oversampler.fit_resample(X_train, y_train)

logreg_model_over = LogisticRegression(random_state=42)
logreg_model_over.fit(X_resampled_over, y_resampled_over)

y_pred_over = logreg_model_over.predict(X_resampled_over)
print("Confusion Matrix (Train with Oversampling):\n", confusion_matrix(y_resampled_over, y_pred_over))
print("Classification Report (Train with Oversampling):\n", classification_report(y_resampled_over, y_pred_over))

y_pred_over_test = logreg_model_over.predict(X_test)
print("Confusion Matrix (Test with Oversampling):\n", confusion_matrix(y_test, y_pred_over_test))
print("Classification Report (Test with Oversampling):\n", classification_report(y_test, y_pred_over_test))



Logistic Regression with Oversampling:
Confusion Matrix (Train with Oversampling):
 [[2058  172]
 [ 167 2063]]
Classification Report (Train with Oversampling):
               precision    recall  f1-score   support

           0       0.92      0.92      0.92      2230
           1       0.92      0.93      0.92      2230

    accuracy                           0.92      4460
   macro avg       0.92      0.92      0.92      4460
weighted avg       0.92      0.92      0.92      4460

Confusion Matrix (Test with Oversampling):
 [[515  43]
 [ 38 325]]
Classification Report (Test with Oversampling):
               precision    recall  f1-score   support

           0       0.93      0.92      0.93       558
           1       0.88      0.90      0.89       363

    accuracy                           0.91       921
   macro avg       0.91      0.91      0.91       921
weighted avg       0.91      0.91      0.91       921



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# 3. Logistic Regression with Undersampling


In [7]:
print("\nLogistic Regression with Undersampling:")
undersampler = RandomUnderSampler(sampling_strategy='auto', random_state=42)
X_resampled_under, y_resampled_under = undersampler.fit_resample(X_train, y_train)

logreg_model_under = LogisticRegression(random_state=42)
logreg_model_under.fit(X_resampled_under, y_resampled_under)

y_pred_under = logreg_model_under.predict(X_resampled_under)
print("Confusion Matrix (Train with Undersampling):\n", confusion_matrix(y_resampled_under, y_pred_under))
print("Classification Report (Train with Undersampling):\n", classification_report(y_resampled_under, y_pred_under))

y_pred_under_test = logreg_model_under.predict(X_test)
print("Confusion Matrix (Test with Undersampling):\n", confusion_matrix(y_test, y_pred_under_test))
print("Classification Report (Test with Undersampling):\n", classification_report(y_test, y_pred_under_test))



Logistic Regression with Undersampling:




Confusion Matrix (Train with Undersampling):
 [[1320  130]
 [ 104 1346]]
Classification Report (Train with Undersampling):
               precision    recall  f1-score   support

           0       0.93      0.91      0.92      1450
           1       0.91      0.93      0.92      1450

    accuracy                           0.92      2900
   macro avg       0.92      0.92      0.92      2900
weighted avg       0.92      0.92      0.92      2900

Confusion Matrix (Test with Undersampling):
 [[517  41]
 [ 33 330]]
Classification Report (Test with Undersampling):
               precision    recall  f1-score   support

           0       0.94      0.93      0.93       558
           1       0.89      0.91      0.90       363

    accuracy                           0.92       921
   macro avg       0.91      0.92      0.92       921
weighted avg       0.92      0.92      0.92       921



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# 4. Logistic Regression with Weighted Loss Function


In [8]:
print("\nLogistic Regression with Weighted Loss Function:")
class_weights = dict(zip([0, 1], len(y_train) / (2 * y_train.value_counts())))

logreg_model_weighted = LogisticRegression(class_weight=class_weights, random_state=42)
logreg_model_weighted.fit(X_train, y_train)

y_pred_weighted = logreg_model_weighted.predict(X_train)
print("Confusion Matrix (Train with Weighted Loss):\n", confusion_matrix(y_train, y_pred_weighted))
print("Classification Report (Train with Weighted Loss):\n", classification_report(y_train, y_pred_weighted))

y_pred_weighted_test = logreg_model_weighted.predict(X_test)
print("Confusion Matrix (Test with Weighted Loss):\n", confusion_matrix(y_test, y_pred_weighted_test))
print("Classification Report (Test with Weighted Loss):\n", classification_report(y_test, y_pred_weighted_test))



Logistic Regression with Weighted Loss Function:
Confusion Matrix (Train with Weighted Loss):
 [[2059  171]
 [ 111 1339]]
Classification Report (Train with Weighted Loss):
               precision    recall  f1-score   support

           0       0.95      0.92      0.94      2230
           1       0.89      0.92      0.90      1450

    accuracy                           0.92      3680
   macro avg       0.92      0.92      0.92      3680
weighted avg       0.92      0.92      0.92      3680

Confusion Matrix (Test with Weighted Loss):
 [[518  40]
 [ 37 326]]
Classification Report (Test with Weighted Loss):
               precision    recall  f1-score   support

           0       0.93      0.93      0.93       558
           1       0.89      0.90      0.89       363

    accuracy                           0.92       921
   macro avg       0.91      0.91      0.91       921
weighted avg       0.92      0.92      0.92       921



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
