In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.utils import resample

# Load the dataset
df = pd.read_csv('/content/creditcard.csv')

# Split features and target
X = df.drop('Class', axis=1)
y = df['Class']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handle class imbalance with random undersampling of majority class
Xy = pd.concat([pd.DataFrame(X_scaled), y.reset_index(drop=True)], axis=1)
majority = Xy[Xy['Class'] == 0]
minority = Xy[Xy['Class'] == 1]
majority_downsampled = resample(majority,
                                replace=False,
                                n_samples=len(minority),
                                random_state=42)
balanced_data = pd.concat([majority_downsampled, minority])

# Shuffle data
balanced_data = balanced_data.sample(frac=1, random_state=42).reset_index(drop=True)
y_bal = balanced_data['Class']
X_bal = balanced_data.drop('Class', axis=1)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, test_size=0.2, random_state=42, stratify=y_bal)

# Train classifiers
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train, y_train)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predictions
y_logreg_pred = logreg.predict(X_test)
y_rf_pred = rf.predict(X_test)

# Evaluation metrics
print('Logistic Regression Classification Report:')
print(classification_report(y_test, y_logreg_pred))
print('Precision:', precision_score(y_test, y_logreg_pred))
print('Recall:', recall_score(y_test, y_logreg_pred))
print('F1-score:', f1_score(y_test, y_logreg_pred))

print('\nRandom Forest Classification Report:')
print(classification_report(y_test, y_rf_pred))
print('Precision:', precision_score(y_test, y_rf_pred))
print('Recall:', recall_score(y_test, y_rf_pred))
print('F1-score:', f1_score(y_test, y_rf_pred))


Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.67      1.00      0.80         2

    accuracy                           0.75         4
   macro avg       0.83      0.75      0.73         4
weighted avg       0.83      0.75      0.73         4

Precision: 0.6666666666666666
Recall: 1.0
F1-score: 0.8

Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Precision: 1.0
Recall: 1.0
F1-score: 1.0
