### Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, confusion_matrix, classification_report, roc_curve, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

#### Load the data

In [2]:
df = pd.read_csv('credit_data_norm.csv')

#### Preprocess the data & Drop unnecessary columns

In [3]:
df = df.drop(['Time'], axis=1)

KeyError: "['Time'] not found in axis"

#### Check for missing values

In [None]:
print(df.isnull().sum())

#### Check the class balance

In [None]:
print(df['Class'].value_counts())

#### Balance the data using random under-sampling

In [None]:
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=0)
X_resampled, y_resampled = rus.fit_resample(df.drop('Class', axis=1), df['Class'])

#### Check the class balance after resampling

In [None]:
print(pd.Series(y_resampled).value_counts())

#### Standardize the data

In [None]:
scaler = StandardScaler()
X_resampled = scaler.fit_transform(X_resampled)

#### Split the data into training and testing sets

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=0)

#### Train the model

In [None]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

#### Make predictions

In [None]:
y_pred = model.predict(X_test)

#### Evaluate the model

In [None]:
print('Recall:', recall_score(y_test, y_pred))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

### Plot the ROC curve

In [None]:
y_prob = model.predict_proba(X_test)[:, 1]fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = roc_auc_score(y_test, y_prob)
plt.plot(fpr, tpr, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

### Plot the confusion matrix

In [None]:
sns.set(style="whitegrid")
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", cmap="Blues", ax=ax[0])
ax[0].set_title('Logistic Regression')
sns.heatmap(confusion_matrix(y_test, y_pred_DT_final), annot=True, fmt="d", cmap="Blues", ax=ax[1])
ax[1].set_title('Decision Tree')
plt.show()

### Plot the correlation matrix

In [None]:
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title('Correlation matrix')
plt.show()

### Print the amount details for fraudulent transactions

In [None]:
print(df[df['Class'] == 1]['Amount'].describe())

### Plot the boxplots

In [None]:
plt.figure(figsize=(12, 4))
sns.boxplot(x=df['Class'], y=df['Amount'], orient='h', showmeans=True, palette=['#bdbdbd', '#004a8f'])
plt.title('Amount spent on transactions')
plt.xlabel('Class')
plt.ylabel('Amount')
plt.show()