# Credit Card Fraud Detection
Rule-based → Machine Learning approach


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime

%matplotlib inline

In [None]:
import os

# List the entries of the current working directory (displayed as the cell output).
os.listdir()


In [None]:
# Shell escape: move creditcard.csv from the filesystem root into /content.
# NOTE(review): /content looks like a Colab working directory - confirm for other environments.
!mv /creditcard.csv /content/


In [None]:
# Shell escapes: print the current working directory and list the contents of /content.
!pwd
!ls /content


In [None]:
# Load the dataset; the path is relative to the current working directory.
df = pd.read_csv("creditcard.csv")

# Preview the first rows.
df.head()

In [None]:
# Dataset dimensions: the number of rows is the number of transactions and
# the columns are the features plus the target.
df.shape

In [None]:
# Show the column labels of the loaded frame.
df.columns


In [None]:
# Print the frame summary (column dtypes and non-null counts).
df.info()


### Missing Values Analysis

The dataset contains no missing (null) values across all features, as confirmed using `df.info()`.
Therefore, no imputation or missing-value handling is required for this dataset.


In [None]:
# Show the absolute count and the percentage share of each class side by side.
# A notebook cell only renders its last expression, so two bare value_counts()
# calls in a row would display the percentages and silently drop the counts.
class_counts = df['Class'].value_counts()
class_percent = df['Class'].value_counts(normalize=True) * 100
pd.DataFrame({'count': class_counts, 'percent': class_percent})

### Class Imbalance Analysis

The target variable `Class` shows a highly imbalanced distribution, where fraudulent transactions (Class = 1) represent a very small percentage of the total dataset, while non-fraudulent transactions (Class = 0) dominate.

This imbalance makes accuracy an unreliable metric, as a model could predict all transactions as non-fraud and still achieve high accuracy. Therefore, evaluation metrics such as precision, recall, and F1-score are more appropriate for this problem.


In [None]:
# Visualize how many transactions fall into each value of the target "Class".
class_ax = sns.countplot(data=df, x='Class')
class_ax.set_title('class distribution (Fraud vs Non Fraud)')
plt.show()

In [None]:
# Overlaid histograms of the transaction amount for each class.
# log_scale=True puts the data (x) axis on a log scale; the y axis stays a
# plain count, so the axis labels below say so explicitly.
# NOTE(review): the notebook later uses log1p on Amount, which suggests zero
# amounts may exist - confirm how they are handled under log_scale=True.
plt.figure(figsize=(10,5))
sns.histplot(df[df['Class']== 0]['Amount'], bins=50, log_scale=True, label='Non-Fraud', color='blue')
sns.histplot(df[df['Class']== 1]['Amount'], bins=50, log_scale=True, label = 'Fraud', color='red')
plt.title('Distribution of Transaction Amounts (Fraud vs Non-Fraud)')
plt.xlabel('Transaction Amount (log scale)')
plt.ylabel('Frequency')
# The label= arguments above are only rendered once a legend is requested.
plt.legend()
plt.show()

From the transaction amount distribution, we observe a highly right-skewed pattern with a long tail of extreme values. This indicates the presence of outliers and non-normality, which can negatively affect many machine learning models if not handled properly. The log scale helps reveal structure across different magnitudes. While amount alone does not clearly separate fraud from non-fraud, it remains an important supporting feature when combined with others.

In [None]:
# Box plot of Amount for each class, drawn on a logarithmic y axis.
box_fig, box_ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=df, x='Class', y='Amount', ax=box_ax)
box_ax.set_yscale('log')
box_ax.set_title('Distribution of Transaction Amounts by Class (log scale)')
plt.show()

Do fraud transactions generally involve higher amounts?
Answer: ❌ No — median is lower

Are fraud transactions more variable?
Answer: ✅ Yes — larger spread

Can amount alone classify fraud?
Answer: ❌ No — heavy overlap

Is amount still useful as a feature?
Answer: ✅ Yes — but after transformation and with other features

*******
Transaction amount is skewed, contains outliers, and overlaps across classes. Therefore, we should transform it (log), avoid rule-based thresholds, and combine it with other features using models robust to imbalance.

In [None]:
# Feature engineering: add a transformed feature, log1p(Amount) = log(1 + Amount),
# as a new column next to the original Amount.
df['Log_Amount'] = np.log1p(df['Amount'])


In [None]:
# Summary statistics of the raw and the log-transformed amount, side by side.
df[['Amount', 'Log_Amount']].describe()


In [None]:
# Overlaid histograms of Log_Amount, one per class, on linear axes.
log_fig, log_ax = plt.subplots(figsize=(8, 5))
hist_specs = [
    (0, {'label': 'Non-Fraud'}),
    (1, {'label': 'Fraud', 'color': 'red'}),
]
for class_value, style_kwargs in hist_specs:
    class_log_amount = df.loc[df['Class'] == class_value, 'Log_Amount']
    sns.histplot(class_log_amount, bins=50, log_scale=False, ax=log_ax, **style_kwargs)
log_ax.legend()
log_ax.set_title("Log_Amount Distribution by Class")
plt.show()

Is Log_Amount alone sufficient to classify fraud?

Answer: ❌ No
But it helps models learn patterns better when combined with other features.

In [None]:
# Preparation for modeling: split the frame into the target and the features.
# X keeps every remaining column, so both Amount and Log_Amount are included.
y = df['Class']
X = df.drop('Class', axis=1)

In [None]:
# Hold out 20% of the rows for testing. Because the imbalance is extreme, the
# split is stratified on y so both parts keep the same class proportions.

from sklearn.model_selection import train_test_split

split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


BASE MODEL

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression with balanced class weights, trained on the
# standardized training features.
baseline_params = {
    'class_weight': 'balanced',
    'max_iter': 1000,
    'random_state': 42,
}
baseline_model = LogisticRegression(**baseline_params)

baseline_model.fit(X_train_scaled, y_train)


In [None]:
# Hard class predictions of the baseline model on the standardized test features.
y_pred = baseline_model.predict(X_test_scaled)


In [None]:
# Model evaluation: text report of the baseline predictions against the test labels.
from sklearn.metrics import classification_report

baseline_report = classification_report(y_test, y_pred)
print(baseline_report)



In [None]:
# Confusion matrix of the baseline predictions, rendered as an annotated heatmap.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

cm_fig, cm_ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', ax=cm_ax)
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("Actual")
cm_ax.set_title("Confusion Matrix - Logistic Regression Baseline")
plt.show()

TN | FP
_______
FN | TP





*   TP → fraud caught ✅
*   FN → fraud missed ❌
*   FP → false alarm
*   TN → correct non-fraud





In [None]:
# Probability threshold tuning: instead of hard predictions, take the
# predicted probabilities and keep column 1 (the second class, i.e. Class = 1
# for the 0/1 labels used here).
y_proba = baseline_model.predict_proba(X_test_scaled)[:, 1]


In [None]:

# Custom decision threshold: predict 1 whenever the probability in y_proba
# is at least `threshold`, otherwise 0.
threshold = 0.3
y_pred_custom = (y_proba >= threshold).astype(int)

In [None]:
# Report for the predictions obtained with the custom threshold.
print(classification_report(y_test, y_pred_custom))


In [None]:
from sklearn.metrics import precision_recall_curve

# Precision and recall of the baseline probabilities across candidate thresholds.
precision, recall, thresholds = precision_recall_curve(y_test, y_proba)

plt.figure()
plt.plot(recall, precision)
plt.xlabel("Recall")
plt.ylabel("Precision")
# Plain ASCII hyphen: the previous title contained a mis-encoded dash that
# rendered as garbage characters.
plt.title("Precision-Recall Curve")
plt.show()


A STRONGER MODEL

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with balanced class weights, fitted on the unscaled training
# features; n_jobs=-1 asks for all available cores.
rf_params = dict(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)
rf_model = RandomForestClassifier(**rf_params)

rf_model.fit(X_train, y_train)


In [None]:
# Predict with the random forest on the unscaled test features (the same
# representation it was fitted on) and print the report.
y_pred_rf = rf_model.predict(X_test)
rf_report = classification_report(y_test, y_pred_rf)
print(rf_report)


| Model          | Recall (Fraud) | Precision | F1 |
| -------------- | -------------- | --------- | -- |
| Logistic (0.5) | ❌              | ❌         | ❌  |
| Logistic (0.3) | ✅              | ⚠️        | ✅  |
| Random Forest  | 🔥             | 🔥        | 🔥 |


Using SMOTE to Handle Imbalance

In [None]:
from imblearn.over_sampling import SMOTE

# Resample the standardized training split only; the test split is left untouched.
smote = SMOTE(random_state=42)
resampled_train = smote.fit_resample(X_train_scaled, y_train)
X_train_smote, y_train_smote = resampled_train

In [None]:
# Class counts of the resampled training labels.
pd.Series(y_train_smote).value_counts()

**Train Model on SMOTE Data**

**1)**  Logistic Regression on SMOTE

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression fitted on the SMOTE-resampled training data.
lr_smote = LogisticRegression(max_iter=1000)
lr_smote.fit(X_train_smote, y_train_smote)

# X_train_smote was resampled from X_train_scaled, so the model must be scored
# on the standardized test features; raw X_test is on a different scale and
# would produce meaningless predictions.
y_pred_lr_smote = lr_smote.predict(X_test_scaled)


In [None]:
# Report for the logistic regression trained on the SMOTE data.
print(classification_report(y_test, y_pred_lr_smote))


**2)**  Random Forest (SMOTE)

In [None]:
# Random forest fitted on the SMOTE-resampled training data.
rf_smote = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1
)

rf_smote.fit(X_train_smote, y_train_smote)

# X_train_smote was resampled from X_train_scaled, so the split thresholds the
# trees learned are in standardized units; predict on X_test_scaled rather
# than the raw X_test.
y_pred_rf_smote = rf_smote.predict(X_test_scaled)

print(classification_report(y_test, y_pred_rf_smote))


**Model Comparison & Hyperparameter Tuning**

In [None]:
#helper function
from sklearn.metrics import classification_report, roc_auc_score

def evaluate_model(name, model, X_test, y_test):
    """Print a classification report and the ROC-AUC of a fitted classifier.

    Args:
        name: Label printed in the section header.
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Features to score; passed unchanged to both model methods.
        y_test: True labels for ``X_test``.

    Returns:
        None. The results are written to standard output.
    """
    predicted_labels = model.predict(X_test)
    # Column 1 of predict_proba is used as the score for ROC-AUC.
    positive_scores = model.predict_proba(X_test)[:, 1]
    auc_value = roc_auc_score(y_test, positive_scores)

    print(f"\n===== {name} =====")
    print(classification_report(y_test, predicted_labels))
    print("ROC-AUC:", auc_value)


In [None]:
# Logistic regression (SMOTE)
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_smote, y_train_smote)

# The SMOTE training data was resampled from the standardized features, so
# evaluate on X_test_scaled; raw X_test would be on a different scale.
evaluate_model("Logistic Regression (SMOTE)", lr, X_test_scaled, y_test)


In [None]:
# Gradient boosting (SMOTE)
from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train_smote, y_train_smote)

# The SMOTE training data was resampled from the standardized features, so
# evaluate on X_test_scaled; raw X_test would be on a different scale.
# The label carries the "(SMOTE)" suffix to match the other SMOTE-trained models.
evaluate_model("Gradient Boosting (SMOTE)", gb, X_test_scaled, y_test)


In [None]:
# Random forest (SMOTE)
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train_smote, y_train_smote)

# The SMOTE training data was resampled from the standardized features, so
# evaluate on X_test_scaled; raw X_test would be on a different scale.
evaluate_model("Random Forest (SMOTE)", rf, X_test_scaled, y_test)


| Model                         | Precision | Recall   | F1   | ROC-AUC   |
| ----------------------------- | --------- | -------- | ---- | --------- |
| Logistic Regression           | low       | low      | low  | baseline  |
| Random Forest (SMOTE)         | **0.82**  | **0.70** | 0.76 | 0.956     |
| **Gradient Boosting (SMOTE)** | **0.13**  | **0.88** | 0.23 | **0.977** |


**Gradient Boosting**

Best recall → catches most frauds

Best ROC-AUC → best ranking ability

Lower precision → more false alarms (acceptable in fraud)

👉 In fraud detection:

**Missing a fraud is worse than flagging a normal transaction, so Gradient Boosting is a defensible final choice.**

### Model Selection Summary

Three models were evaluated: Logistic Regression, Random Forest, and Gradient Boosting.
Due to extreme class imbalance, recall and ROC-AUC were prioritized over accuracy.

Gradient Boosting achieved the highest fraud recall (0.88) and ROC-AUC (0.977),
making it the preferred model for this problem despite lower precision.
Hyperparameter tuning was not performed due to computational constraints and
is considered future work.
