In [None]:
# Import basic libraries
import numpy as np
import pandas as pd

# Path of the dataset CSV, kept in one named constant so it is easy to change
DATA_PATH = "/kaggle/input/fraud-detection-dataset/AIML Dataset.csv"

# Read the CSV into a DataFrame
df = pd.read_csv(DATA_PATH)

# Preview the first rows as the cell output
df.head()

In [None]:
# Relative frequency of each isFraud value (normalize=True gives fractions
# that sum to 1 instead of raw counts)
fraud_share = df['isFraud'].value_counts(normalize=True)
fraud_share

In [None]:
# Drop ID columns.
# errors='ignore' keeps this cell safe to re-run: `df` is reassigned here, so
# on a second execution the two columns are already gone and a plain drop
# would raise KeyError.
df = df.drop(columns=['nameOrig', 'nameDest'], errors='ignore')
df.head()

In [None]:
# One-hot encode the 'type' column; drop_first=True omits the first category
# so the remaining indicator columns are not perfectly collinear.
# The guard makes the cell re-runnable: get_dummies removes 'type' from `df`,
# so a second execution without the check would raise KeyError.
if 'type' in df.columns:
    df = pd.get_dummies(df, columns=['type'], drop_first=True)

df.head()

In [None]:
# Target vector: the isFraud label column
y = df['isFraud']

# Feature matrix: every other column still present in df
# NOTE(review): all remaining columns are used as features; confirm none of
# them is derived from the label before trusting the scores.
X = df.drop(columns=['isFraud'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio of
# the label the same in both splits; the fixed seed makes the split repeatable.
split_options = {"test_size": 0.2, "stratify": y, "random_state": 42}

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with class_weight='balanced' to compensate for the
# class imbalance, and a raised iteration cap for the solver.
# NOTE(review): the features are passed to the solver unscaled; if a
# ConvergenceWarning appears, standardising them first is the usual remedy.
lr_params = {"class_weight": 'balanced', "max_iter": 1000}
model = LogisticRegression(**lr_params)

# fit() returns the estimator, so it is also shown as the cell output
model.fit(X_train, y_train)

In [None]:
# predict_proba returns one column per class; keep column index 1
# (presumably the fraud class, given 0/1 labels — verify via model.classes_)
test_class_probs = model.predict_proba(X_test)
y_probs = test_class_probs[:, 1]

In [None]:
from sklearn.metrics import average_precision_score, precision_recall_curve

# Precision/recall pairs over the score thresholds, computed on the test split
precision, recall, thresholds = precision_recall_curve(y_test, y_probs)

# Average precision: a single-number summary of the precision-recall curve
pr_auc = average_precision_score(y_test, y_probs)
print("PR-AUC Score:", pr_auc)

In [None]:
import matplotlib.pyplot as plt

# Draw the precision-recall curve on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(recall, precision)

ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Curve")

plt.show()

In [None]:
from sklearn.metrics import f1_score

# Pick the decision threshold that maximises F1 on the test predictions.
#
# The previous version called f1_score once per threshold. Because
# precision_recall_curve yields roughly one threshold per distinct score, that
# is a full pass over the test set for every threshold (quadratic work), which
# does not finish in reasonable time on a large test split.
#
# precision_recall_curve already gives precision[i] and recall[i] for
# predictions `y_probs >= thresholds[i]`; the final element of each array is
# the (1, 0) end point with no matching threshold, so it is sliced off. F1 for
# every threshold is then computed in one vectorised step.
#
# NOTE(review): the threshold is tuned on the same test split that is used for
# reporting, so the resulting F1 is optimistic; consider tuning on a separate
# validation split.
precision_at_t = np.asarray(precision, dtype=float)[:-1]
recall_at_t = np.asarray(recall, dtype=float)[:-1]

pr_sum = precision_at_t + recall_at_t
f1_per_threshold = np.divide(
    2.0 * precision_at_t * recall_at_t,
    pr_sum,
    out=np.zeros_like(pr_sum),  # F1 is taken as 0 where precision + recall == 0
    where=pr_sum > 0,
)

# Same fallbacks as before when no threshold yields a positive F1
best_f1 = 0
best_threshold = 0

if f1_per_threshold.size:
    # argmax returns the first maximum, matching the original strict `>` update
    best_idx = int(np.argmax(f1_per_threshold))
    if f1_per_threshold[best_idx] > 0:
        best_threshold = thresholds[best_idx]
        # Recompute with f1_score once so the reported value is exactly what
        # the metric function returns at the chosen threshold
        best_f1 = f1_score(y_test, (y_probs >= best_threshold).astype(int))

In [None]:
# Report the selected threshold and the F1 score it achieves
for label, value in (
    ("Best Threshold:", best_threshold),
    ("Best F1 Score:", best_f1),
):
    print(label, value)

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the training split only; the test split is left untouched so the
# evaluation below still reflects the original class balance.
smote = SMOTE(random_state=42)
X_sm, y_sm = smote.fit_resample(X_train, y_train)

# Refit a logistic regression on the resampled data (no class weights here,
# since the resampling is what addresses the imbalance); fit() returns the
# fitted estimator.
model_sm = LogisticRegression(max_iter=1000).fit(X_sm, y_sm)

# Score the untouched test split; column index 1 as in the earlier model
y_probs_sm = model_sm.predict_proba(X_test)[:, 1]

pr_auc_sm = average_precision_score(y_test, y_probs_sm)
print("PR-AUC (SMOTE):", pr_auc_sm)