<a href="https://colab.research.google.com/github/Abhinav2656/credit-card-fraud-detection/blob/main/GrowthLink.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas scikit-learn numpy imblearn

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: imblearn
Successfully installed imblearn-0.0


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from imblearn.over_sampling import SMOTE
import numpy as np

# Load the dataset
file_path = "/content/creditcard.csv"

df = pd.read_csv(file_path)

## Feature Engineering
# Create transaction frequency feature
df['Transaction_Freq'] = df.groupby('Time')['Time'].transform('count')

# Create time-based spending pattern
df['Hour'] = (df['Time'] // 3600) % 24

# Simulate location mismatch feature
df['Location_Mismatch'] = np.random.choice([0, 1], size=len(df), p=[0.95, 0.05])

# Create spending deviation feature
df['Avg_Spend_Per_User'] = df.groupby('Time')['Amount'].transform('mean')
df['Spending_Deviation'] = abs(df['Amount'] - df['Avg_Spend_Per_User'])

# Drop unnecessary columns
X = df.drop(columns=['Class', 'Avg_Spend_Per_User'])  # Features
y = df['Class']  # Target

# Normalize 'Amount', 'Time', and new features
scaler = StandardScaler()
X[['Time', 'Amount', 'Transaction_Freq', 'Hour', 'Spending_Deviation']] = scaler.fit_transform(
    X[['Time', 'Amount', 'Transaction_Freq', 'Hour', 'Spending_Deviation']]
)

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split into train and test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])

# Print performance metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")


Accuracy: 0.9998
Precision: 0.9996
Recall: 1.0000
F1 Score: 0.9998
ROC-AUC Score: 1.0000
