<a href="https://colab.research.google.com/github/SWETHAUMAPATHI/NM-PHASE-2/blob/main/CREDIT_CARD_FRAUD_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install imbalanced-learn plotly --quiet

# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
from google.colab import files

# Upload dataset
print("üìÅ Upload the file: Credit_Card_Applications (1) (1).csv")
uploaded = files.upload()
df = pd.read_csv("Credit_Card_Applications (1) (1).csv")

# Basic info
print("\n‚úÖ Dataset Loaded")
print("Shape:", df.shape)
print("Columns:", list(df.columns))
df.head()

# Handle categorical features
for col in df.select_dtypes(include='object'):
    df[col] = LabelEncoder().fit_transform(df[col])

# Handle missing values (if any)
df.fillna(df.median(numeric_only=True), inplace=True)

# Assume the last column is the target
target_col = df.columns[-1]
X = df.drop(columns=[target_col])
y = df[target_col]

# Normalize features
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_resampled, y_resampled)

# Evaluate
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Classification report
print("\nüìä Classification Report:\n")
print(classification_report(y_test, y_pred))
print("üéØ ROC AUC Score:", round(roc_auc_score(y_test, y_proba), 4))

# Confusion matrix visualization
cm = confusion_matrix(y_test, y_pred)
fig_cm = go.Figure(data=go.Heatmap(
    z=cm,
    x=['Predicted 0', 'Predicted 1'],
    y=['Actual 0', 'Actual 1'],
    colorscale='Blues'
))
fig_cm.update_layout(title="Confusion Matrix", xaxis_title="Prediction", yaxis_title="Actual")
fig_cm.show()

# Visualize target distribution
fig_dist = px.histogram(df, x=target_col, title="Class Distribution", color=df[target_col].astype(str))
fig_dist.show()

# Predict a random transaction
sample = X_test.sample(1)
sample_pred = model.predict(sample)[0]
sample_prob = model.predict_proba(sample)[0][1]

print("\nüîç Random Transaction Prediction")
print("Predicted Class:", "FRAUD" if sample_pred == 1 else "LEGIT")
print(f"Fraud Probability: {round(sample_prob*100, 2)}%")

üìÅ Upload the file: Credit_Card_Applications (1) (1).csv


Saving Credit_Card_Applications (1) (1).csv to Credit_Card_Applications (1) (1).csv

‚úÖ Dataset Loaded
Shape: (690, 16)
Columns: ['CustomerID', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'A11', 'A12', 'A13', 'A14', 'Class']

üìä Classification Report:

              precision    recall  f1-score   support

           0       0.89      0.92      0.90        87
           1       0.85      0.80      0.83        51

    accuracy                           0.88       138
   macro avg       0.87      0.86      0.87       138
weighted avg       0.88      0.88      0.88       138

üéØ ROC AUC Score: 0.9197



üîç Random Transaction Prediction
Predicted Class: LEGIT
Fraud Probability: 7.0%
