In [None]:
# Import necessary libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the dataset
# The CSV location can be overridden with the FRAUD_DATA_PATH environment
# variable so the notebook is runnable on machines other than the author's.
# When the variable is unset (or empty) the original absolute path is used,
# so existing behavior is unchanged.
DEFAULT_DATA_PATH = r"C:\Users\subra\Desktop\Projects\TechnoHacks Edutech\Fraud_Transaction_Detection\data\bank_transactions_data_2.csv"
data_path = os.environ.get("FRAUD_DATA_PATH") or DEFAULT_DATA_PATH
df = pd.read_csv(data_path)


In [None]:
# Display basic information about the dataset
print("First five rows of the dataset:")
print(df.head())

print("\nSummary statistics of the dataset:")
print(df.describe())

print("\nDataset information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df.info()


In [None]:
# Report how many missing entries each column contains
print(
    "\nMissing values in the dataset:",
    df.isna().sum(),
    sep="\n",
)


In [None]:
# Handle missing values (if any)
# Only numeric columns are imputed, each with its own column mean. Taking
# df.mean() over the whole frame raises a TypeError in pandas >= 2.0 as soon as
# a non-numeric column (e.g. text) is present. fillna() with a Series only
# touches the columns named in that Series, so other columns are left as-is.
# The frame is reassigned instead of using inplace=True so the cell does not
# mutate an object that earlier cells already displayed.
numeric_means = df.select_dtypes(include="number").mean()
df = df.fillna(numeric_means)


In [None]:
# Show how many rows fall into each class of the target column
print(
    "\nClass distribution in the target variable (fraud/non-fraud):",
    df['is_fraud'].value_counts(),
    sep="\n",
)


In [None]:
# Exploratory Data Analysis (EDA)
# Histogram of transaction amounts (30 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['transaction_amount'], kde=True, bins=30, color='blue', ax=ax)
ax.set_title("Distribution of Transaction Amounts")
ax.set_xlabel("Transaction Amount")
ax.set_ylabel("Frequency")
plt.show()


In [None]:
# Correlation heatmap
# Correlations are computed on numeric/boolean columns only: calling
# DataFrame.corr() on a frame that still contains text columns raises in
# pandas >= 2.0. For an all-numeric frame the result is unchanged.
numeric_df = df.select_dtypes(include=["number", "bool"])
plt.figure(figsize=(10, 8))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title("Correlation Heatmap")
plt.show()


In [None]:
# Compare the transaction-amount distribution of the two target classes
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='is_fraud', y='transaction_amount', palette='Set2', ax=ax)
ax.set_title("Transaction Amount by Fraud Status")
ax.set_xlabel("Fraud Status (0 = Non-Fraud, 1 = Fraud)")
ax.set_ylabel("Transaction Amount")
plt.show()


In [None]:
# Feature and target selection
# The feature matrix is restricted to numeric/boolean columns because the
# StandardScaler applied in the next step cannot convert text columns to
# floats and would raise on them.
# NOTE(review): any identifier, date or categorical columns are dropped here
# rather than encoded — confirm that is acceptable for this dataset, or add an
# explicit encoding step before this cell.
y = df['is_fraud']                # Target
X = (
    df.drop(columns=['is_fraud'])
    .select_dtypes(include=['number', 'bool'])
)                                 # Features

# Fail early with a clear message instead of an obscure error further down.
if X.shape[1] == 0:
    raise ValueError("No numeric feature columns available after dropping 'is_fraud'.")


In [None]:
# Scale numerical features
# The scaler is fitted on the training rows only, so that test-set statistics
# (mean / standard deviation) do not leak into preprocessing. The training row
# positions are derived with the same test_size, random_state and stratify
# settings as the train-test split cell that follows; with identical settings
# and the same number of rows both calls pick the same rows, so these values
# must be kept in sync with that cell.
train_idx, _test_idx = train_test_split(
    np.arange(len(X)), test_size=0.3, random_state=42, stratify=y
)
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx])

# Every row is transformed with the train-fitted scaler; X_scaled keeps its
# original name and row order for the cells that consume it.
X_scaled = scaler.transform(X)


In [None]:
# Hold out 30% of the rows for testing; stratify on the target and fix the
# random seed so the partition is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)


In [None]:
# Fit a Random Forest classifier (fixed seed) on the training split
model = RandomForestClassifier(
    random_state=42,
)
model.fit(X=X_train, y=y_train)


In [None]:
# Predict a class label for every row of the held-out test split
y_pred = model.predict(X=X_test)


In [None]:
# Evaluate the model: overall accuracy followed by the per-class report
print(
    "\nAccuracy of the model:",
    accuracy_score(y_test, y_pred),
    sep="\n",
)

print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

In [None]:
# Confusion Matrix
# Rows are the actual labels and columns the predicted labels, drawn as an
# annotated heatmap of integer counts
class_names = ['Non-Fraud', 'Fraud']
conf_matrix = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots()
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()