In [None]:
# Detecting Fraudulent Transactions Using a Random Forest Model
# Import Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Build a synthetic transaction table.
# The global NumPy RNG is seeded first, and the four columns are drawn in a
# fixed order so the generated values are reproducible run to run.
np.random.seed(42)

num_samples = 5000

amounts = np.random.randint(10, 5000, num_samples)   # integers in [10, 5000)
hours = np.random.randint(0, 24, num_samples)        # integers in [0, 24)
locations = np.random.randint(1, 20, num_samples)    # integers in [1, 20)
# Labels are drawn with probability 0.95 for 0 and 0.05 for 1 (about 5% fraud)
fraud_flags = np.random.choice([0, 1], size=num_samples, p=[0.95, 0.05])

data = {
    "TransactionAmount": amounts,
    "TransactionTime": hours,
    "Location": locations,
    "Fraud": fraud_flags,
}

df = pd.DataFrame(data)

# Preview the first ten rows
print("Sample Transaction Data:")
print(df.head(10))


In [None]:
# Separate the predictor columns from the label column
feature_cols = ["TransactionAmount", "TransactionTime", "Location"]
X = df[feature_cols]
y = df["Fraud"]

# Hold out 20% of the rows for testing; stratify on the label so both
# partitions keep the same class proportions, and fix the seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
# Sanity-check the train/test split.
# X, y and the four partitions are created once, in the preceding cell; this
# cell only verifies them, so re-running it never changes notebook state.

# Every row must land in exactly one partition.
assert len(X_train) + len(X_test) == len(X), "split lost or duplicated rows"
assert X_train.index.intersection(X_test.index).empty, "train and test rows overlap"

# Features and labels must stay aligned row-for-row within each partition.
assert X_train.index.equals(y_train.index), "X_train / y_train are misaligned"
assert X_test.index.equals(y_test.index), "X_test / y_test are misaligned"

# The split is stratified on y, so the fraud rate should be nearly identical
# on both sides (small differences come only from rounding the class counts).
assert abs(y_train.mean() - y_test.mean()) < 0.01, "fraud rate differs between train and test"


In [None]:
# Balance the classes with SMOTE, applied to the training partition only
smote = SMOTE(random_state=42)
resampled_features, y_train_resampled = smote.fit_resample(X_train, y_train)

# Re-wrap the resampled features as a DataFrame that carries the training
# columns' names, so later steps see named features
X_train_resampled = pd.DataFrame(resampled_features, columns=X_train.columns)

# Report the class counts before and after resampling
print("Before SMOTE:", y_train.value_counts())
print("After SMOTE:", y_train_resampled.value_counts())


In [None]:
# Random forest configuration: 100 trees, fixed seed for repeatable fits
rf_params = {"n_estimators": 100, "random_state": 42}
rf_model = RandomForestClassifier(**rf_params)

# Fit on the SMOTE-resampled training data; the call is left as the cell's
# final expression so the fitted estimator is displayed
rf_model.fit(X_train_resampled, y_train_resampled)


In [None]:
# Score the held-out rows with the trained forest
y_pred = rf_model.predict(X_test)

# Compute every metric first, then print them in a fixed order
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Overall accuracy, rounded to two decimals
print(f"Model Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1
print("Classification Report:")
print(report)

# Counts of true labels (rows) versus predicted labels (columns)
print("Confusion Matrix:")
print(cm)


In [None]:
# Feature importance
# Plot the fitted forest's feature_importances_ as a horizontal bar chart.
# (matplotlib is imported with the other libraries in the notebook's first cell.)
importances = rf_model.feature_importances_

# Take the labels from the fitted model when it recorded them, so the bars
# always match the columns the model was actually trained on; fall back to
# X.columns for estimators that do not expose feature_names_in_.
features = getattr(rf_model, "feature_names_in_", X.columns)

# Explicit figure/axes interface rather than the implicit pyplot state machine
fig, ax = plt.subplots()
ax.barh(features, importances)
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
ax.set_title("Feature Importance in Fraud Detection")
plt.show()
