# Fraud Detection with Decision Trees

## Section 1: Feature Engineering & Model Tuning

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

# Load dataset
df = pd.read_csv('fraud_detection.csv')

# Inspect missing values. This only reports per-column null counts; nothing
# is imputed or dropped in this cell.
print("Missing Values:\n", df.isnull().sum())

# Label Encoding for categorical features: replace the 'Type' column with the
# integer codes produced by LabelEncoder.
label_encoder = LabelEncoder()
df['Type'] = label_encoder.fit_transform(df['Type'])

# Feature Engineering: add a log-transformed amount, log(1 + Amount).
df['LogAmount'] = np.log1p(df['Amount'])

# Split dataset into features and target, then hold out 20% for testing.
# random_state is fixed so the split is reproducible.
X = df[['Amount', 'Type', 'LogAmount']]
y = df['Is Fraud']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Decision Tree (seeded for reproducible tie-breaking).
dt = DecisionTreeClassifier(random_state=42)

# Hyperparameter tuning with GridSearchCV: every combination of the values
# below (4 * 3 * 3 = 36 candidates) is scored with 5-fold cross-validation on
# the training split, ranking candidates by F1.
param_grid = {
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(dt, param_grid, cv=5, scoring='f1', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)

# Use the best model found by the search to predict on the held-out test set.
best_dt = grid_search.best_estimator_
y_pred = best_dt.predict(X_test)

# Evaluation
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


## Section 2: Fraud Detection with Decision Trees

### Objective
Detect fraudulent transactions based on patterns in financial data.


In [None]:
# Step 1: Load & Preprocess the Dataset
# NOTE: this cell relies on `label_encoder` and `grid_search` (and the
# imports) created by the Section 1 cell, so that cell must be run first.
df = pd.read_csv('fraud_detection.csv')

# Check for missing values (reported only; nothing is imputed or dropped).
print("Missing Values:\n", df.isnull().sum())

# Convert categorical variables using label encoding
df['Type'] = label_encoder.fit_transform(df['Type'])

# Step 2: Feature Engineering
# `df` was just re-read from disk, so the 'LogAmount' column added in
# Section 1 is gone; recreate it here, otherwise the feature selection below
# raises a KeyError.
df['LogAmount'] = np.log1p(df['Amount'])

# Creating new features (e.g., Transaction Amount squared)
df['Amount_Squared'] = df['Amount'] ** 2

# Step 3: Train a Decision Tree Classifier
X = df[['Amount', 'Type', 'LogAmount', 'Amount_Squared']]
y = df['Is Fraud']

# Hold out 20% for testing; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Decision Tree model using the hyperparameters selected by the
# Section 1 grid search.
dt_model = DecisionTreeClassifier(random_state=42, **grid_search.best_params_)
dt_model.fit(X_train, y_train)

# Step 4: Evaluate Model Performance
y_pred = dt_model.predict(X_test)

# Print classification metrics
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
