In [1]:
import pandas as pd
import numpy as np


np.random.seed(42)


n = 500


transaction_ids = np.arange(1001, 1001 + n)


types = np.random.choice(['credit', 'debit'], size=n)


amounts = np.concatenate([
    np.random.normal(100, 20, int(n * 0.9)),   # normal transactions
    np.random.normal(1000, 300, int(n * 0.1))  # suspicious (possibly fraudulent)
])
amounts = np.abs(amounts.round(2))  

# IsFraud: 5% fraud rate overall, more likely if amount is high
is_fraud = []
for amt in amounts:
    if amt > 800:
        is_fraud.append(np.random.choice([0, 1], p=[0.6, 0.4]))  # 40% fraud if high amount
    else:
        is_fraud.append(np.random.choice([0, 1], p=[0.98, 0.02]))  # 2% fraud otherwise


df = pd.DataFrame({
    'TransactionID': transaction_ids,
    'Amount': amounts,
    'Type': types,
    'IsFraud': is_fraud
})


df.to_csv("fraud_detection.csv", index=False)

print("✅ Dataset created and saved as 'fraud_detection.csv'")
df.head()


✅ Dataset created and saved as 'fraud_detection.csv'


Unnamed: 0,TransactionID,Amount,Type,IsFraud
0,1001,83.06,credit,0
1,1002,69.7,debit,0
2,1003,91.07,credit,0
3,1004,117.13,credit,0
4,1005,104.28,credit,0


In [2]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score


df = pd.read_csv("fraud_detection.csv")  # Place your actual file path here
print("Initial Data Snapshot:\n", df.head())


print("\nMissing Values:\n", df.isnull().sum())


le = LabelEncoder()
df['Type'] = le.fit_transform(df['Type'])  # e.g., credit=0, debit=1


df['LogAmount'] = np.log1p(df['Amount'])


X = df[['Amount', 'Type', 'LogAmount']]
y = df['IsFraud']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("\nAccuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Initial Data Snapshot:
    TransactionID  Amount    Type  IsFraud
0           1001   83.06  credit        0
1           1002   69.70   debit        0
2           1003   91.07  credit        0
3           1004  117.13  credit        0
4           1005  104.28  credit        0

Missing Values:
 TransactionID    0
Amount           0
Type             0
IsFraud          0
dtype: int64

Accuracy Score: 0.92

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.95      0.96       143
           1       0.22      0.29      0.25         7

    accuracy                           0.92       150
   macro avg       0.59      0.62      0.60       150
weighted avg       0.93      0.92      0.92       150

