In [10]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Read the raw transaction export into a DataFrame.
data = pd.read_csv('transactions.csv')

# Canonicalise the headers (trim surrounding whitespace, then lowercase) so
# the column lookups below do not depend on how the CSV header was typed.
normalized_headers = data.columns.str.strip().str.lower()
data.columns = normalized_headers

# Echo the normalized headers; a mismatch with the alias table below is then
# easy to spot in the output.
header_names = data.columns.tolist()
print("Dataset Columns:", header_names)

# Map the dataset's verbose headers onto the short feature names used by the
# rest of the script. Headers that are not present are left out silently
# (pandas' rename ignores unknown keys by default).
column_aliases = {
    'amount (inr)': 'amount',
    'status': 'status',
    'sender upi id': 'sender_upi',
    'receiver upi id': 'receiver_upi',
}
data = data.rename(columns=column_aliases)

# Extract additional features from timestamp if available.
# Time-of-day / calendar parts are derived as integer columns. Whenever a
# timestamp is unavailable -- either the whole column is absent or an
# individual row is blank -- the placeholder value 0 is used, so the feature
# columns never contain NaN when they reach the model.
if 'timestamp' in data.columns:
    # Unparseable (non-blank) values still raise here, as before, so corrupt
    # input is not silently accepted.
    data['timestamp'] = pd.to_datetime(data['timestamp'])

    # Blank cells parse to NaT; without the fill below they would turn into
    # float NaN features instead of the documented 0 placeholder.
    missing_timestamps = data['timestamp'].isna()
    if missing_timestamps.any():
        print(f"⚠️ Warning: {int(missing_timestamps.sum())} row(s) have no timestamp; using placeholder time features.")

    data['hour'] = data['timestamp'].dt.hour.fillna(0).astype(int)
    data['day'] = data['timestamp'].dt.day.fillna(0).astype(int)
    data['month'] = data['timestamp'].dt.month.fillna(0).astype(int)
else:
    data['hour'], data['day'], data['month'] = 0, 0, 0  # Placeholder values if timestamp is missing

# Columns holding categorical values that must be turned into integer codes.
categorical_columns = ["sender_upi", "receiver_upi", "status"]

# Fitted encoders keyed by column name; persisted later alongside the model.
encoders = {}

# Fit one LabelEncoder per categorical column that the dataset actually
# contains, replacing the column in place with its integer codes. Columns
# that are absent only produce a warning.
for col in categorical_columns:
    if col not in data.columns:
        print(f"⚠️ Warning: '{col}' column missing in dataset!")
        continue

    label_encoder = LabelEncoder()
    encoders[col] = label_encoder
    data[col] = label_encoder.fit_transform(data[col])
    print(f"✅ Encoder created for: {col}")

# Selecting relevant features: the amount and time columns are always used;
# the encoded categorical columns are appended only when the dataset has them.
# The resulting column order is the feature order the model is trained on.
optional_features = ['status', 'sender_upi', 'receiver_upi']
features = ['amount', 'hour', 'day', 'month'] + [col for col in optional_features if col in data.columns]
X = data[features]

# Placeholder for fraud labels since not in dataset.
# NOTE: these labels are synthetic 0/1 noise, so the trained model carries no
# real fraud signal. They are drawn from a dedicated seeded generator (same
# seed as the split and the forest below) so that repeated runs produce the
# same labels and therefore the same saved model.
label_rng = np.random.default_rng(42)
y = label_rng.integers(0, 2, size=len(data))

# Splitting the dataset: hold out 20% of the rows, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a random forest on the training split; the seed is fixed so the forest
# itself is built deterministically for a given training set.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

# Persist the fitted model together with the label encoders, since the
# encoders hold the category-to-code mapping applied to the training data.
artifacts = (
    (model, 'fraud_detection_model.pkl'),
    (encoders, 'label_encoders.pkl'),
)
for artifact, target_path in artifacts:
    joblib.dump(artifact, target_path)

print("✅ Model and encoders saved successfully as fraud_detection_model.pkl and label_encoders.pkl!")

Dataset Columns: ['transaction id', 'timestamp', 'sender name', 'sender upi id', 'receiver name', 'receiver upi id', 'amount (inr)', 'status']
✅ Encoder created for: status
✅ Model and encoders saved successfully as fraud_detection_model.pkl and label_encoders.pkl!
