In [None]:
pip install pandas numpy scikit-learn xgboost

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import xgboost as xgb

In [None]:
# Read the pre-engineered feature table from the CSV in the working directory.
df = pd.read_csv('mobile_money_features.csv')

# Echo the (rows, columns) shape as a quick sanity check that the load succeeded.
print("Dataset loaded. Shape:", df.shape)

In [None]:
# Names of the columns used as model inputs.
features = [
    'Amount', 'Rolling_Count_1h', 'Time_Delta', 'Rapid_Transaction',
    'Avg_Amount', 'Max_Amount', 'Min_Amount', 'Location_Change',
    'Unique_Devices', 'Send_Money_Ratio', 'Hour_of_Day'
]

# Feature matrix. The explicit .copy() makes X an independent frame, so adding or
# overwriting columns on X later (e.g. encoded categoricals) neither touches `df`
# nor triggers pandas' SettingWithCopyWarning.
X = df[features].copy()

# Target column; the class-weight computation later in the notebook treats
# 0 as the negative class and 1 as the positive (fraud) class.
y = df['Fraud_Label']

In [None]:
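# Encode categorical features (none in this subset, but included for completeness).
# To use 'Location' or 'Transaction_Type', uncomment the lines below. Use a separate
# LabelEncoder per column (reusing one encoder overwrites the previous column's mapping),
# and also append the new column names to `features` so code that relies on that list
# stays in sync with the columns of X:
# location_encoder = LabelEncoder()
# X['Location'] = location_encoder.fit_transform(df['Location'])
# transaction_type_encoder = LabelEncoder()
# X['Transaction_Type'] = transaction_type_encoder.fit_transform(df['Transaction_Type'])
# features += ['Location', 'Transaction_Type']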

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Class-imbalance weight for XGBoost: number of negative samples divided by the
# number of positive (fraud) samples. The counts are taken from the training labels
# only, so the held-out test labels do not influence any training-time setting.
n_negative = int((y_train == 0).sum())
n_positive = int((y_train == 1).sum())

# Without positive samples the ratio is undefined (division by zero would yield inf),
# so fail with an explicit message instead of training with a meaningless weight.
if n_positive == 0:
    raise ValueError("No positive (label 1) samples in the training set; cannot compute scale_pos_weight.")

scale_pos_weight = n_negative / n_positive  # plain Python float

In [None]:
# Build and fit the XGBoost classifier on the training partition.
# `use_label_encoder` is intentionally not passed: the option was deprecated and later
# removed from XGBClassifier, so on current releases it only produces an
# "unused parameter" warning instead of suppressing one.
model = xgb.XGBClassifier(
    scale_pos_weight=scale_pos_weight,  # Up-weight the positive class to offset imbalance
    eval_metric='logloss',              # Evaluation metric (no eval_set is supplied to fit here)
    random_state=42                     # Fixed seed for reproducible training
)
model.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test rows with the fitted model.
# y_pred is positionally aligned with the rows of X_test.
y_pred = model.predict(X_test)

In [None]:
# Per-class precision / recall / F1 on the held-out test set.
# target_names labels the report rows in sorted label order.
print("\nClassification Report:")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=['Legit', 'Fraud'],
    )
)

In [None]:
# Confusion matrix on the test set: rows are actual labels, columns are predicted labels.
print("\nConfusion Matrix:")
print(
    confusion_matrix(
        y_true=y_test,
        y_pred=y_pred,
    )
)

In [None]:
# Rank the model inputs by the importance scores exposed by the fitted model.
# The names are read from the columns of the frame the model was actually trained on,
# so the table stays correct (and the two columns stay the same length) even if extra
# columns were added to X after the `features` list was defined.
feature_importance = pd.DataFrame({
    'Feature': list(X_train.columns),
    'Importance': model.feature_importances_
}).sort_values('Importance', ascending=False)
print("\nFeature Importance:")
print(feature_importance)

In [None]:
# Assemble the test features with their true and predicted labels in one frame.
# y_test is aligned on the shared row index; y_pred is attached positionally.
df_test = X_test.assign(Actual_Label=y_test, Predicted_Label=y_pred)

# Persist the result; the row index is not written to the file.
df_test.to_csv('fraud_predictions.csv', index=False)
print("Predictions saved as 'fraud_predictions.csv'")