In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

CLEANING, PREPARING AND TRAINING THE DATA

In [2]:

data = pd.read_csv('Financial.csv')
from sklearn.utils import resample
data = data.drop(['nameOrig', 'nameDest','step', 'isFlaggedFraud'], axis=1)
#USing Label Encoder for Type values for better predicition
label_encoder = LabelEncoder()
data['type'] = label_encoder.fit_transform(data['type'])
#Distinguising between 0 and 1
df_majority = data[data.isFraud == 0]
df_minority = data[data.isFraud == 1]
#UPSampling Data
df_minority_upsampled = resample(df_minority, replace=True, n_samples=len(df_majority),random_state=42)
df_up = pd.concat([df_majority, df_minority_upsampled])
#Features(X) and target Value(y)
X = df_up.drop('isFraud', axis=1)
y = df_up['isFraud']
#Splitting DataSet
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
#Using Model XGB
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
#Predicting Values
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

Parameters: { "use_label_encoder" } are not used.



ACCURACY OF THE MODEL

In [3]:

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Model Accuracy: 99.95%
Accuracy: 0.9994944194013521
[[246365    250]
 [     0 247866]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    246615
         1.0       1.00      1.00      1.00    247866

    accuracy                           1.00    494481
   macro avg       1.00      1.00      1.00    494481
weighted avg       1.00      1.00      1.00    494481



In [4]:
#xgb_model.save_model('xgb_fraud_detection_model.json')

In [5]:
category_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

print("Category to Integer Mapping for 'type' column:")
print(category_mapping)

Category to Integer Mapping for 'type' column:
{'CASH_IN': 0, 'CASH_OUT': 1, 'DEBIT': 2, 'PAYMENT': 3, 'TRANSFER': 4, nan: 5}


In [6]:

data = pd.read_csv('Financial.csv')


fraud_data = data[data['isFraud'] == 1]

fraud_accounts = fraud_data[['nameDest', 'oldbalanceDest','newbalanceDest']]

fraud_accounts.columns = ['account_number', 'old_balance','new_balance']

fraud_account_data_path = 'fraud_accounts.csv'
fraud_accounts.to_csv(fraud_account_data_path, index=False)

print(f"Fraud account dataset created and saved to {fraud_account_data_path}.")



Fraud account dataset created and saved to fraud_accounts.csv.


In [7]:
def predict_transaction(account_number, input_features):
    fraud_accounts = pd.read_csv('fraud_accounts.csv')
    
    if account_number in fraud_accounts['account_number'].values:
        print("This account is already flagged as fraud.")
        return True
    
    possibility_of_fraud = xgb_model.predict(np.array(input_features).reshape(1, -1))[0]
    bank = pd.read_csv('Financial.csv')
    
    if possibility_of_fraud == 1:
        account_info = bank[bank['nameDest'] == account_number]
        balance = account_info['oldbalanceDest'].values[0]
        
        ans = input(f"This account seems to have a possibility of being fraud, would you like to flag this account {account_number} as fraud? Yes/No: ")
        
        new_entry = pd.DataFrame([[account_number, balance]], columns=['account_number', 'balance'])
        if ans.lower() == "yes":
            fraud_accounts = fraud_accounts.append(new_entry, ignore_index=True)  
            fraud_accounts.to_csv('fraud_accounts.csv', index=False)
            print(f"Account {account_number} has been flagged as fraud.")
            return True
        else:
            return True
    else:
        print(f"Transaction seems normal for account {account_number}.")
        return False


In [9]:

bank = pd.read_csv('Financial.csv')


account_number = input("Enter sender's account number: ")
reciever_acc_number = input("Enter receiver's account number: ")
amount = float(input("Enter the amount to pay: "))
Type = int(input("Enter type of transaction 'CASH_IN': 0, 'CASH_OUT': 1, 'DEBIT': 2, 'PAYMENT': 3, 'TRANSFER': 4: "))

account_info_sender = bank[bank['nameOrig'] == account_number]
account_info_reciever = bank[bank['nameDest'] == reciever_acc_number]

if not account_info_sender.empty and not account_info_reciever.empty:
    oldbalanceOrg = account_info_sender['oldbalanceOrg'].values[0]
    newbalanceOrig = account_info_sender['newbalanceOrig'].values[0]
    oldbalanceDest = account_info_reciever['oldbalanceDest'].values[0]
    newbalanceDest = account_info_reciever['newbalanceDest'].values[0]
    
    input_features = [Type, amount, oldbalanceOrg, newbalanceOrig, oldbalanceDest, newbalanceDest]
    
    print("Input Features: ", input_features)
    print("Data types: ", [type(f) for f in input_features])

    is_fraud = predict_transaction(reciever_acc_number, input_features)
    
    print(f"Fraudulent Transaction Prediction: {is_fraud}")
else:
    print("Invalid account number(s). Please check the sender or receiver account numbers.")


Invalid account number(s). Please check the sender or receiver account numbers.
