In [7]:
#Import necessary libraries
import time

import pandas as pd
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [8]:
# Read the transaction records from 'Fraud_data.csv' (resolved relative to the
# notebook's working directory) into a DataFrame
data = pd.read_csv(filepath_or_buffer='Fraud_data.csv')

In [9]:
# Preview the first five rows of the loaded dataset as plain text
print(data.head(n=5))


   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0        0               0  
1  M2044282225             0.0             0.0        0               0  
2   C553264065             0.0             0.0        1               0  
3    C38997010         21182.0             0.0        1               0  
4  M1230701703             0.0             0.0        0               0  


In [10]:
# Preprocessing
# Replace the string-valued columns with integer codes. Each column gets its
# own LabelEncoder so that the code -> original value mapping of every column
# stays available afterwards (encoders[col].classes_ / .inverse_transform).
# Refitting a single shared encoder would keep only the mapping of the last
# column it was fitted on.
encoders = {}
for column in ['type', 'nameOrig', 'nameDest']:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Backward compatibility: `le` used to end up fitted on 'nameDest'.
le = encoders['nameDest']

In [11]:
# Label as fraud every transaction whose amount is above the mean of the
# "amount" column.
# NOTE: this replaces the 'isFraud' values that were loaded from the CSV.
average_amount = data["amount"].mean()

above_average = data["amount"] > average_amount
data["isFraud"] = above_average.astype(int)
data.head(20)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,3,9839.64,757869,170136.0,160296.36,1662094,0.0,0.0,0,0
1,1,3,1864.28,2188998,21249.0,19384.72,1733924,0.0,0.0,0,0
2,1,4,181.0,1002156,181.0,0.0,439685,0.0,0.0,0,0
3,1,1,181.0,5828262,181.0,0.0,391696,21182.0,0.0,0,0
4,1,3,11668.14,3445981,41554.0,29885.86,828919,0.0,0.0,0,0
5,1,3,7817.71,6026525,53860.0,46042.29,2247218,0.0,0.0,0,0
6,1,3,7107.77,1805947,183195.0,176087.23,2063363,0.0,0.0,0,0
7,1,3,7861.64,2999171,176087.23,168225.59,2314008,0.0,0.0,0,0
8,1,3,4024.36,869140,2671.0,0.0,768940,0.0,0.0,0,0
9,1,2,5337.77,5407276,41720.0,36382.23,282960,41898.0,40348.79,0,0


In [12]:
# Detect fraud
#
# NOTE(review): this cell previously also assigned 'isFraud' from two balance
# comparisons (newbalanceOrig > oldbalanceOrg, newbalanceDest > oldbalanceDest),
# but each of those assignments was immediately overwritten by the next one,
# so they never influenced the result. They are removed here; the resulting
# labels are unchanged. If the balance rules were meant to contribute, OR
# their boolean masks into `rule_based_fraud` below.

# Integer codes held by the label-encoded 'type' column
# (PAYMENT -> 3, TRANSFER -> 4, as seen in the dataset previews).
PAYMENT_CODE = 3
TRANSFER_CODE = 4

# Every transaction that is neither a PAYMENT nor a TRANSFER is labelled fraud
rule_based_fraud = ~data['type'].isin([PAYMENT_CODE, TRANSFER_CODE])
data['isFraud'] = rule_based_fraud.astype(int)

# Rows with isFlaggedFraud == 1 are always labelled fraud
data.loc[data['isFlaggedFraud'] == 1, 'isFraud'] = 1


data.head(50)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,3,9839.64,757869,170136.0,160296.36,1662094,0.0,0.0,0,0
1,1,3,1864.28,2188998,21249.0,19384.72,1733924,0.0,0.0,0,0
2,1,4,181.0,1002156,181.0,0.0,439685,0.0,0.0,0,0
3,1,1,181.0,5828262,181.0,0.0,391696,21182.0,0.0,1,0
4,1,3,11668.14,3445981,41554.0,29885.86,828919,0.0,0.0,0,0
5,1,3,7817.71,6026525,53860.0,46042.29,2247218,0.0,0.0,0,0
6,1,3,7107.77,1805947,183195.0,176087.23,2063363,0.0,0.0,0,0
7,1,3,7861.64,2999171,176087.23,168225.59,2314008,0.0,0.0,0,0
8,1,3,4024.36,869140,2671.0,0.0,768940,0.0,0.0,0,0
9,1,2,5337.77,5407276,41720.0,36382.23,282960,41898.0,40348.79,1,0


In [13]:
# Split the data into training and validation sets
X = data.drop(['isFraud', 'isFlaggedFraud'], axis=1)
y = data['isFraud']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

In [14]:
# Train an XGBoost classifier with default hyperparameters and report how long
# the fit took.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump when the
# system clock is adjusted. (`time` is imported in the notebook's import cell.)
start_time = time.perf_counter()

# Train the model
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

end_time = time.perf_counter()
execution_time = end_time - start_time

print("Training time:", execution_time, "seconds")


Training time: 19.464669227600098 seconds


In [21]:
# Score a single validation example: the row at position 3 of X_val.
# The double brackets keep it a one-row DataFrame, as model.predict is given
# the same column layout it was trained on.
sample_position = 3
row_to_predict = X_val.iloc[[sample_position]]
y_pred = model.predict(row_to_predict)



In [22]:
# Report the predicted class of the first (and only) scored row; 1 means fraud
if y_pred[0] == 1:
    print("Suspected to be a Fraudulent Transaction.")
else:
    print("Not Fraud")


Not Fraud


### 