In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the transactions CSV into a DataFrame
transactions = pd.read_csv(filepath_or_buffer="Data/transactions.csv")

# Bare last expression so the notebook renders the frame
transactions

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isPayment,isMovement,accountDiff
0,206,CASH_OUT,62927.08,C473782114,0.00,0.00,C2096898696,649420.67,712347.75,0,0,1,649420.67
1,380,PAYMENT,32851.57,C1915112886,0.00,0.00,M916879292,0.00,0.00,0,1,0,0.00
2,570,CASH_OUT,1131750.38,C1396198422,1131750.38,0.00,C1612235515,313070.53,1444820.92,1,0,1,818679.85
3,184,CASH_OUT,60519.74,C982551468,60519.74,0.00,C1378644910,54295.32,182654.50,1,0,1,6224.42
4,162,CASH_IN,46716.01,C1759889425,7668050.60,7714766.61,C2059152908,2125468.75,2078752.75,0,0,0,5542581.85
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,298,CASH_OUT,173833.78,C1112634506,16302.00,0.00,C996800768,316909.31,490743.09,0,0,1,300607.31
996,58,TRANSFER,561948.38,C856217790,561948.38,0.00,C1278181974,0.00,0.00,1,0,1,561948.38
997,72,CASH_OUT,622235.32,C615309889,622235.32,0.00,C755984599,3377968.96,4000204.28,1,0,1,2755733.64
998,178,CASH_OUT,119604.13,C42162938,30678.00,0.00,C540527919,22457787.17,22577391.30,0,0,1,22427109.17


In [2]:
# Keep only the rows whose isFraud flag equals 1, then report how many there are
fraud_mask = transactions["isFraud"].eq(1)
fraudulent_transactions = transactions.loc[fraud_mask]
num_fraudulent_transactions = fraudulent_transactions.shape[0]
print(num_fraudulent_transactions)

282


In [5]:
# Derive three columns on the transactions frame:
#   isPayment   - 1 when type is PAYMENT or DEBIT, otherwise 0
#   isMovement  - 1 when type is CASH_OUT or TRANSFER, otherwise 0
#   accountDiff - absolute difference between oldbalanceOrg and oldbalanceDest
transaction_type = transactions["type"]
transactions["isPayment"] = transaction_type.isin(["PAYMENT", "DEBIT"]).astype(int)
transactions["isMovement"] = transaction_type.isin(["CASH_OUT", "TRANSFER"]).astype(int)
transactions["accountDiff"] = (
    transactions["oldbalanceOrg"] - transactions["oldbalanceDest"]
).abs()

# Preview the first five rows, including the new columns
transactions.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isPayment,isMovement,accountDiff
0,206,CASH_OUT,62927.08,C473782114,0.0,0.0,C2096898696,649420.67,712347.75,0,0,1,649420.67
1,380,PAYMENT,32851.57,C1915112886,0.0,0.0,M916879292,0.0,0.0,0,1,0,0.0
2,570,CASH_OUT,1131750.38,C1396198422,1131750.38,0.0,C1612235515,313070.53,1444820.92,1,0,1,818679.85
3,184,CASH_OUT,60519.74,C982551468,60519.74,0.0,C1378644910,54295.32,182654.5,1,0,1,6224.42
4,162,CASH_IN,46716.01,C1759889425,7668050.6,7714766.61,C2059152908,2125468.75,2078752.75,0,0,0,5542581.85


In [6]:
# Create features and label variables.
# Indexing with a list of column names raises KeyError when a feature column is
# missing or misspelled; pd.DataFrame(transactions, columns=[...]) would instead
# reindex and silently hand the model an all-NaN column.
feature_columns = ["amount", "isPayment", "isMovement", "accountDiff"]
label = transactions["isFraud"]
features = transactions[feature_columns]

# Split dataset: 30% held out for testing, fixed seed so the split is reproducible
features_train, features_test, label_train, label_test = train_test_split(
    features, label, test_size=0.3, random_state=42
)

# Standardize the feature variables. The scaler is fitted on the training split
# only and then reused on the test split, so test-set statistics never influence
# the fitted scaling parameters.
scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)

# Fit the model to the (scaled) training data
model = LogisticRegression()
model.fit(features_train_scaled, label_train)

# Score the model on the training data
training_score = model.score(features_train_scaled, label_train)
print(f"Training score: {training_score}")

# Score the model on the held-out test data
test_score = model.score(features_test_scaled, label_test)
print(f"Test score: {test_score}")

# Print the model coefficients (one per entry of feature_columns, in that order)
print(model.coef_)

Training score: 0.8385714285714285
Test score: 0.85
[[ 2.42110403 -0.61050379  2.10147921 -0.987915  ]]


In [7]:
# New transaction data; values are ordered like the training feature columns:
# amount, isPayment, isMovement, accountDiff
transaction1 = np.array([123456.78, 0.0, 1.0, 54670.1])
transaction2 = np.array([98765.43, 1.0, 0.0, 8524.75])
transaction3 = np.array([543678.31, 1.0, 0.0, 510025.5])
transaction4 = np.array([825761.25, 0.0, 1.0, 218712.8])

# Combine new transactions into a single (unscaled) array, one row per transaction.
# Kept under its own name so the raw values are not overwritten by the scaled ones.
sample_transactions_raw = np.stack([transaction1, transaction2, transaction3, transaction4])
print(sample_transactions_raw)

# Normalize the new transactions with the scaler fitted on the training split.
# The scaler was fitted on a DataFrame, so the samples are labelled with the same
# column names before transforming; passing a bare ndarray bypasses scikit-learn's
# feature-name check and triggers a feature-name mismatch warning on recent versions.
sample_transactions = scaler.transform(
    pd.DataFrame(sample_transactions_raw, columns=features_train.columns)
)
print(sample_transactions)

# Predict fraud on the new transactions
predictions = model.predict(sample_transactions)
print(predictions)

# Show probabilities on the new transactions
predictions_probabilities = model.predict_proba(sample_transactions)
print(predictions_probabilities)

[[1.2345678e+05 0.0000000e+00 1.0000000e+00 5.4670100e+04]
 [9.8765430e+04 1.0000000e+00 0.0000000e+00 8.5247500e+03]
 [5.4367831e+05 1.0000000e+00 0.0000000e+00 5.1002550e+05]
 [8.2576125e+05 0.0000000e+00 1.0000000e+00 2.1871280e+05]]
[[-0.27885917 -0.53550416  0.81892881 -0.46978056]
 [-0.29594918  1.86739912 -1.22110737 -0.48268021]
 [ 0.01199527  1.86739912 -1.22110737 -0.34248879]
 [ 0.20723772 -0.53550416  0.81892881 -0.42392344]]
[0 0 0 1]
[[0.61352284 0.38647716]
 [0.99806334 0.00193666]
 [0.99645189 0.00354811]
 [0.33862146 0.66137854]]


