In [None]:
!pip install numpy
!pip install pandas
!pip install scikit-learn
!pip install imbalanced-learn



In [None]:
import pickle
import warnings

import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler, SMOTE, BorderlineSMOTE, ADASYN
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.inspection import permutation_importance, PartialDependenceDisplay
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, fbeta_score, precision_recall_fscore_support, roc_curve, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC

# Install a blanket "ignore" filter so no warnings are printed in cell output.
# NOTE(review): this also hides solver convergence warnings from scikit-learn;
# consider narrowing the filter to specific categories while developing.
warnings.simplefilter("ignore")

In [None]:
# Shared random seed: passed as `random_state` to the train/test split and to
# the logistic regression so that results are repeatable across runs.
rs = 12

In [None]:
# Read FraudData_sampled.csv (semicolon-separated) into a DataFrame.
df = pd.read_csv(
    "FraudData_sampled.csv",
    sep=";",
)

In [None]:
# Preview the first rows of the loaded frame to check columns and parsing.
df.head()

Unnamed: 0,type,amount,originOldBalance,originNewBalance,destinationOldBalance,destinationNewBalance,isFraud
0,CASH_IN,148753.5,24482.0,173235.5,315720.9,166967.4,0
1,PAYMENT,10180.55,8680.01,0.0,0.0,0.0,0
2,PAYMENT,14806.33,25871.46,11065.14,0.0,0.0,0
3,PAYMENT,38336.09,10445.0,0.0,0.0,0.0,0
4,CASH_OUT,188096.77,0.0,0.0,391329.68,579426.45,0


In [None]:
# One-hot encode the categorical `type` column into integer 0/1 indicator
# columns (type_<VALUE>); the original `type` column is replaced.
# Note: `df` is overwritten, so this cell cannot be re-run without reloading.
df = pd.get_dummies(
    data=df,
    columns=["type"],
    dtype="int",
)

In [None]:
# Show the frame after encoding to confirm the new type_* indicator columns.
df

Unnamed: 0,amount,originOldBalance,originNewBalance,destinationOldBalance,destinationNewBalance,isFraud,type_CASH_IN,type_CASH_OUT,type_CREDIT,type_DEPOSIT,type_PAYMENT,type_TRANSFER
0,148753.50,24482.00,173235.50,315720.90,166967.40,0,1,0,0,0,0,0
1,10180.55,8680.01,0.00,0.00,0.00,0,0,0,0,0,1,0
2,14806.33,25871.46,11065.14,0.00,0.00,0,0,0,0,0,1,0
3,38336.09,10445.00,0.00,0.00,0.00,0,0,0,0,0,1,0
4,188096.77,0.00,0.00,391329.68,579426.45,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
99995,31017.01,17678.13,0.00,0.00,0.00,0,0,0,0,0,1,0
99996,46925.78,0.00,0.00,0.00,0.00,0,0,0,0,0,1,0
99997,12553.98,39628.43,27074.45,0.00,0.00,0,0,0,0,0,1,0
99998,113053.45,22750.00,135803.45,0.00,0.00,0,1,0,0,0,0,0


In [None]:
# Separate the label from the predictors.
target = 'isFraud'

# Every column except the label is used as a model input.
features = df.columns.drop(target)

x_data, y_data = df[features], df[target]

In [None]:
# Show the feature matrix (all columns of `df` except the target).
x_data

Unnamed: 0,amount,originOldBalance,originNewBalance,destinationOldBalance,destinationNewBalance,type_CASH_IN,type_CASH_OUT,type_CREDIT,type_DEPOSIT,type_PAYMENT,type_TRANSFER
0,148753.50,24482.00,173235.50,315720.90,166967.40,1,0,0,0,0,0
1,10180.55,8680.01,0.00,0.00,0.00,0,0,0,0,1,0
2,14806.33,25871.46,11065.14,0.00,0.00,0,0,0,0,1,0
3,38336.09,10445.00,0.00,0.00,0.00,0,0,0,0,1,0
4,188096.77,0.00,0.00,391329.68,579426.45,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
99995,31017.01,17678.13,0.00,0.00,0.00,0,0,0,0,1,0
99996,46925.78,0.00,0.00,0.00,0.00,0,0,0,0,1,0
99997,12553.98,39628.43,27074.45,0.00,0.00,0,0,0,0,1,0
99998,113053.45,22750.00,135803.45,0.00,0.00,1,0,0,0,0,0


In [None]:
# Hold-out split: 65% of the rows go to training, the remainder to testing.
# Stratifying on the label keeps the class proportions the same in both parts;
# the shared seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    train_size=0.65,
    random_state=rs,
    stratify=y_data,
)

In [None]:
# Show the training features; the index holds the original row labels of the
# shuffled sample.
x_train

Unnamed: 0,amount,originOldBalance,originNewBalance,destinationOldBalance,destinationNewBalance,type_CASH_IN,type_CASH_OUT,type_CREDIT,type_DEPOSIT,type_PAYMENT,type_TRANSFER
11712,2347959.58,8077.00,0.00,25892.60,2373852.18,0,0,0,0,0,1
65224,236621.81,0.00,0.00,2108322.05,2344943.86,0,1,0,0,0,0
28207,109203.96,0.00,0.00,1927981.53,2037185.49,0,1,0,0,0,0
15370,21154.53,697457.44,676302.90,3126798.99,3160350.53,0,0,0,0,0,1
29272,98800.37,10832.60,0.00,148640.03,247440.40,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
40981,219452.34,41858.00,261310.34,627836.62,596777.90,1,0,0,0,0,0
87437,25036.01,0.00,0.00,0.00,0.00,0,0,0,0,1,0
94973,9932.32,37422.00,27489.68,0.00,0.00,0,0,0,0,1,0
78690,19267.14,364779.52,384046.65,335917.93,316650.80,1,0,0,0,0,0


In [None]:
# Baseline classifier: min-max scaling followed by logistic regression.
# The `saga` solver is only guaranteed to converge quickly when the features
# share a similar scale, and the raw columns mix 0/1 indicators with monetary
# amounts in the millions. Putting the scaler in a pipeline fits it on the
# training split only and keeps it attached to `model`, so the fitted object
# (and anything pickled from it) still accepts unscaled feature rows.
LR = LogisticRegression(solver='saga', random_state=rs)
model = make_pipeline(MinMaxScaler(), LR).fit(x_train, y_train)
y_pred = model.predict(x_test)

# Accuracy alone can be misleading when the positive class is rare: a model
# that never predicts fraud can still score close to 1.0. Report the
# positive-class metrics and the confusion matrix next to it.
# zero_division=0 keeps the metrics defined when no positives are predicted.
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Precision: {precision_score(y_test, y_pred, zero_division=0)}')
print(f'Recall: {recall_score(y_test, y_pred, zero_division=0)}')
print(f'F1: {fbeta_score(y_test, y_pred, beta=1, zero_division=0)}')
print('Confusion matrix (rows = actual, columns = predicted):')
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.9987142857142857


In [None]:
# Inspect the predicted labels for the test split.
# NOTE(review): the truncated repr only shows the ends of the array; use
# np.unique(y_pred, return_counts=True) to see how many positives are predicted.
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [None]:
# Persist the fitted model to disk so it can be reloaded without retraining.
# (`pickle` is imported in the notebook's import cell.)
# NOTE: only unpickle this file from a trusted source; pickle.load can execute
# arbitrary code embedded in the file.
pkl_filename = "model.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(model, file)