In [27]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Location of the transactions CSV.
# NOTE(review): hard-coded absolute path (looks like a Colab upload dir);
# adjust DATA_PATH when running elsewhere.
DATA_PATH = "/content/credit card.csv"

data = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first rows to see which columns were loaded and what they hold.
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [19]:
# Number of rows that are exact duplicates of an earlier row.
data.duplicated().sum()

np.int64(0)

In [4]:
# Missing-value count per column.
data.isnull().sum()

Unnamed: 0,0
step,0
type,0
amount,0
nameOrig,0
oldbalanceOrg,0
newbalanceOrig,0
nameDest,0
oldbalanceDest,0
newbalanceDest,0
isFraud,0


In [5]:
# (rows, columns) of the loaded frame.
data.shape

(1048575, 11)

In [6]:
# Distinct transaction-type labels; these are the keys the encoding step must cover.
data["type"].unique()

array(['PAYMENT', 'TRANSFER', 'CASH_OUT', 'DEBIT', 'CASH_IN'],
      dtype=object)

In [7]:
# Integer code assigned to each transaction-type label.
# NOTE(review): the codes are arbitrary labels, not a ranking; a linear model
# will still read them as ordinal -- one-hot encoding may be a better fit.
TYPE_CODES = {
    "PAYMENT": 0,
    "TRANSFER": 1,
    "CASH_OUT": 2,
    "DEBIT": 3,
    "CASH_IN": 4,
}

# Guard keeps the cell idempotent: re-running .map() on an already-encoded
# column would turn every value into NaN, because the integer codes are not
# keys of TYPE_CODES.
if not pd.api.types.is_numeric_dtype(data["type"]):
    type_encoded = data["type"].map(TYPE_CODES)

    # .map() silently yields NaN for labels missing from the dict; surface
    # them here instead of letting NaNs reach the model.
    unmapped = data.loc[type_encoded.isna(), "type"].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unmapped transaction types: {list(unmapped)}")

    data["type"] = type_encoded

In [8]:
# Re-check the first rows to confirm `type` now holds integer codes.
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,0,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,0,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,1,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,2,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,0,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [9]:
# Model inputs: the encoded transaction type, the amount, and the originating
# account's balance before and after the transaction.
FEATURE_COLUMNS = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
# Binary label the classifier is trained to predict.
TARGET_COLUMN = "isFraud"

X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

In [10]:
# Sanity-check the selected feature columns.
X.head()

Unnamed: 0,type,amount,oldbalanceOrg,newbalanceOrig
0,0,9839.64,170136.0,160296.36
1,0,1864.28,21249.0,19384.72
2,1,181.0,181.0,0.0
3,2,181.0,181.0,0.0
4,0,11668.14,41554.0,29885.86


In [11]:
# Sanity-check the target column.
y.head()

Unnamed: 0,isFraud
0,0
1,0
2,1
3,1
4,0


In [12]:
# Hold out 10% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
# NOTE(review): the split is not stratified; if isFraud is as rare as the
# recorded results suggest, passing stratify=y would keep the fraud rate equal
# in both partitions -- TODO confirm and evaluate.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [13]:
# Size of the held-out evaluation split.
x_test.shape

(104858, 4)

In [15]:
# Size of the training split.
x_train.shape

(943717, 4)

In [16]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then re-used on the test split, so no information
# from the held-out rows leaks into preprocessing.
scaler = StandardScaler()
x_train_norm = scaler.fit_transform(x_train)
# transform (not fit_transform): apply the training-set statistics as-is.
x_test_norm = scaler.transform(x_test)

In [20]:
# Logistic-regression classifier with library-default hyperparameters.
# NOTE(review): if the positive class is rare, class_weight="balanced" may be
# worth evaluating -- TODO confirm against the label distribution.
model = LogisticRegression()

In [21]:
# Train on the standardized training features.
model.fit(x_train_norm,y_train)

In [22]:
# Predicted class labels for the standardized held-out features.
y_pred = model.predict(x_test_norm)

In [28]:
# Fraction of held-out rows whose predicted label equals the true label.
# NOTE(review): accuracy alone can look excellent while most positives are
# missed; the recorded mismatch listing is dominated by actual=1/predicted=0
# rows, so precision/recall or a confusion matrix would be more informative.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

0.9990463293215587

In [29]:
# Side-by-side table of true vs. predicted labels; the frame takes its index
# from y_test, so rows keep their original dataset row labels.
label_columns = {
    "Actual": y_test,
    "Predicted": y_pred,
}
results = pd.DataFrame(label_columns)

In [30]:
# Preview the actual-vs-predicted table.
results.head()

Unnamed: 0,Actual,Predicted
781974,0,0
937737,0,0
907828,0,0
784628,0,0
662460,0,0


In [31]:
# Collect every test row whose predicted label differs from the true label.
# Done with one vectorized comparison instead of a per-row Python loop with
# .iloc lookups, which is needlessly slow on a split of this size.
actual_labels = y_test.to_numpy()
# 0-based positions within the test split (not the original DataFrame index).
mismatch_positions = np.flatnonzero(actual_labels != y_pred)

# One formatted line per mismatch: "<position> <actual>      <predicted>".
anomalies = [
    f"{pos} {actual_labels[pos]}      {y_pred[pos]}"
    for pos in mismatch_positions
]
# The count is simply the number of collected lines; no separate counter needed.
count = len(anomalies)

In [34]:
# Report how many test rows were misclassified out of the whole test split
# ("anomalies" here means prediction/label mismatches).
print(f"Number of anomalies = {count} / {len(y_test)}")

Number of anomalies = 100 / 104858


In [37]:
# Print the header followed by every pre-formatted mismatch line.
# NOTE(review): each line starts with the row's position in the test split,
# which the header does not label.
print("ACTUAL     PREDICTED")
for line in anomalies:
  print(line)

ACTUAL     PREDICTED
147 1      0
417 1      0
577 1      0
731 1      0
3866 1      0
6007 1      0
6077 1      0
6832 1      0
7226 1      0
8105 1      0
8252 1      0
8286 1      0
10435 1      0
10540 1      0
10600 1      0
10734 1      0
10892 1      0
14602 0      1
15081 1      0
15660 1      0
17647 1      0
17793 1      0
18225 1      0
21401 1      0
21837 1      0
21863 1      0
22318 1      0
25781 1      0
26463 1      0
26561 1      0
26840 1      0
27546 1      0
31191 1      0
31888 1      0
33294 1      0
35856 1      0
37694 0      1
39111 1      0
40478 1      0
41194 1      0
41950 1      0
42588 1      0
42746 1      0
48261 1      0
48642 0      1
48700 1      0
49048 1      0
49900 1      0
51069 1      0
54124 1      0
54311 1      0
56213 1      0
56951 1      0
57437 0      1
58795 1      0
59601 1      0
59889 1      0
60922 1      0
61995 1      0
62483 1      0
64558 1      0
65542 1      0
65604 1      0
68861 1      0
69634 1      0
70069 1      0
70930