# Fraud detection demo

## 1. Importing necessary libraries

In [4]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

## 2. Reading data

In [6]:
# Load the transaction records from the CSV file shipped next to this notebook.
df = pd.read_csv(filepath_or_buffer="../data/fraud_data.csv")

In [7]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


### Encoding transaction type category labels

In [8]:
# Replace the string transaction type with integer codes.
# LabelEncoder numbers the distinct labels in sorted order, so the resulting
# codes depend on which categories are present in this file.
# NOTE(review): the deployed model presumably expects the same codes that were
# used when it was trained -- confirm before pointing this at another dataset.
le = preprocessing.LabelEncoder()
# Bracket indexing is used instead of attribute access (`df.type`): attribute
# assignment only works for columns that already exist and can be shadowed by
# DataFrame attributes, so pandas recommends against it.
df["type"] = le.fit_transform(df["type"])

In [9]:
# Preview the first five rows again; `type` now holds the encoder's integer codes.
df.head(n=5)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,3,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,3,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,3,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [10]:
# Model inputs: every column except the two account-name columns and the two
# fraud flag columns. `columns=` already selects the column axis, so the
# redundant `axis=1` (ignored by pandas when `columns` is given) is omitted.
transactions = df.drop(columns=['nameOrig', 'nameDest', 'isFraud', 'isFlaggedFraud'])
# Ground-truth label for each transaction, kept separately for comparison.
labels = df['isFraud']

In [11]:
# Preview the first five rows of the feature frame.
transactions.head(n=5)

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
0,1,3,9839.64,170136.0,160296.36,0.0,0.0
1,1,3,1864.28,21249.0,19384.72,0.0,0.0
2,1,4,181.0,181.0,0.0,0.0,0.0
3,1,1,181.0,181.0,0.0,21182.0,0.0
4,1,3,11668.14,41554.0,29885.86,0.0,0.0


In [12]:
# List the feature columns that remain after dropping names and fraud flags.
feature_names = list(transactions.columns)
print(f'Transaction parameters used for fraud detection: \n {feature_names}')

Transaction parameters used for fraud detection: 
 ['step', 'type', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']


In [13]:
# Pick the first row as the example and its matching ground-truth label.
# Both lookups are positional (`iloc`) so they always refer to the same row,
# even if the frame's index is not the default 0..n-1 range.
example_transaction = transactions.iloc[0]
is_fraud = labels.iloc[0]

In [14]:
# Convert the selected row to a NumPy array shaped as a single-row 2-D batch.
# -1 lets NumPy infer the number of feature columns instead of hard-coding 7,
# so this cell keeps working if the set of selected columns changes.
example_transaction = np.array(example_transaction).reshape((1, -1))

## 3. Performing inference

In [16]:
from hydrosdk import Cluster, Application
import grpc

In [17]:
# Connection to the Hydrosphere cluster. Replace both placeholder addresses
# with the ones of your instance.
# Turn `ssl` off if your Hydrosphere instance doesn't have TLS certificates
# installed (presumably the channel credentials should then be dropped as
# well -- confirm against the hydrosdk documentation).
tls_credentials = grpc.ssl_channel_credentials()
cluster = Cluster(
    http_address="<hydrosphere-http-address>",
    grpc_address="<hydrosphere-grpc-address>",
    ssl=True,
    grpc_credentials=tls_credentials,
)

In [18]:
# Look up the deployed application by name (replace the placeholder).
application_name = "<application-name>"
app = Application.find(cluster, application_name)

# NOTE(review): presumably blocks until the application has finished
# starting -- confirm against the hydrosdk documentation.
app.lock_while_starting()

# Client object used below to send prediction requests to the application.
predictor = app.predictor()

In [19]:
# Send the example row under the "features" key and keep the response.
request_payload = {"features": example_transaction}
result = predictor.predict(request_payload)

In [23]:
# Show the model's verdict for the example row next to the ground-truth label.
predicted_label = result["is_fraud"][0]
print("Predicted:", predicted_label)
actual_label = bool(is_fraud)
print("Actual:", actual_label)

Predicted: False
Actual: False
