In [None]:
!pip install requests
!pip install pandas
!pip install numpy
!pip install scikit-learn
!pip install requests 

In [1]:
import pandas as pd
import numpy as np

# Data Preparation

In [2]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
%%time
df = pd.read_csv("data/PS_20174392719_1491204439457_log.tar.gz", compression="gzip")

CPU times: user 16.9 s, sys: 1.48 s, total: 18.3 s
Wall time: 18.4 s


In [4]:
# Preview the first rows of the loaded frame to check its columns.
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,3,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,3,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,3,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [5]:
# Encode `type` as integers: cast it to a categorical and keep only the
# category codes.
df['type'] = df['type'].astype('category').cat.codes

In [6]:
# Partition the rows by the `isFraud` label. Despite the names, `train` holds
# the rows labelled 0 (not fraud) and `test` holds the rows labelled 1 (fraud).
train = df.loc[df['isFraud'].eq(0)]
test = df.loc[df['isFraud'].eq(1)]

In [7]:
# Fixed seed so the hold-out split and the shuffle below give the same rows on
# every run (previously both were unseeded and changed on each execution).
RANDOM_STATE = 42
# Columns removed from both frames before the rows are used further.
NON_FEATURE_COLUMNS = ['nameOrig', 'nameDest', 'isFraud', 'isFlaggedFraud']

# Hold out 5% of the non-fraud rows; the remaining 95% become `clean`.
clean, fraudulent = train_test_split(train, test_size=0.05, random_state=RANDOM_STATE)

# Combine every fraud row with the held-out non-fraud rows, shuffle them, and
# renumber the index from 0.
fraudulent = (
    pd.concat([test, fraudulent])
    .sample(frac=1, random_state=RANDOM_STATE)
    .reset_index(drop=True)
)

# Capture the labels before `isFraud` is dropped; they share the frame's index.
fraudulent_labels = fraudulent['isFraud']
fraudulent = fraudulent.drop(columns=NON_FEATURE_COLUMNS)
clean = clean.drop(columns=NON_FEATURE_COLUMNS)

In [8]:
# Preview the first rows of `clean` after the non-feature columns were dropped.
clean.head()

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
1484863,141,1,33107.12,0.0,0.0,4245938.03,4279045.14
355012,17,0,279226.33,2512072.07,2791298.4,1417433.03,1138206.7
4442671,323,1,349919.29,509.0,0.0,376458.59,726377.88
2340225,189,1,85566.7,0.0,0.0,338844.19,424410.89
5419481,378,3,11770.45,0.0,0.0,0.0,0.0


# Inference

In [9]:
import requests
import time 

For Autoencoder monitoring, use the threshold __7362628712__.

In [10]:
# Base URL of the serving gateway that requests are sent to.
# The hosted demo was previously assigned and then immediately overwritten;
# uncomment it (and comment out the local one) to target it instead.
# HOST_ADDRESS = "https://demo.k8s.hydrosphere.io"
HOST_ADDRESS = "http://localhost"

In [11]:
def send_data(data, labels=None, delay=0.6, timeout=10):
    """Post each row of ``data`` to the configured application and print the reply.

    The target URL is built once per call from the module-level
    ``HOST_ADDRESS``, ``APPLICATION_NAME`` and ``SIGNATURE_NAME``. Rows are
    sent one at a time as ``{'features': [[...row values...]]}``.

    Args:
        data: DataFrame whose rows are posted in iteration order.
        labels: Optional Series indexed like ``data``. When given, the
            truthiness of the row's label is printed next to the response.
        delay: Seconds to sleep after each request. Defaults to 0.6, the
            pause that was previously hard-coded.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled gateway blocks the notebook
            indefinitely.
    """
    # Globals are only read here, so no ``global`` declaration is needed.
    url = f"{HOST_ADDRESS}/gateway/applications/{APPLICATION_NAME}/{SIGNATURE_NAME}"
    print("Using URL", url)
    for index, row in data.iterrows():
        # The payload is a single-row batch: a list containing one list of values.
        point = [row.values.tolist()]
        print(point)

        r = requests.post(url=url, json={'features': point}, timeout=timeout)

        if labels is not None:
            print(r.json(), bool(labels.loc[index]))
        else:
            print(r.json())
        time.sleep(delay)

### Isolation Forests

In [12]:
# Point `send_data` at the "fraud_if" application; both names are read as
# module-level globals when the request URL is built.
SIGNATURE_NAME = "infer"
APPLICATION_NAME = "fraud_if"

In [13]:
# Send the `clean` rows one by one, without labels. The loop covers the whole
# frame, so interrupt the kernel to stop it (the saved output ends in
# KeyboardInterrupt).
send_data(clean)

Using URL http://localhost/gateway/applications/fraud_if/infer
[[141.0, 1.0, 33107.12, 0.0, 0.0, 4245938.03, 4279045.14]]
{'is_fraud': [False]}
[[17.0, 0.0, 279226.33, 2512072.07, 2791298.4, 1417433.03, 1138206.7]]
{'is_fraud': [False]}
[[323.0, 1.0, 349919.29, 509.0, 0.0, 376458.59, 726377.88]]
{'is_fraud': [False]}
[[189.0, 1.0, 85566.7, 0.0, 0.0, 338844.19, 424410.89]]
{'is_fraud': [False]}
[[378.0, 3.0, 11770.45, 0.0, 0.0, 0.0, 0.0]]
{'is_fraud': [False]}
[[254.0, 0.0, 151322.84, 5743.0, 157065.84, 5470448.42, 5319125.59]]
{'is_fraud': [False]}


KeyboardInterrupt: 

In [None]:
# Send the shuffled mix of fraud and held-out non-fraud rows; the true label is
# printed next to each response.
send_data(fraudulent, fraudulent_labels)

### Random Forests

In [47]:
# Point `send_data` at the "fraud_rf" application; both names are read as
# module-level globals when the request URL is built.
SIGNATURE_NAME = "infer"
APPLICATION_NAME = "fraud_rf"

In [48]:
# Send the `clean` rows one by one, without labels. The loop covers the whole
# frame, so interrupt the kernel to stop it (the saved output ends in
# KeyboardInterrupt).
send_data(clean)

Using URL http://localhost/gateway/applications/fraud_rf/infer
[[185.0, 4.0, 2684305.86, 0.0, 0.0, 2752653.37, 5436959.23]]
{'is_fraud': [False]}
[[236.0, 3.0, 11322.73, 12077.0, 754.27, 0.0, 0.0]]
{'is_fraud': [False]}
[[133.0, 1.0, 245843.27, 0.0, 0.0, 689187.18, 935030.46]]
{'is_fraud': [False]}
[[308.0, 1.0, 25031.71, 241281.0, 216249.29, 0.0, 25031.71]]
{'is_fraud': [False]}
[[302.0, 3.0, 7484.46, 0.0, 0.0, 0.0, 0.0]]
{'is_fraud': [False]}
[[375.0, 0.0, 27578.53, 21331.0, 48909.53, 583781.06, 556202.53]]
{'is_fraud': [False]}


KeyboardInterrupt: 

In [None]:
# Send the shuffled mix of fraud and held-out non-fraud rows; the true label is
# printed next to each response.
send_data(fraudulent, fraudulent_labels)