# **INITIATION**

In [None]:
# # Import Library

import numpy as np
import pandas as pd
import matplotlib as plt
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# tensorflow
import tensorflow as tf
import tensorflow_datasets as tfds

# **DATA COLLECTING**

In [None]:
# Mount Google Drive (Colab only) so the dataset files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that holds every dataset used in this notebook.
DATASET_ROOT = "/content/drive/MyDrive/CAPSTONE 2023/dataset"

# Full paths of the three raw data files, all located below DATASET_ROOT.
FRAUD_DETECTION_PATH = DATASET_ROOT + "/Fraud Detection Dataset/transactions/transactions.txt"
FINDING_DONORS_PATH = DATASET_ROOT + "/Finding Donors for CharityML/census.csv"
ONLINE_FRAUD_PATH = DATASET_ROOT + "/Online Payments Fraud Detection Dataset/PS_20174392719_1491204439457_log.csv"

# Fraud Detection DF

In [None]:
# lines=True: the file is parsed as JSON Lines, i.e. one JSON record per line.
fraud_df = pd.read_json(FRAUD_DETECTION_PATH, lines=True)

In [None]:
# List the available columns before choosing the model features.
fraud_df.columns

Index(['accountNumber', 'customerId', 'creditLimit', 'availableMoney',
       'transactionDateTime', 'transactionAmount', 'merchantName',
       'acqCountry', 'merchantCountryCode', 'posEntryMode', 'posConditionCode',
       'merchantCategoryCode', 'currentExpDate', 'accountOpenDate',
       'dateOfLastAddressChange', 'cardCVV', 'enteredCVV', 'cardLast4Digits',
       'transactionType', 'echoBuffer', 'currentBalance', 'merchantCity',
       'merchantState', 'merchantZip', 'cardPresent', 'posOnPremises',
       'recurringAuthInd', 'expirationDateKeyInMatch', 'isFraud'],
      dtype='object')

Columns used: merchantName, merchantCategoryCode, transactionType, merchantCity, isFraud

In [None]:
# Preview the first ten transactions.
fraud_df.head(10)

Unnamed: 0,accountNumber,customerId,creditLimit,availableMoney,transactionDateTime,transactionAmount,merchantName,acqCountry,merchantCountryCode,posEntryMode,...,echoBuffer,currentBalance,merchantCity,merchantState,merchantZip,cardPresent,posOnPremises,recurringAuthInd,expirationDateKeyInMatch,isFraud
0,737265056,737265056,5000,5000.0,2016-08-13T14:27:32,98.55,Uber,US,US,2,...,,0.0,,,,False,,,False,False
1,737265056,737265056,5000,5000.0,2016-10-11T05:05:54,74.51,AMC #191138,US,US,9,...,,0.0,,,,True,,,False,False
2,737265056,737265056,5000,5000.0,2016-11-08T09:18:39,7.47,Play Store,US,US,9,...,,0.0,,,,False,,,False,False
3,737265056,737265056,5000,5000.0,2016-12-10T02:14:50,7.47,Play Store,US,US,9,...,,0.0,,,,False,,,False,False
4,830329091,830329091,5000,5000.0,2016-03-24T21:04:46,71.18,Tim Hortons #947751,US,US,2,...,,0.0,,,,True,,,False,False
5,830329091,830329091,5000,5000.0,2016-04-19T16:24:27,30.76,In-N-Out #422833,US,US,2,...,,0.0,,,,True,,,False,False
6,830329091,830329091,5000,5000.0,2016-05-21T14:50:35,57.28,Krispy Kreme #685312,US,US,2,...,,0.0,,,,True,,,False,False
7,830329091,830329091,5000,5000.0,2016-06-03T00:31:21,9.37,Shake Shack #968081,US,US,5,...,,0.0,,,,True,,,False,False
8,830329091,830329091,5000,4990.63,2016-06-10T01:21:46,523.67,Burger King #486122,,US,2,...,,9.37,,,,True,,,False,False
9,830329091,830329091,5000,5000.0,2016-07-11T10:47:16,164.37,Five Guys #510989,US,US,5,...,,0.0,,,,True,,,False,False


# Finding Donors DF

In [None]:
# Load the census CSV used by the "Finding Donors" dataset.
donors_df = pd.read_csv(FINDING_DONORS_PATH)

In [None]:
# List the available columns of the donors data.
donors_df.columns

Index(['age', 'workclass', 'education_level', 'education-num',
       'marital-status', 'occupation', 'relationship', 'race', 'sex',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')

In [None]:
# Preview the first ten census rows.
donors_df.head(10)

Unnamed: 0,age,workclass,education_level,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,Bachelors,13.0,Never-married,Adm-clerical,Not-in-family,White,Male,2174.0,0.0,40.0,United-States,<=50K
1,50,Self-emp-not-inc,Bachelors,13.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,0.0,0.0,13.0,United-States,<=50K
2,38,Private,HS-grad,9.0,Divorced,Handlers-cleaners,Not-in-family,White,Male,0.0,0.0,40.0,United-States,<=50K
3,53,Private,11th,7.0,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0.0,0.0,40.0,United-States,<=50K
4,28,Private,Bachelors,13.0,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0.0,0.0,40.0,Cuba,<=50K
5,37,Private,Masters,14.0,Married-civ-spouse,Exec-managerial,Wife,White,Female,0.0,0.0,40.0,United-States,<=50K
6,49,Private,9th,5.0,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0.0,0.0,16.0,Jamaica,<=50K
7,52,Self-emp-not-inc,HS-grad,9.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,0.0,0.0,45.0,United-States,>50K
8,31,Private,Masters,14.0,Never-married,Prof-specialty,Not-in-family,White,Female,14084.0,0.0,50.0,United-States,>50K
9,42,Private,Bachelors,13.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,5178.0,0.0,40.0,United-States,>50K


# Online Fraud DF

In [None]:
# Load the online-payments transaction log (CSV).
online_df = pd.read_csv(ONLINE_FRAUD_PATH)

In [None]:
# List the available columns of the online-payments data.
online_df.columns

Index(['step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
       'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud',
       'isFlaggedFraud'],
      dtype='object')

type, nameOrig, oldbalanceOrg, newbalanceOrig, oldbalanceDest, newbalanceDest, isFraud

In [None]:
# Preview the first ten online-payment rows.
online_df.head(10)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0
5,1,PAYMENT,7817.71,C90045638,53860.0,46042.29,M573487274,0.0,0.0,0,0
6,1,PAYMENT,7107.77,C154988899,183195.0,176087.23,M408069119,0.0,0.0,0,0
7,1,PAYMENT,7861.64,C1912850431,176087.23,168225.59,M633326333,0.0,0.0,0,0
8,1,PAYMENT,4024.36,C1265012928,2671.0,0.0,M1176932104,0.0,0.0,0,0
9,1,DEBIT,5337.77,C712410124,41720.0,36382.23,C195600860,41898.0,40348.79,0,0


# **DATA CLEANING**

# **DATA PREPROCESSING**

# Fraud Detection

In [None]:
# Columns the pre-event model works with; the last one (isFraud) is the label.
# Selecting by name instead of by position keeps the cell correct if the
# column order of the source file ever changes.
PRE_EVENT_COLUMNS = ['merchantName', 'merchantCategoryCode', 'transactionType', 'merchantCity', 'isFraud']

# .copy() makes this an independent frame, so the column assignments in the
# following cells no longer raise SettingWithCopyWarning.
data_fraud_df = fraud_df[PRE_EVENT_COLUMNS].copy()
data_fraud_df

Unnamed: 0,merchantName,merchantCategoryCode,transactionType,merchantCity,isFraud
0,Uber,rideshare,PURCHASE,,False
1,AMC #191138,entertainment,PURCHASE,,False
2,Play Store,mobileapps,PURCHASE,,False
3,Play Store,mobileapps,PURCHASE,,False
4,Tim Hortons #947751,fastfood,PURCHASE,,False
...,...,...,...,...,...
786358,Lyft,rideshare,PURCHASE,,False
786359,hulu.com,online_subscriptions,PURCHASE,,False
786360,Lyft,rideshare,PURCHASE,,False
786361,walmart.com,online_retail,PURCHASE,,False


Labelling Fraud DF

In [None]:
le = LabelEncoder()
# data_fraud_df['transactionType'] = le.fit_transform(data_fraud_df['transactionType'])
# trans_type_info = dict(zip(le.classes_, le.transform(le.classes_)))

# data_fraud_df['merchantCategoryCode'] = le.fit_transform(data_fraud_df['merchantCategoryCode'])
# merchant_cat_info = dict(enumerate(le.classes_, 0))

# Encode the fraud flag as integers. `assign` returns a new frame instead of
# writing into what pandas may consider a slice of another frame, which is the
# operation that raised SettingWithCopyWarning.
data_fraud_df = data_fraud_df.assign(isFraud=le.fit_transform(data_fraud_df['isFraud']))
# Lookup from encoded integer back to the original label value.
is_Fraud_info = dict(enumerate(le.classes_, 0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_fraud_df['isFraud'] = le.fit_transform(data_fraud_df['isFraud'])


In [None]:
# Show which integer stands for which original isFraud value.
print(is_Fraud_info)

{0: False, 1: True}


In [None]:
# Remove the trailing store number from merchantName ("AMC #191138" -> "AMC").
#
# regex=True is required: without it, pandas >= 2.0 treats the pattern as a
# literal string and nothing is removed (older versions only emitted the
# FutureWarning seen in earlier runs). The leading \s* also drops the blank
# that used to be left behind ("AMC "), so names carry no trailing whitespace.
mn_removed = data_fraud_df['merchantName'].str.replace(r"\s*#\d+$", "", regex=True)
# `assign` builds a new frame, avoiding SettingWithCopyWarning.
data_fraud_df = data_fraud_df.assign(merchantName=mn_removed)
data_fraud_df['merchantName']

  mn_removed = data_fraud_df['merchantName'].str.replace(r"#\d+$", "")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_fraud_df['merchantName'] = mn_removed


0                 Uber
1                 AMC 
2           Play Store
3           Play Store
4         Tim Hortons 
              ...     
786358            Lyft
786359        hulu.com
786360            Lyft
786361     walmart.com
786362            Uber
Name: merchantName, Length: 786363, dtype: object

In [None]:
mn_col = data_fraud_df['merchantName']
list_city = ['Surabaya', 'Jakarta', 'Bandung', 'Yogyakarta', 'Medan', 'Semarang', 'Makassar', 'Denpasar', 'Lombok', 'Balikpapan', 'Aceh', 'Serang', 'Pontianak', 'Gorontalo', 'Mamuju']

# Assign one randomly chosen city to every distinct merchant name.
# A dedicated, seeded generator makes the mapping identical on every run
# (the unseeded version produced a different mapping each time the cell ran),
# and drawing once per distinct name avoids one draw per row.
city_rng = random.Random(42)
new_constraint_data = {merchant: city_rng.choice(list_city) for merchant in mn_col.unique()}
new_merchant_city = mn_col.map(new_constraint_data).tolist()

# Overwrite the merchantCity column; `assign` avoids SettingWithCopyWarning.
data_fraud_df = data_fraud_df.assign(merchantCity=new_merchant_city)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_fraud_df['merchantCity'] = new_merchant_city  # timpa data merchant city


In [None]:
# Show the merchant-name -> city mapping that was generated above.
print(new_constraint_data)

{'Uber': 'Makassar', 'AMC ': 'Lombok', 'Play Store': 'Surabaya', 'Tim Hortons ': 'Medan', 'In-N-Out ': 'Pontianak', 'Krispy Kreme ': 'Gorontalo', 'Shake Shack ': 'Lombok', 'Burger King ': 'Gorontalo', 'Five Guys ': 'Mamuju', "Auntie Anne's ": 'Denpasar', 'GreenCook': 'Semarang', 'Washington Repair': 'Serang', 'Eazy Tire': 'Gorontalo', 'Convenient Auto Services': 'Semarang', 'Shell Auto Body': 'Pontianak', 'Fast Auto Services': 'Jakarta', 'staples.com': 'Aceh', 'Convenient Repair': 'Mamuju', 'Shell Repair': 'Jakarta', 'Eazy Repair': 'Mamuju', 'Fast Repair': 'Makassar', 'Eazy Auto Body': 'Balikpapan', 'Washington Tire': 'Medan', 'target.com': 'Yogyakarta', 'Convenient Tire': 'Serang', 'amazon.com': 'Mamuju', 'Shell Tire': 'Pontianak', 'Merchants Auto Body': 'Surabaya', 'gap.com': 'Denpasar', 'Fast Tire': 'Makassar', 'sears.com': 'Denpasar', 'apple.com': 'Mamuju', 'Merchants Tire': 'Makassar', 'Ricks Auto Body': 'Surabaya', 'Merchants Auto Services': 'Balikpapan', 'Curves ': 'Aceh', 'oldn

In [None]:
# Display the frame after cleaning merchantName and filling merchantCity.
data_fraud_df

Unnamed: 0,merchantName,merchantCategoryCode,transactionType,merchantCity,isFraud
0,Uber,rideshare,PURCHASE,Makassar,0
1,AMC,entertainment,PURCHASE,Lombok,0
2,Play Store,mobileapps,PURCHASE,Surabaya,0
3,Play Store,mobileapps,PURCHASE,Surabaya,0
4,Tim Hortons,fastfood,PURCHASE,Medan,0
...,...,...,...,...,...
786358,Lyft,rideshare,PURCHASE,Yogyakarta,0
786359,hulu.com,online_subscriptions,PURCHASE,Surabaya,0
786360,Lyft,rideshare,PURCHASE,Yogyakarta,0
786361,walmart.com,online_retail,PURCHASE,Aceh,0


In [None]:
# Separate the label column (isFraud, the last column) from the feature columns.
y_data = data_fraud_df[['isFraud']]
x_data = data_fraud_df.drop(columns=['isFraud'])

ENCODING

In [None]:
# encoded = pd.get_dummies(x_data['merchantCity'])
# new_encoded_col_city = dict(enumerate(encoded, 1))
# new_encoded_col_city = {y: x for x, y in new_encoded_col_city.items()}  # switch keys and values (city name -> number)
# encoded = encoded.rename(columns = new_encoded_col_city)
# x_encoded = pd.concat([x_data.iloc[:, :-1], encoded], axis=1)
# x_encoded

In [None]:
# Optional: use this if merchantName should be label-encoded
# x_encoded_label = x_encoded.groupby('merchantName').ngroup()  # ----> keywords: pandas.core.groupby.GroupBy.ngroup()
# x_encoded['merchantName'] = x_encoded_label
# x_encoded

In [None]:
# Inspect the generated city column of the feature frame.
x_data['merchantCity']

0            Jakarta
1         Yogyakarta
2           Semarang
3           Semarang
4              Medan
             ...    
786358        Mamuju
786359      Surabaya
786360        Mamuju
786361        Lombok
786362       Jakarta
Name: merchantCity, Length: 786363, dtype: object

# On going event prediction

In [None]:
# Keep only the columns the on-going model needs by dropping the rest.
unused_online_columns = ['step', 'nameOrig', 'amount', 'nameDest', 'isFlaggedFraud']
online_fraud = online_df.drop(columns=unused_online_columns)
online_fraud

Unnamed: 0,type,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud
0,PAYMENT,170136.00,160296.36,0.00,0.00,0
1,PAYMENT,21249.00,19384.72,0.00,0.00,0
2,TRANSFER,181.00,0.00,0.00,0.00,1
3,CASH_OUT,181.00,0.00,21182.00,0.00,1
4,PAYMENT,41554.00,29885.86,0.00,0.00,0
...,...,...,...,...,...,...
6362615,CASH_OUT,339682.13,0.00,0.00,339682.13,1
6362616,TRANSFER,6311409.28,0.00,0.00,0.00,1
6362617,CASH_OUT,6311409.28,0.00,68488.84,6379898.11,1
6362618,TRANSFER,850002.52,0.00,0.00,0.00,1


In [None]:
# Features (independent variables) and label of the online-fraud data;
# isFraud is the last column of online_fraud.
y_data_of = online_fraud[['isFraud']]
x_data_of = online_fraud.drop(columns=['isFraud'])
#y_data_of  # reminder to self, keep this line


In [None]:
# # Label-encode the payment type (convert it to numbers)
# le = LabelEncoder()
# x_data_of['type'] = le.fit_transform(x_data_of['type'])
# dict(zip(le.classes_, le.transform(le.classes_)))

In [None]:
# Display the online-fraud feature frame.
x_data_of

Unnamed: 0,type,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
0,PAYMENT,170136.00,160296.36,0.00,0.00
1,PAYMENT,21249.00,19384.72,0.00,0.00
2,TRANSFER,181.00,0.00,0.00,0.00
3,CASH_OUT,181.00,0.00,21182.00,0.00
4,PAYMENT,41554.00,29885.86,0.00,0.00
...,...,...,...,...,...
6362615,CASH_OUT,339682.13,0.00,0.00,339682.13
6362616,TRANSFER,6311409.28,0.00,0.00,0.00
6362617,CASH_OUT,6311409.28,0.00,68488.84,6379898.11
6362618,TRANSFER,850002.52,0.00,0.00,0.00


Splitting untuk Online Fraud (of)

In [None]:
# Hold out 15 % of the online-fraud rows for validation.
# random_state pins the shuffle so the split (and every number derived from it)
# is reproducible; stratify keeps the fraud ratio equal in both parts, which
# matters because the fraud class is a tiny minority.
x_train_of, x_val_of, y_train_of, y_val_of = train_test_split(
    x_data_of,
    y_data_of,
    test_size=0.15,
    random_state=42,
    stratify=y_data_of.iloc[:, 0],
)
print(f'Jumlah Data Training untuk Online Fraud: {len(x_train_of)}\nJumlah Data Validation untuk Online Fraud: {len(x_val_of)}')

Jumlah Data Training untuk Online Fraud: 5408227
Jumlah Data Validation untuk Online Fraud: 954393


In [None]:
# Display the training features before scaling.
x_train_of

Unnamed: 0,type,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
2759734,CASH_OUT,815.00,0.00,0.00,184687.12
5199155,PAYMENT,125649.41,107686.64,0.00,0.00
5872782,CASH_OUT,0.00,0.00,1720571.93,1825567.62
4712208,CASH_IN,2088626.65,2163452.45,894062.07,819236.26
4301319,CASH_OUT,0.00,0.00,4844516.08,5125084.43
...,...,...,...,...,...
5168147,PAYMENT,0.00,0.00,0.00,0.00
1504958,CASH_OUT,0.00,0.00,388545.56,654280.45
2615269,TRANSFER,60.00,0.00,29887.39,443956.50
3382395,PAYMENT,465250.58,457708.34,0.00,0.00


Normalisasi Data

In [None]:
# Standardise every column after the first one (`type` stays untouched).
# The scaler is fitted on the training split only and then re-used, unchanged,
# on the validation split.
# NOTE(review): x_train_of / x_val_of are overwritten in place, so running this
# cell a second time scales already-scaled data -- re-run the split cell first.
sc = StandardScaler()
x_train_of.iloc[:, 1:] = sc.fit_transform(x_train_of.iloc[:, 1:])
x_val_of.iloc[:, 1:] = sc.transform(x_val_of.iloc[:, 1:])

x_train_of

Unnamed: 0,type,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
2759734,CASH_OUT,-0.288420,-0.292420,-0.323384,-0.282779
5199155,PAYMENT,-0.245214,-0.255606,-0.323384,-0.332953
5872782,CASH_OUT,-0.288702,-0.292420,0.181804,0.162997
4712208,CASH_IN,0.434180,0.447185,-0.060873,-0.110392
4301319,CASH_OUT,-0.288702,-0.292420,1.099045,1.059372
...,...,...,...,...,...
5168147,PAYMENT,-0.288702,-0.292420,-0.323384,-0.332953
1504958,CASH_OUT,-0.288702,-0.292420,-0.209300,-0.155205
2615269,TRANSFER,-0.288681,-0.292420,-0.314608,-0.212344
3382395,PAYMENT,-0.127677,-0.135946,-0.323384,-0.332953


# **Model Building**

Pre-event Model

Model yang dipakai untuk mengklasifikasikan event sebelum berlangsung (pada saat proses inisiasi), yaitu ketika user akan membuat suatu event. Data yang digunakan: **nama_penyelengara**, **kategori_event**, **tipe_transaksi**, dan **kota_asal**.

In [None]:
# Distinct values of each categorical feature column. They are used as the
# default StringLookup vocabularies in PreEventModel's constructor.
unique_merchant = np.unique(x_data['merchantName'])
unique_transaction_type = np.unique(x_data['transactionType'])
unique_merchant_category = np.unique(x_data['merchantCategoryCode'])


In [None]:
class PreEventModel(tf.keras.Model):
  """Binary fraud classifier for an event at creation time.

  Four string features (merchant name, merchant category, transaction type and
  merchant city) are each encoded by their own sub-network; the encodings are
  concatenated and mapped to one sigmoid score.

  Args:
    name: Optional Keras model name.
    input_shape: Not used by the model; kept so existing calls keep working.
    city_list: Vocabulary for the merchant-city lookup.
    merchant_name: Vocabulary for the merchant-name lookup.
    trans_type: Vocabulary for the transaction-type lookup.
    merchant_cat: Vocabulary for the merchant-category lookup.
  """
  def __init__(self,name=None, input_shape=(None, 18), city_list=list_city, merchant_name=unique_merchant, trans_type=unique_transaction_type, merchant_cat=unique_merchant_category):
    super(PreEventModel, self).__init__(name=name)
    self.embed_dim = 32
    # Not used inside the model; callers compare the predicted score against it.
    self.threshold = 0.7

    self.merchant_embed = tf.keras.Sequential([
        tf.keras.layers.StringLookup(vocabulary=merchant_name, mask_token=None),
        # Sized from the vocabulary actually passed in (previously the global
        # `unique_merchant`); +1 for the out-of-vocabulary index of StringLookup.
        tf.keras.layers.Embedding(len(merchant_name) + 1, self.embed_dim)
    ], name='merchant_embed')

    self.transType_model = tf.keras.Sequential([
        # One-hot output: with the default integer output the Dense layer would
        # receive the vocabulary index as a magnitude, imposing an arbitrary
        # order on the transaction types.
        tf.keras.layers.StringLookup(vocabulary=trans_type, output_mode='one_hot'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(len(trans_type))
    ], name='transType_model')

    self.merchantCategory_model = tf.keras.Sequential([
        # Must use the category vocabulary. With the transaction-type vocabulary
        # every category fell into the out-of-vocabulary bucket, so the feature
        # carried no information.
        tf.keras.layers.StringLookup(vocabulary=merchant_cat, output_mode='multi_hot'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(len(merchant_cat))
    ], name='merchant_cat_model')

    self.merchantCity_embed = tf.keras.Sequential([
        tf.keras.layers.StringLookup(vocabulary=city_list, output_mode='one_hot'),
        # Sized from the `city_list` argument rather than the global `list_city`.
        tf.keras.layers.Dense(len(city_list))
    ], name='merchantCity_embed')

    self.last_layer = tf.keras.Sequential([
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ], name='output_model')



  def call(self, inputs):
    """Score a batch.

    Args:
      inputs: Sequence of four tensors/arrays in the order merchant name,
        merchant category, transaction type, merchant city.

    Returns:
      Output of the final sigmoid layer for the concatenated encodings.
    """
    merchantName, merchant_cat, merchant_type, merchantCity = inputs
    # Kept as an attribute, as before, in case other code reads it.
    self.merged_model = tf.concat([self.merchant_embed(merchantName), self.merchantCategory_model(merchant_cat), self.transType_model(merchant_type), self.merchantCity_embed(merchantCity)], axis=1)
    return self.last_layer(self.merged_model)

In [None]:
# Distinct transaction types; default StringLookup vocabulary of OnGoingEventModel.
unique_type = np.unique(x_data_of['type'])

In [None]:
class OnGoingEventModel(tf.keras.Model):
  """Binary fraud classifier for transactions of a running event.

  The transaction type and two numeric feature groups (customer data and
  event-holder data) are encoded by separate sub-networks; the encodings are
  concatenated and mapped to one sigmoid score.

  Args:
    input_shape: Shape of each numeric feature group. A leading ``None``
      (batch axis) is accepted for backward compatibility and ignored.
    type_list: Vocabulary for the transaction-type lookup.
  """
  def __init__(self,input_shape=(None,2), type_list=unique_type):
    super().__init__()
    # Not used inside the model; callers compare the predicted score against it.
    self.threshold = 0.7

    # Keras' `input_shape` describes one sample and must not contain the batch
    # axis. The default (None, 2) included it, which declared rank-3 inputs
    # while the model is fed (batch, 2) arrays. Strip a leading None here.
    feature_shape = tuple(input_shape)
    if len(feature_shape) > 1 and feature_shape[0] is None:
      feature_shape = feature_shape[1:]

    self.event_holder_model = tf.keras.Sequential([
        tf.keras.layers.Dense(256, activation='relu', input_shape=feature_shape),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(32)
    ], name='event_holder_model')
    self.costumer_model = tf.keras.Sequential([
        tf.keras.layers.Dense(256, activation='relu', input_shape=feature_shape),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(32)
    ], name='costumer_model')

    self.type_model = tf.keras.Sequential([
        # One-hot output: with the default integer output the Dense layer would
        # receive the vocabulary index as a magnitude, imposing an arbitrary
        # order on the transaction types.
        tf.keras.layers.StringLookup(vocabulary=type_list, output_mode='one_hot'),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(len(type_list))
    ], name='type_model')

    self.last_layer = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ], name='output_model')

  def call(self, inputs):
    """Score a batch.

    Args:
      inputs: Sequence of three tensors/arrays in the order transaction type,
        event-holder data, customer data.

    Returns:
      Output of the final sigmoid layer for the concatenated encodings.
    """
    trans_type, event_holder_data , costumer_data = inputs
    # Kept as an attribute, as before, in case other code reads it.
    self.merged_model = tf.concat([self.type_model(trans_type), self.costumer_model(costumer_data), self.event_holder_model(event_holder_data)], axis=1)
    return self.last_layer(self.merged_model)

# **Data Training**

In [None]:
# Number of training epochs passed to both models' fit() calls.
EPOCH = 2

In [None]:
def get_pre_event_model():
  """Build and compile a fresh PreEventModel.

  The function had no body, which is a syntax error. It now returns a new
  model compiled with the same optimizer, loss and metric that the notebook
  uses for `pre_model`.

  Returns:
    A compiled, untrained PreEventModel instance.
  """
  model = PreEventModel()
  model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])
  return model


In [None]:
# Instantiate the pre-event model with its default vocabularies.
pre_model = PreEventModel()

In [None]:
# Instantiate the on-going event model with its default arguments.
onGoing_model = OnGoingEventModel()

# **pre-model Event build**

**Model_var** = pre_model

**Features** : x_data,
**label** : y_data

**Data Splitting**

In [None]:
# Hold out 10 % of the fraud-detection rows for validation.
# random_state pins the shuffle so the split is reproducible; stratify keeps
# the share of fraud rows equal in the training and validation parts.
x_train_fraud, x_val_fraud, y_train_fraud, y_val_fraud = train_test_split(
    x_data,
    y_data,
    test_size=0.10,
    random_state=42,
    stratify=y_data.iloc[:, 0],
)
print(f'Jumlah Data Training untuk Fraud: {len(x_train_fraud)}\nJumlah Data Validation untuk Fraud: {len(x_val_fraud)}')

In [None]:
# Sanity check: features and labels must have the same number of rows.
print(x_train_fraud.shape, y_train_fraud.shape)

(707726, 4) (707726, 1)


x_train_fraud.iloc[:, 0].values, x_train_fraud.iloc[:, 1:-1].values, x_train_fraud.iloc[:, -1].values

In [None]:
# Confirm the feature column names used to build the model inputs.
x_train_fraud.columns

Index(['merchantName', 'merchantCategoryCode', 'transactionType',
       'merchantCity'],
      dtype='object')

In [None]:
# Extract each training feature column as a NumPy array.
merchant_name = x_train_fraud['merchantName'].values
merchant_cat = x_train_fraud['merchantCategoryCode'].values
merchant_type = x_train_fraud['transactionType'].values
merchant_city = x_train_fraud['merchantCity'].values
# NOTE(review): this rebinds `y_data`, which until here was the label frame of
# the whole data set, to the training labels only. Re-running the split cell
# afterwards will fail on mismatched lengths; consider a separate name.
y_data = y_train_fraud.values

In [None]:
# Configure training: Adam with learning rate 1e-3, binary cross-entropy loss, accuracy metric.
pre_model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Inputs in the order PreEventModel.call unpacks them (name, category, type, city).
# Category and transaction type get an extra trailing axis, i.e. shape (n, 1);
# merchant name and city stay one-dimensional.
features_model_input = [merchant_name, np.expand_dims(merchant_cat, axis=1), np.expand_dims(merchant_type, axis=1), merchant_city]

In [None]:
# Train the pre-event model for EPOCH epochs in batches of 1024 rows.
pre_model.fit(features_model_input, y_data, epochs=EPOCH, batch_size=1024)

In [None]:
# Print the sub-models with their output shapes and parameter counts.
pre_model.summary()

Model: "pre_event_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 merchant_embed (Sequential)  (None, 32)               6592      
                                                                 
 transType_model (Sequential  (None, 4)                33924     
 )                                                               
                                                                 
 merchant_cat_model (Sequent  (None, 19)               36883     
 ial)                                                            
                                                                 
 merchantCity_embed (Sequent  (None, 15)               255       
 ial)                                                            
                                                                 
 output_model (Sequential)   (None, 1)                 55233     
                                                 

# **onGoingEvent Build**

**Model_var** = onGoing_model

**Train_Features** : x_train_of,
**Train_label** : y_train_of

**val_Features** : x_val_of,
**val_label** : y_val_of

In [None]:
# Sanity check: features and labels must have the same number of rows.
print(x_train_of.shape, y_train_of.shape)

(5408227, 5) (5408227, 1)


In [None]:
# Split the training features into the three inputs of OnGoingEventModel.
# Column 0 is the transaction type; it gets a trailing axis -> shape (n, 1).
type_data = np.expand_dims(x_train_of.iloc[:, 0].values, axis=1)
# Columns 1..2 (oldbalanceOrg, newbalanceOrig) form the customer group.
costumer_data = x_train_of.iloc[:, 1:-2].values
# The last two columns (oldbalanceDest, newbalanceDest) form the event-holder group.
event_holder_data = x_train_of.iloc[:, -2:].values
# np.asarray works on a DataFrame and on an ndarray alike, so this cell can be
# re-run; `y_train_of.values` raised AttributeError on the second run because
# the name already pointed at an ndarray.
y_train_of = np.asarray(y_train_of)

In [None]:
# Configure training: Adam with learning rate 1e-3, binary cross-entropy loss, accuracy metric.
onGoing_model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the on-going model for EPOCH epochs in batches of 65536 rows.
onGoing_model.fit([type_data, costumer_data, event_holder_data], y_train_of, epochs=EPOCH, batch_size=65536)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7efbe0773310>

# **Model Testing**

# Pre-Event Model

In [None]:
# Look at one merchant name of the validation set. `.iloc[20, 0]` already
# returns a scalar, so the former `.values` access raised AttributeError.
x_val_fraud.iloc[20, 0]

In [None]:
# Evaluate on the validation split. PreEventModel.call unpacks exactly four
# inputs (name, category, type, city); the previous call passed three, with
# category and type merged into one two-column array. The inputs are shaped
# the same way as the training inputs.
pre_model.evaluate(
    [
        x_val_fraud['merchantName'].values,
        np.expand_dims(x_val_fraud['merchantCategoryCode'].values, axis=1),
        np.expand_dims(x_val_fraud['transactionType'].values, axis=1),
        x_val_fraud['merchantCity'].values,
    ],
    y_val_fraud,
    return_dict=True,
)

In [None]:
# Predict for validation rows 20..24. All four inputs are taken from the same
# five rows; the previous call mixed two scalars with a five-row frame and
# passed three inputs to a model that expects four.
pre_sample_rows = x_val_fraud.iloc[20:25]
y_pre_pred = pre_model.predict([
    pre_sample_rows['merchantName'].values,
    np.expand_dims(pre_sample_rows['merchantCategoryCode'].values, axis=1),
    np.expand_dims(pre_sample_rows['transactionType'].values, axis=1),
    pre_sample_rows['merchantCity'].values,
])
y_pre_pred



array([[2.04285011e-02],
       [1.27778575e-02],
       [2.63563748e-02],
       [1.28777783e-05],
       [1.28777783e-05]], dtype=float32)

In [None]:
# Turn scores into 0/1 labels: 1 where the score exceeds the model's threshold.
(y_pre_pred > pre_model.threshold).astype(int)

array([[0],
       [0],
       [0],
       [0],
       [0]])

In [None]:
# Export the pre-event model with tf.saved_model.save to the project folder on Drive.
# NOTE(review): the target is the project root itself, not a dedicated model
# sub-folder -- confirm this is intended.
tf.saved_model.save(pre_model, '/content/drive/MyDrive/CAPSTONE 2023/')



# On-going Event Model

In [None]:
# Build the three validation inputs the same way as the training inputs,
# then evaluate the on-going model on them.
val_type_of = np.expand_dims(x_val_of.iloc[:, 0].values, axis=1)
val_costumer_of = x_val_of.iloc[:, 1:-2].values
val_event_holder_of = x_val_of.iloc[:, -2:].values
onGoing_model.evaluate([val_type_of, val_costumer_of, val_event_holder_of], y_val_of, return_dict=True)



{'loss': 0.0032163113355636597, 'accuracy': 0.9993011355400085}

In [None]:
# Predict for validation rows 10..14.
on_sample_rows = x_val_of.iloc[10:15]
on_sample_inputs = [
    np.expand_dims(on_sample_rows.iloc[:, 0].values, axis=1),
    on_sample_rows.iloc[:, 1:-2].values,
    on_sample_rows.iloc[:, -2:].values,
]
y_on_pred = onGoing_model.predict(on_sample_inputs)
y_on_pred



array([[1.5781833e-04],
       [4.2757316e-04],
       [2.5923412e-06],
       [3.1840544e-05],
       [9.2897948e-04]], dtype=float32)

In [None]:
# Turn scores into 0/1 labels: 1 where the score exceeds the model's threshold.
(y_on_pred > onGoing_model.threshold).astype(int)

array([[0],
       [0],
       [0],
       [0],
       [0]])