## Encoder–Decoder (Autoencoder) Dimensionality Reduction

In [1]:
import pandas as pd
import numpy as np

# Read the scaled strategy-2 table from disk and preview its first two rows.
scaled_path = "../0 - Data/4 - scaled/ft_strategy_2_scaled.pq"
df = pd.read_parquet(scaled_path)
df.head(2)

Unnamed: 0,Amount,MCC,Has Chip,Cards Issued,Credit Limit,Current Age,Retirement Age,Latitude,Longitude,Per Capita Income - Zipcode,...,Use Chip_Online Transaction,Use Chip_Swipe Transaction,Card Type_Debit,Card Type_Debit (Prepaid),Gender_Male,Age Group_26-35,Age Group_36-45,Age Group_46-60,Age Group_60+,Is Fraud
6780,1.025128,-0.028966,0.337145,0.963448,0.738314,-0.09811,-0.185577,-0.631234,-1.563208,0.447753,...,False,True,True,False,False,False,False,True,False,0
6781,1.025128,-0.028966,0.337145,0.963448,0.551106,-0.09811,-0.185577,-0.631234,-1.563208,0.447753,...,False,True,True,False,False,False,False,True,False,0


In [6]:
# Model input: every column except the target and the 'Datetime' column.
excluded_cols = ['Is Fraud', 'Datetime']
X = df.drop(columns=excluded_cols)

# Layer sizes: the input width follows the feature matrix, while the latent
# (encoded) width is a fixed choice.
input_dim = X.shape[1]
encoding_dim = 50

X.head(2)

Unnamed: 0,Amount,MCC,Has Chip,Cards Issued,Credit Limit,Current Age,Retirement Age,Latitude,Longitude,Per Capita Income - Zipcode,...,Bad PIN Error,Use Chip_Online Transaction,Use Chip_Swipe Transaction,Card Type_Debit,Card Type_Debit (Prepaid),Gender_Male,Age Group_26-35,Age Group_36-45,Age Group_46-60,Age Group_60+
6780,1.025128,-0.028966,0.337145,0.963448,0.738314,-0.09811,-0.185577,-0.631234,-1.563208,0.447753,...,False,False,True,True,False,False,False,False,True,False
6781,1.025128,-0.028966,0.337145,0.963448,0.551106,-0.09811,-0.185577,-0.631234,-1.563208,0.447753,...,False,False,True,True,False,False,False,False,True,False


1. Define the Autoencoder Architecture

In [7]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed


# Seed Python's `random`, NumPy and TensorFlow so weight initialisation and
# batch shuffling -- and therefore the learned encoding -- are repeatable when
# the notebook is restarted and run from the top.
set_random_seed(42)

# Encoder: input_dim -> 64 -> 32 -> encoding_dim
# NOTE(review): with encoding_dim = 50 the latent layer is wider than the
# 32-unit layer feeding it, so the narrowest point of the network is 32 units
# rather than the layer named "bottleneck" -- confirm this is intended.
input_layer = Input(shape=(input_dim,))
encoder = Dense(64, activation='relu')(input_layer)
encoder = Dense(32, activation='relu')(encoder)
bottleneck = Dense(encoding_dim, activation='relu')(encoder)  # Latent representation

# Decoder: mirrors the encoder back up to the input width
decoder = Dense(32, activation='relu')(bottleneck)
decoder = Dense(64, activation='relu')(decoder)
# Linear output so the reconstruction is not restricted to non-negative values.
output_layer = Dense(input_dim, activation='linear')(decoder)

# Full autoencoder: maps an input row to its reconstruction
autoencoder = Model(inputs=input_layer, outputs=output_layer)

# Train by minimising the mean squared reconstruction error
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

2. Train the Autoencoder

In [8]:
# Train the network to reconstruct its own input (X is both features and target).
# validation_split holds out the last 20% of rows for validation.
# The returned History is kept so per-epoch loss / val_loss can be inspected
# afterwards (history.history) instead of being discarded as the cell output.
history = autoencoder.fit(
    X, X,
    epochs=20,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/20
[1m 14337/167655[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:32[0m 992us/step - loss: 0.0748

KeyboardInterrupt: 

3. Extract the Encoder Model for Dimensionality Reduction

In [None]:
# Sub-model that reuses the autoencoder's layers and stops at the latent layer,
# i.e. maps an input row to its encoded representation.
encoder_model = Model(inputs=input_layer, outputs=bottleneck)

# Encode every row. An explicit batch size well above predict()'s default of 32
# reduces the number of inference steps over the full table.
X_reduced = encoder_model.predict(X, batch_size=1024)

# Wrap the codes in a DataFrame that keeps X's row index, so each encoded row
# can still be traced back to its source row.
encoded_cols = [f'Encoded_{i+1}' for i in range(encoding_dim)]
reduced_df = pd.DataFrame(X_reduced, columns=encoded_cols, index=X.index)

# Attach the target positionally (.values); X was derived from df by dropping
# columns only, so the row order is the same.
reduced_df['Is Fraud'] = df['Is Fraud'].values

4. Save the reduced DataFrame to a file

In [None]:
import os

# Output folder for the reduced dataset. exist_ok=True creates it when missing
# and is a no-op otherwise, which removes the check-then-create race and keeps
# the cell safe to re-run.
clean_dir = "../0 - Data/5 - pcas"
os.makedirs(clean_dir, exist_ok=True)

# Build the target path from clean_dir so the folder is defined in one place.
reduced_df.to_parquet(f"{clean_dir}/ft_strategy_2_ae_reduction.pq")