### Loading the dataset

In [19]:
import pandas as pd

df = pd.read_csv('temp/olist_orders_dataset_df.csv')
X = df.drop(['anomaly'], axis=1)
y = df['anomaly']


In [20]:
X.shape

(102280, 18)

### Normalizing

In [21]:
from sklearn.preprocessing import QuantileTransformer

qt = QuantileTransformer(output_distribution='normal')
X = qt.fit_transform(X)


### Feature Scaling

In [22]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X = scaler.fit_transform(X)


### Import Necessary Libraries:

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets


### Define the Autoencoder Architecture

In [24]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Linear(18, 14),  # Assuming the first hidden layer has 12 nodes
            nn.ReLU(),
            nn.Linear(14, 10),   # Further compressing to 8 nodes
            nn.ReLU(),
            nn.Linear(10, 6)
        )
        # Decoder layers
        self.decoder = nn.Sequential(
            nn.Linear(6, 10),
            nn.ReLU(),
            nn.Linear(10, 14),
            nn.ReLU(),
            nn.Linear(14, 18),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


### Instantiate the Model, Loss Function, and Optimizer

In [25]:
model = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


### Convert DataFrame to PyTorch Tensor

In [26]:
tensor_data = torch.Tensor(X)


### DataLoader

In [27]:
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(tensor_data)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)


### Training Loop

In [28]:
# Example training loop
num_epochs = 300
for epoch in range(num_epochs):
    for batch_features, in train_loader:  # DataLoader will unpack the features
        # Forward pass
        outputs = model(batch_features)
        loss = criterion(outputs, batch_features)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/300], Loss: 0.0177
Epoch [2/300], Loss: 0.0107
Epoch [3/300], Loss: 0.0068
Epoch [4/300], Loss: 0.0111
Epoch [5/300], Loss: 0.0156
Epoch [6/300], Loss: 0.0070
Epoch [7/300], Loss: 0.0099
Epoch [8/300], Loss: 0.0118
Epoch [9/300], Loss: 0.0089
Epoch [10/300], Loss: 0.0092
Epoch [11/300], Loss: 0.0065
Epoch [12/300], Loss: 0.0080
Epoch [13/300], Loss: 0.0077
Epoch [14/300], Loss: 0.0095
Epoch [15/300], Loss: 0.0052
Epoch [16/300], Loss: 0.0067
Epoch [17/300], Loss: 0.0105
Epoch [18/300], Loss: 0.0089
Epoch [19/300], Loss: 0.0062
Epoch [20/300], Loss: 0.0085
Epoch [21/300], Loss: 0.0059
Epoch [22/300], Loss: 0.0075
Epoch [23/300], Loss: 0.0066
Epoch [24/300], Loss: 0.0079
Epoch [25/300], Loss: 0.0089
Epoch [26/300], Loss: 0.0065
Epoch [27/300], Loss: 0.0086
Epoch [28/300], Loss: 0.0053
Epoch [29/300], Loss: 0.0085
Epoch [30/300], Loss: 0.0094
Epoch [31/300], Loss: 0.0056
Epoch [32/300], Loss: 0.0057
Epoch [33/300], Loss: 0.0032
Epoch [34/300], Loss: 0.0074
Epoch [35/300], Loss: 0

### Encoding the data

In [29]:
encoded_dataset = model.encoder(tensor_data)
encoded_dataset = encoded_dataset.detach().numpy()
encoded_dataset = pd.DataFrame(encoded_dataset)

encoded_dataset = pd.concat([encoded_dataset, y], axis=1)


In [30]:
encoded_dataset.to_csv('temp/olist_orders_dataset_autoencoder_df.csv', index=False)
