### Loading the dataset

In [10]:
import pandas as pd

df = pd.read_csv('temp/olist_orders_dataset_df.csv')
X = df.drop(['anomaly'], axis=1)

columns1 = ['time_estimate_delivery', 'year', 'month', 'day', 'freight_value', 'seller_zip_code_prefix', 'seller_geolocation_lat', 'seller_geolocation_lng','customer_zip_code_prefix', 'customer_geolocation_lat', 'customer_geolocation_lng', 'distance']
X = X[columns1]


### Normalizing

In [11]:
from sklearn.preprocessing import QuantileTransformer

qt = QuantileTransformer(output_distribution='normal')
X = qt.fit_transform(X)

### Feature Scaling

In [12]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

### Import Necessary Libraries:

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets


### Define the Autoencoder Architecture

In [14]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Linear(12, 9),
            nn.ReLU(),
            nn.Linear(9, 6)
        )
        # Decoder layers
        self.decoder = nn.Sequential(
            nn.Linear(6, 9),
            nn.ReLU(),
            nn.Linear(9, 12)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


### Instantiate the Model, Loss Function, and Optimizer

In [15]:
model = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


### Convert DataFrame to PyTorch Tensor

In [16]:
tensor_data = torch.Tensor(X)


### DataLoader

In [17]:
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(tensor_data)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)


### Training Loop

In [18]:
# Example training loop
num_epochs = 500
for epoch in range(num_epochs):
    for batch_features, in train_loader:  # DataLoader will unpack the features
        # Forward pass
        outputs = model(batch_features)
        loss = criterion(outputs, batch_features)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/500], Loss: 0.0223
Epoch [2/500], Loss: 0.0221
Epoch [3/500], Loss: 0.0152
Epoch [4/500], Loss: 0.0127
Epoch [5/500], Loss: 0.0065
Epoch [6/500], Loss: 0.0087
Epoch [7/500], Loss: 0.0096
Epoch [8/500], Loss: 0.0107
Epoch [9/500], Loss: 0.0106
Epoch [10/500], Loss: 0.0042
Epoch [11/500], Loss: 0.0069
Epoch [12/500], Loss: 0.0071
Epoch [13/500], Loss: 0.0072
Epoch [14/500], Loss: 0.0082
Epoch [15/500], Loss: 0.0044
Epoch [16/500], Loss: 0.0068
Epoch [17/500], Loss: 0.0055
Epoch [18/500], Loss: 0.0048
Epoch [19/500], Loss: 0.0046
Epoch [20/500], Loss: 0.0065
Epoch [21/500], Loss: 0.0037
Epoch [22/500], Loss: 0.0048
Epoch [23/500], Loss: 0.0046
Epoch [24/500], Loss: 0.0048
Epoch [25/500], Loss: 0.0039
Epoch [26/500], Loss: 0.0036
Epoch [27/500], Loss: 0.0059
Epoch [28/500], Loss: 0.0082
Epoch [29/500], Loss: 0.0042
Epoch [30/500], Loss: 0.0051
Epoch [31/500], Loss: 0.0031
Epoch [32/500], Loss: 0.0038
Epoch [33/500], Loss: 0.0050
Epoch [34/500], Loss: 0.0027
Epoch [35/500], Loss: 0