### Loading the dataset

In [31]:
import pandas as pd

df = pd.read_csv('temp/olist_orders_dataset_df.csv')
X = df.drop(['anomaly'], axis=1)

columns1 = ['time_estimate_delivery', 'year', 'month', 'day', 'freight_value', 'seller_zip_code_prefix', 'seller_geolocation_lat', 'seller_geolocation_lng','customer_zip_code_prefix', 'customer_geolocation_lat', 'customer_geolocation_lng', 'distance']
X = X[columns1]


In [32]:
X

Unnamed: 0,time_estimate_delivery,year,month,day,freight_value,seller_zip_code_prefix,seller_geolocation_lat,seller_geolocation_lng,customer_zip_code_prefix,customer_geolocation_lat,customer_geolocation_lng,distance
0,14,2017,10,2,8.72,9350.0,-23.680729,-46.444238,3149.0,-23.576983,-46.587161,662.084641
1,18,2018,7,24,22.76,31570.0,-19.807681,-43.980427,47813.0,-12.177924,-44.660711,9344.543745
2,27,2018,8,8,19.22,14840.0,-21.363502,-48.229601,75265.0,-16.745150,-48.514783,10713.151535
3,23,2017,11,18,27.20,31842.0,-19.837682,-43.924053,59296.0,-5.774190,-35.271143,15422.365811
4,12,2018,2,13,8.72,8752.0,-23.543395,-46.262086,9195.0,-23.676370,-46.514627,843.950334
...,...,...,...,...,...,...,...,...,...,...,...,...
102275,23,2018,2,6,20.10,17602.0,-21.930548,-50.498348,11722.0,-24.001500,-46.449864,8679.619783
102276,30,2017,8,27,65.02,8290.0,-23.553642,-46.452661,45920.0,-17.898358,-39.373630,3984.630835
102277,34,2018,1,8,40.59,37175.0,-20.940578,-45.827237,28685.0,-22.562825,-42.694574,16963.820370
102278,34,2018,1,8,40.59,37175.0,-20.940578,-45.827237,28685.0,-22.562825,-42.694574,16963.820370


### Normalizing

In [33]:
from sklearn.preprocessing import QuantileTransformer

qt = QuantileTransformer(output_distribution='normal')
X = qt.fit_transform(X)

### Feature Scaling

In [34]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

### Import Necessary Libraries:

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets


### Define the Autoencoder Architecture

In [36]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Linear(12, 9),
            nn.ReLU(),
            nn.Linear(9, 6)
        )
        # Decoder layers
        self.decoder = nn.Sequential(
            nn.Linear(6, 9),
            nn.ReLU(),
            nn.Linear(9, 12)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


### Instantiate the Model, Loss Function, and Optimizer

In [37]:
model = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


### Convert DataFrame to PyTorch Tensor

In [38]:
tensor_data = torch.Tensor(X)


### DataLoader

In [39]:
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(tensor_data)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)


### Training Loop

In [40]:
# Example training loop
num_epochs = 200
for epoch in range(num_epochs):
    for batch_features, in train_loader:  # DataLoader will unpack the features
        # Forward pass
        outputs = model(batch_features)
        loss = criterion(outputs, batch_features)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/200], Loss: 0.0167
Epoch [2/200], Loss: 0.0169
Epoch [3/200], Loss: 0.0128
Epoch [4/200], Loss: 0.0126
Epoch [5/200], Loss: 0.0136
Epoch [6/200], Loss: 0.0076
Epoch [7/200], Loss: 0.0070
Epoch [8/200], Loss: 0.0081
Epoch [9/200], Loss: 0.0093
Epoch [10/200], Loss: 0.0046
Epoch [11/200], Loss: 0.0035
Epoch [12/200], Loss: 0.0067
Epoch [13/200], Loss: 0.0044
Epoch [14/200], Loss: 0.0042
Epoch [15/200], Loss: 0.0047
Epoch [16/200], Loss: 0.0033
Epoch [17/200], Loss: 0.0038
Epoch [18/200], Loss: 0.0046
Epoch [19/200], Loss: 0.0027
Epoch [20/200], Loss: 0.0029
Epoch [21/200], Loss: 0.0040
Epoch [22/200], Loss: 0.0023
Epoch [23/200], Loss: 0.0025
Epoch [24/200], Loss: 0.0036
Epoch [25/200], Loss: 0.0021
Epoch [26/200], Loss: 0.0035
Epoch [27/200], Loss: 0.0021
Epoch [28/200], Loss: 0.0022
Epoch [29/200], Loss: 0.0031
Epoch [30/200], Loss: 0.0037
Epoch [31/200], Loss: 0.0036
Epoch [32/200], Loss: 0.0048
Epoch [33/200], Loss: 0.0027
Epoch [34/200], Loss: 0.0029
Epoch [35/200], Loss: 0

KeyboardInterrupt: 