In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from tqdm import tqdm
from IPython.display import clear_output
import matplotlib.pyplot as plt

### Modify the parameters before running
---


In [None]:

# --- Model / windowing -------------------------------------------------------
# Number of time steps per window; used as the row count when each flat sample
# is reshaped to (1, window_size, 37) for the autoencoder.
window_size = 21
# Destination for the trained model's state_dict.
model_path = './cae-21.pt'

# --- Data locations ----------------------------------------------------------
# Input CSVs of pre-computed features.
normal_feature_path = './data/features_norm_scaled.csv'
anomaly_feature_path = './data/features_anom_scaled.csv'
# Output CSV that receives the held-out normal split.
normal_testing_set_path = './data/norm_test.csv'


---

In [None]:
# Read the normal-traffic feature matrix from CSV as float32 (one sample per
# line) and remember how many rows were loaded; `l` drives the train/test split.
data_norm = np.genfromtxt(
    fname=normal_feature_path,
    dtype=np.float32,
    delimiter=',',
)
l = data_norm.shape[0]

In [None]:
# Shuffle the normal samples in place (row-wise), then split them 80/20 into
# a training subset and a held-out test subset.
#
# Both slices share one split index so every row ends up in exactly one subset.
# The previous `int(l*0.8)+1` start for the test slice silently dropped the row
# sitting at the boundary.
#
# NOTE: both subsets are views into `data_norm`; re-running this cell reshuffles
# the underlying array and therefore changes their contents.
np.random.shuffle(data_norm)

split_idx = int(l * 0.8)
data_norm_train = data_norm[:split_idx]
data_norm_test = data_norm[split_idx:]

In [None]:
# Persist the held-out normal samples as CSV for later evaluation.
#
# The file is opened in 'wb' (truncate) mode: the split above is reshuffled on
# every run, so appending ('ab') would accumulate rows from earlier, different
# splits -- including rows that belong to the current training subset.
with open(normal_testing_set_path, 'wb') as f:
    np.savetxt(f, data_norm_test, delimiter=',', newline='\n')


In [None]:
class CAE(nn.Module):
    """Convolutional autoencoder for single-channel 21x37 inputs.

    Spatial sizes through the network (rows x cols), for a 21x37 input:

    Encoder
        conv 3x3 / stride 2 : 21x37 -> 10x18   (16 channels)
        max-pool 2          : 10x18 ->  5x9
        conv 3x3 / stride 2 :  5x9  ->  2x4    (32 channels)
        max-pool 2          :  2x4  ->  1x2    (bottleneck: 32*1*2 = 64 values)

    Decoder
        upsample x2         :  1x2  ->  2x4
        convT 3x3 / stride 2:  2x4  ->  5x9    (16 channels)
        upsample x2         :  5x9  -> 10x18
        convT 3x3 / stride 2: 10x18 -> 21x37   (1 channel)

    Every (transposed) convolution is followed by BatchNorm and ReLU, including
    the last one, so reconstructions are always non-negative.
    """

    def __init__(self):
        super().__init__()

        # Layers are collected in lists and unpacked into nn.Sequential so the
        # positional indices (and hence the state_dict keys) stay stable.
        encoder_layers = [
            # stage 1: 1 -> 16 channels
            nn.Conv2d(1, 16, kernel_size=3, stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),  # stride defaults to the kernel size
            # stage 2: 16 -> 32 channels
            nn.Conv2d(16, 32, kernel_size=3, stride=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            # stage 1: 32 -> 16 channels
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # stage 2: 16 -> 1 channel
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2),
            nn.BatchNorm2d(1),
            nn.ReLU(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode `x` to the bottleneck and decode it back to the input size."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return reconstruction

In [None]:
# --- Hyperparameters ---------------------------------------------------------
epoch = 500        # iterations of the outer training loop
batch_limit = 512  # random mini-batches drawn per epoch
batch_size = 32    # samples per mini-batch

# --- History buffers ---------------------------------------------------------
# loss_list receives one mean training loss per epoch; val_list is created here
# but not filled by any cell visible in this notebook section.
loss_list, val_list = [], []

# --- Model, objective, optimiser ---------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

cae = CAE().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(cae.parameters(), lr=1e-3)

In [None]:
# Train the autoencoder to reconstruct normal samples.
#
# Each epoch draws `batch_limit` mini-batches of `batch_size` rows, sampled
# uniformly with replacement from `data_norm_train`, instead of sweeping the
# training set once. After every epoch the mean batch loss is appended to
# `loss_list` and the loss curve is redrawn in place.

# Upper bound (exclusive) for sampling: the number of rows actually present.
# A hard-coded bound raises IndexError on smaller datasets and never samples
# the tail of larger ones.
n_train = len(data_norm_train)

for e in tqdm(range(epoch)):
    # Ensure BatchNorm uses batch statistics even if the model was switched to
    # eval mode by another cell before this one is (re-)run.
    cae.train()
    epoch_loss = 0

    for idx in range(batch_limit):
        optimizer.zero_grad()

        indices = np.random.randint(low=0, high=n_train, size=batch_size)
        # Fancy indexing gathers the batch in one vectorised copy; each flat
        # row is then viewed as a single-channel (window_size x 37) image.
        batch = data_norm_train[indices].reshape(batch_size, 1, window_size, 37)
        train_tensor = torch.from_numpy(batch).to(device)
        recon_tensor = cae(train_tensor)

        # Reconstruction objective: the input is its own target.
        loss = criterion(recon_tensor, train_tensor)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        print(f"Batch loss ({idx+1}/{batch_limit}): {loss.item()}", end='\r')

    loss_list.append(epoch_loss/batch_limit)

    # Replace the previous cell output with an up-to-date loss curve.
    clear_output(wait=True)
    fig, ax1 = plt.subplots(dpi=120)
    ax1.plot(np.array(loss_list), label='Normal Window Loss', color='red')
    ax1.legend(loc='upper right')
    ax1.set_ylabel('Reconstruction MSE Loss')
    ax1.set_xlabel('Epoch')
    plt.show()
    # Release the figure; otherwise one figure per epoch stays alive in memory.
    plt.close(fig)

In [None]:
# Persist only the learned parameters and buffers (the state_dict), not the
# pickled module object, to `model_path`.
torch.save(obj=cae.state_dict(), f=model_path)