In [None]:
from google.colab import drive
# Mount Google Drive through the public `drive.mount` entry point (the call that
# Colab's own "already mounted" message refers to) rather than the
# underscore-prefixed `_mount` helper.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import logging

import pandas as pd
import tensorflow.keras as keras

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model

# Root logger: INFO and above, prefixed with a HH:MM:SS timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%H:%M:%S",
)

# Let pandas print up to 85 rows so that every feature can be listed at once.
pd.options.display.max_rows = 85

In [None]:
import numpy as np
import tensorflow as tf
import keras
from keras import layers

In [None]:
# Read the train and test CSVs from the mounted Drive. `skipinitialspace=True`
# drops the blanks that follow each delimiter.
df_train = pd.read_csv(
    '/content/drive/MyDrive/train_MachineLearningCVE.csv',
    skipinitialspace=True,
)
df_test = pd.read_csv(
    '/content/drive/MyDrive/test_MachineLearningCVE.csv',
    skipinitialspace=True,
)

In [None]:
# Stack the two frames row-wise into one combined frame (original row indices are kept).
df = pd.concat(
    objs=[df_train, df_test],
    axis=0,
    copy=True,
)

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler

def preprocessing(df: pd.DataFrame, random_state=None) -> "tuple[np.ndarray, np.ndarray]":
    """Shuffle ``df``, split off the ``Label`` column and min-max scale the features.

    Args:
        df: Frame holding the feature columns plus a ``Label`` column.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced. The default ``None`` keeps the original
            unseeded behaviour.

    Returns:
        A pair ``(x, y)``: ``x`` is every non-``Label`` column scaled to the
        0-1 range by ``MinMaxScaler``; ``y`` is the ``Label`` column as a
        2-D array with a single column.

    Raises:
        KeyError: if ``df`` has no ``Label`` column.
    """
    # Shuffle the dataset (all rows, new random order).
    shuffled = df.sample(frac=1, random_state=random_state)

    # Split features and labels; the boolean mask keeps every column but 'Label'.
    features = shuffled.iloc[:, shuffled.columns != 'Label']
    y = shuffled[['Label']].to_numpy()

    # Scale the features between 0 ~ 1.
    # NOTE(review): the scaler is fitted on every row passed in, so when the
    # caller splits into train/test afterwards the test rows have influenced
    # the scaling — confirm this is intended.
    x = MinMaxScaler().fit_transform(features)

    return x, y

# Shuffle, split and scale the combined frame into features `x` and labels `y`.
x, y = preprocessing(df)

In [None]:
from sklearn.model_selection import train_test_split
# 70% of the rows go to the training split; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=42,
)

In [None]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
#tf.logging.set_verbosity(tf.logging.ERROR)
import itertools
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score,f1_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Input,Dropout,Dense
from keras.models import Model
from keras import regularizers
from keras.utils.data_utils import get_file
%matplotlib inline

In [None]:
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils import data

In [None]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a narrow ReLU bottleneck.

    The encoder maps ``input_dim -> 500 -> 250 -> 50 -> latent_dim`` with a ReLU
    after every linear layer (including the bottleneck). The decoder mirrors it
    back to ``input_dim``; its last layer is linear with no activation.

    Args:
        input_dim: Number of features per sample. Defaults to 49, the value
            that used to be hard-coded.
        latent_dim: Width of the bottleneck. Defaults to 3, as before.

    The layer order is unchanged, so with the default sizes the ``state_dict``
    keys and shapes match the previous hard-coded definition.
    """

    def __init__(self, input_dim=49, latent_dim=3):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 250),
            nn.ReLU(inplace=True),
            nn.Linear(250, 50),
            nn.ReLU(inplace=True),
            nn.Linear(50, latent_dim),
            nn.ReLU(inplace=True)
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 50),
            nn.ReLU(inplace=True),
            nn.Linear(50, 250),
            nn.ReLU(inplace=True),
            nn.Linear(250, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back to ``input_dim`` features."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [None]:

def L(X, X_, t):
    """Return the element-wise loss between ``X`` and ``X_`` for loss type ``t``.

    Args:
        X: First tensor.
        X_: Second tensor, same shape as ``X``.
        t: Name of the loss. Only ``'mse'`` is supported; it applies
            ``nn.MSELoss`` with its default settings.

    Returns:
        The loss as a 0-dim tensor.

    Raises:
        ValueError: if ``t`` is not a supported loss name. Previously the
            function fell through and returned ``None``, which only failed
            later inside the caller's arithmetic.
    """
    if t == 'mse':
        return nn.MSELoss()(X, X_)
    raise ValueError("Unsupported loss type {!r}; expected 'mse'".format(t))
     

def R(X):
    """Return the matrix product of ``X`` with its own transpose (``X`` times ``X^T``)."""
    transposed = X.t()
    return X.mm(transposed)

def tau(X, t):
    """Keep the entries of ``X`` that are strictly below ``t`` and zero the rest.

    Args:
        X: Input tensor.
        t: Threshold compared against every entry of ``X``.

    Returns:
        A float32 tensor shaped like ``X``: ``X`` where ``X < t``, 0 elsewhere.
    """
    X_float = X.float()
    # zeros_like allocates the filler on the same device as X, so the call also
    # works for tensors that do not live on the default device.
    return torch.where(X < t, X_float, torch.zeros_like(X_float))

def rae_loss(alpha, t, L_type='mse'):
    """Build a loss callable that blends a direct term with a thresholded-product term.

    The returned ``rae(y_true, y_pred)`` computes
    ``(1 - alpha) * L(y_true, y_pred) + alpha * L(tau(R(y_true), t), tau(R(y_pred), t))``,
    where ``L`` is evaluated with loss type ``L_type``.

    Args:
        alpha: Weight of the thresholded-product term; the direct term gets ``1 - alpha``.
        t: Threshold forwarded to ``tau``.
        L_type: Loss name forwarded to ``L``.
    """
    def rae(y_true, y_pred):
        direct_term = L(y_true, y_pred, L_type)
        product_term = L(tau(R(y_true), t), tau(R(y_pred), t), L_type)
        return (1 - alpha)*direct_term + alpha*product_term
    return rae

In [None]:
# 15 evenly spaced candidate weights between 0 and 1 (both ends included).
alphas = np.linspace(0, 1, num=15)
# Threshold handed to the loss.
t = 1
# Use the second candidate weight. Alternative fixed weight: alpha = 0.05
alpha = alphas[1]
print(alpha, alphas, sep='\n')

0.07142857142857142
[0.         0.07142857 0.14285714 0.21428571 0.28571429 0.35714286
 0.42857143 0.5        0.57142857 0.64285714 0.71428571 0.78571429
 0.85714286 0.92857143 1.        ]


In [None]:
def model_training(autoencoder, train_loader, epoch, optimizer=None):
    """Run one training epoch of ``autoencoder`` over ``train_loader``.

    Reads the notebook-level globals ``alpha``, ``t``, ``LEARNING_RATE``,
    ``WEIGHT_DECAY``, ``LOG_INTERVAL`` and ``EPOCHS``.

    Args:
        autoencoder: Model to train; it is called on each flattened batch.
        train_loader: Iterable yielding batches that support ``.float()``
            (e.g. a ``DataLoader`` over a feature tensor).
        epoch: Zero-based epoch index, used only for logging.
        optimizer: Optional optimizer to reuse. When ``None`` (default) a fresh
            Adam optimizer is built for this call, as before. Pass one in to
            keep Adam's running statistics across epochs.
    """
    loss_metric = rae_loss(alpha, t, 'mse')
    if optimizer is None:
        optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Take the batch count from the loader itself instead of unrelated globals.
    # Loaders without a length are reported as '?'.
    try:
        num_batches = len(train_loader)
    except TypeError:
        num_batches = '?'

    autoencoder.train()
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        # Flatten every sample to one float vector: (batch, features).
        inputs = batch.float()
        inputs = inputs.view(inputs.size(0), -1)
        outputs = autoencoder(inputs)
        loss = loss_metric(outputs, inputs)
        loss.backward()
        optimizer.step()
        if (i + 1) % LOG_INTERVAL == 0:
            # The logged value is the RAE loss built above, not a Huber loss.
            print('Epoch [{}/{}] - Iter[{}/{}], RAE loss:{:.4f}'.format(
                epoch + 1, EPOCHS, i + 1, num_batches, loss.item()
                ))

In [None]:
def evaluation(autoencoder, test_loader):
    """Compute the average loss of ``autoencoder`` over ``test_loader`` and keep the best model.

    Reads the notebook-level globals ``alpha``, ``t`` and ``TRAIN_SCRATCH``, and
    updates the global ``BEST_VAL``. When ``TRAIN_SCRATCH`` is true and the
    average loss beats ``BEST_VAL``, the weights are written to
    ``./simple_autoencoder.pt``.

    Args:
        autoencoder: Model to evaluate.
        test_loader: Loader yielding batches that support ``.float()``. It must
            expose ``.dataset`` so the total sample count can be read.
    """
    global BEST_VAL

    loss_metric = rae_loss(alpha, t, 'mse')
    total_loss = 0.0
    autoencoder.eval()
    # Evaluation needs no gradients: skip building the autograd graph.
    with torch.no_grad():
        for batch in test_loader:
            # Flatten every sample to one float vector: (batch, features).
            inputs = batch.float()
            inputs = inputs.view(inputs.size(0), -1)
            outputs = autoencoder(inputs)
            # .item() yields a plain Python float, so the running total (and
            # BEST_VAL below) is a number rather than a tensor.
            total_loss += loss_metric(outputs, inputs).item() * len(inputs)
    avg_loss = total_loss / len(test_loader.dataset)

    print('\nAverage MSE Loss on Test set: {:.4f}'.format(avg_loss))

    if TRAIN_SCRATCH and avg_loss < BEST_VAL:
        BEST_VAL = avg_loss
        torch.save(autoencoder.state_dict(), './simple_autoencoder.pt')
        print('Save Best Model\n')

In [None]:
import datetime

if __name__ == '__main__':

    EPOCHS = 100
    BATCH_SIZE = 100
    LEARNING_RATE = 1e-3
    WEIGHT_DECAY = 1e-5
    LOG_INTERVAL = 100
    TRAIN_SCRATCH = True        # whether to train a model from scratch
    BEST_VAL = float('inf')     # record the best val loss

    torch.manual_seed(39)

    # The training and evaluation loops iterate over batches and call `.float()`
    # on each one, so wrap the feature matrices in DataLoaders. Iterating a
    # DataFrame directly yields its column names (strings) and fails there.
    train_loader = torch.utils.data.DataLoader(
        torch.as_tensor(X_train, dtype=torch.float32),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    test_loader = torch.utils.data.DataLoader(
        torch.as_tensor(X_test, dtype=torch.float32),
        batch_size=BATCH_SIZE,
        shuffle=False,
    )

    autoencoder = Autoencoder()

    if TRAIN_SCRATCH:
        # Training autoencoder from scratch
        for epoch in range(EPOCHS):
            starttime = datetime.datetime.now()
            model_training(autoencoder, train_loader, epoch)
            endtime = datetime.datetime.now()
            print(f'Train a epoch in {(endtime - starttime).seconds} seconds')
            # evaluate on test set and save best model
            evaluation(autoencoder, test_loader)
        print('Trainig Complete with best validation loss {:.4f}'.format(BEST_VAL))

    else:
        # Load from the same relative path that `evaluation` saves the best weights to.
        autoencoder.load_state_dict(torch.load('./simple_autoencoder.pt'))
        evaluation(autoencoder, test_loader)

        autoencoder.cpu()
        # Reconstruct the first 32 training samples. The rows are plain feature
        # vectors, so there is no (sample, label) pair to unpack.
        DataPoint = torch.as_tensor(X_train[:32], dtype=torch.float32)
        outputs = autoencoder(DataPoint.view(DataPoint.size(0), -1))

AttributeError: ignored