# Readding and preparing the data:

In [1]:
import pandas as pd
import datetime

from google.colab import drive
drive.mount('/content/drive')

# reading the dataset
path = '/content/drive/MyDrive/'
try:
  data = pd.read_pickle(path + 'fraud_dataset.pkl')
except:
  data = pd.read_csv(path + 'fraud_dataset.csv')
  # peakle data 
  data.to_pickle('/content/drive/MyDrive/fraud_dataset.pkl')


data['hour'] = pd.to_datetime(data['datetime']).dt.hour
data.drop('datetime', axis = 1, inplace = True)
# dropping the useless column 'Unnamed: 0'
data.drop('Unnamed: 0', axis = 1, inplace = True)

train_columns = ['acquirerid', 'agriculture_list', 'rollingsum_card_merchant_24h','bankid', 'batchamount','card','emv','cup','construction_list','mean_merchant_amount','mcc', 'list_bankid_ci', 'list_bankid_b', 'fuel_list', 'pospayenvcode','merchant', 'medic_list',
                   'rollingcount_card_merchant_600s','riskmerchant', 'resp_code', 'rollingcount_merchant_24h','rollingcount_merchant_900s','rollingsum_card_24h','rollingsum_merchant_24h','rollingsum_merchant_900s',
                   'hour', 'trc', 'travel_list', 'terminal', 'super_market_list']

# let's shuffle the data first
data = data.sample(frac=1).reset_index(drop = True)

data  = data.iloc[:200000]
train_data = data[:150000]
train_data = train_data[train_data.fraud == 0]
X_train = train_data[train_columns]

test_data = data[150000:]
X_test,Y_test = test_data[train_columns], test_data['fraud']

Mounted at /content/drive


# Transformations 

In [2]:
import abc
import itertools
import numpy as np
from keras.preprocessing.image import apply_affine_transform


def get_transformer(type_trans):
    if type_trans == 'complicated':
        tr_x, tr_y = 8, 8
        transformer = Transformer(tr_x, tr_y)
        return transformer
    elif type_trans == 'simple':
        transformer = SimpleTransformer()
        return transformer


class AffineTransformation(object):
    def __init__(self, flip, tx, ty, k_90_rotate):
        self.flip = flip
        self.tx = tx
        self.ty = ty
        self.k_90_rotate = k_90_rotate

    def __call__(self, x):
        res_x = x
        if self.flip:
            res_x = np.fliplr(res_x)
        if self.tx != 0 or self.ty != 0:
            res_x = apply_affine_transform(res_x,
            tx=self.tx, ty=self.ty, channel_axis=2, fill_mode='reflect')
        if self.k_90_rotate != 0:
            res_x = np.rot90(res_x, self.k_90_rotate)
        return res_x


class AbstractTransformer(abc.ABC):
    def __init__(self):
        self._transformation_list = None
        self._create_transformation_list()

    @property
    def n_transforms(self):
        return len(self._transformation_list)

    @abc.abstractmethod
    def _create_transformation_list(self):
        return

    def transform_batch(self, x_batch, t_inds):
        assert len(x_batch) == len(t_inds)

        transformed_batch = x_batch.copy()
        for i, t_ind in enumerate(t_inds):
            transformed_batch[i] = self._transformation_list[t_ind](transformed_batch[i])
        return transformed_batch


class Transformer(AbstractTransformer):
    def __init__(self, translation_x=8, translation_y=8):
        self.max_tx = translation_x
        self.max_ty = translation_y
        super().__init__()

    def _create_transformation_list(self):
        transformation_list = []
        for is_flip, tx, ty, k_rotate in itertools.product((False, True),
                                                           (0, -self.max_tx, self.max_tx),
                                                           (0, -self.max_ty, self.max_ty),
                                                           range(4)):
            transformation = AffineTransformation(is_flip, tx, ty, k_rotate)
            transformation_list.append(transformation)
        self._transformation_list = transformation_list
        return transformation_list


class SimpleTransformer(AbstractTransformer):
    def _create_transformation_list(self):
        transformation_list = []
        for is_flip, k_rotate in itertools.product((False, True),
                                                    range(4)):
            transformation = AffineTransformation(is_flip, 0, 0, k_rotate)
            transformation_list.append(transformation)
        self._transformation_list = transformation_list
        return transformation_list

# Wideresnet

In [3]:
import torch.nn as nn
import torch.nn.init as init
import numpy as np

def weights_init(m):
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight, gain=np.sqrt(2.0))
    elif classname.find('Conv') != -1:
        init.xavier_normal_(m.weight, gain=np.sqrt(2.0))
    elif classname.find('Linear') != -1:
        init.eye_(m.weight)
    elif classname.find('Emb') != -1:
        init.normal(m.weight, mean=0, std=0.01)

class netC5(nn.Module):
    def __init__(self, d, ndf, nc):
        super(netC5, self).__init__()
        self.trunk = nn.Sequential(
        nn.Conv1d(d, ndf, kernel_size=1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv1d(ndf, ndf, kernel_size=1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv1d(ndf, ndf, kernel_size=1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv1d(ndf, ndf, kernel_size=1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv1d(ndf, ndf, kernel_size=1, bias=False),
        )
        self.head = nn.Sequential(
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv1d(ndf, nc, kernel_size=1, bias=True),
        )


    def forward(self, input):
        tc = self.trunk(input)
        ce = self.head(tc)
        return tc, ce


class netC1(nn.Module):
    def __init__(self, d, ndf, nc):
        super(netC1, self).__init__()
        self.trunk = nn.Sequential(
        nn.Conv1d(d, ndf, kernel_size=1, bias=False),
        )
        self.head = nn.Sequential(
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv1d(ndf, nc, kernel_size=1, bias=True),
        )

    def forward(self, input):
        tc = self.trunk(input)
        ce = self.head(tc)
        return tc, ce

# Optimization_tc

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
#import fcnet as model
from sklearn.metrics import precision_recall_fscore_support as prf

def tc_loss(zs, m):
    means = zs.mean(0).unsqueeze(0)
    res = ((zs.unsqueeze(2) - means.unsqueeze(1)) ** 2).sum(-1)
    pos = torch.diagonal(res, dim1=1, dim2=2)
    offset = torch.diagflat(torch.ones(zs.size(1))).unsqueeze(0).cuda() * 1e6
    neg = (res + offset).min(-1)[0]
    loss = torch.clamp(pos + m - neg, min=0).mean()
    return loss

def f_score(scores, labels, ratio):
    thresh = np.percentile(scores, ratio)
    y_pred = (scores >= thresh).astype(int)
    y_true = labels.astype(int)
    precision, recall, f_score, support = prf(y_true, y_pred, average='binary')
    return f_score


class TransClassifierTabular():
    def __init__(self, args):
        self.ds = args.dataset
        self.m = args.m
        self.lmbda = args.lmbda
        self.batch_size = args.batch_size
        self.ndf = args.ndf
        self.n_rots = args.n_rots
        self.d_out = args.d_out
        self.eps = args.eps

        self.n_epoch = args.n_epoch
        if args.dataset == "thyroid" or args.dataset == "arrhythmia":
            self.netC = netC1(self.d_out, self.ndf, self.n_rots).cuda()
        else:
            self.netC = netC5(self.d_out, self.ndf, self.n_rots).cuda()
        weights_init(self.netC)
        self.optimizerC = optim.Adam(self.netC.parameters(), lr=args.lr, betas=(0.5, 0.999))


    def fit_trans_classifier(self, train_xs, x_test, y_test, ratio):
        labels = torch.arange(self.n_rots).unsqueeze(0).expand((self.batch_size, self.n_rots)).long().cuda()
        celoss = nn.CrossEntropyLoss()
        print('Training')
        for epoch in range(self.n_epoch):
            self.netC.train()
            rp = np.random.permutation(len(train_xs))
            n_batch = 0
            sum_zs = torch.zeros((self.ndf, self.n_rots)).cuda()

            for i in range(0, len(train_xs), self.batch_size):
                self.netC.zero_grad()
                batch_range = min(self.batch_size, len(train_xs) - i)
                train_labels = labels
                if batch_range == len(train_xs) - i:
                    train_labels = torch.arange(self.n_rots).unsqueeze(0).expand((len(train_xs) - i, self.n_rots)).long().cuda()
                idx = np.arange(batch_range) + i
                xs = torch.from_numpy(train_xs[rp[idx]]).float().cuda()
                tc_zs, ce_zs = self.netC(xs)
                sum_zs = sum_zs + tc_zs.mean(0)
                tc_zs = tc_zs.permute(0, 2, 1)

                loss_ce = celoss(ce_zs, train_labels)
                er = self.lmbda * tc_loss(tc_zs, self.m) + loss_ce
                er.backward()
                self.optimizerC.step()
                n_batch += 1

            means = sum_zs.t() / n_batch
            means = means.unsqueeze(0)
            self.netC.eval()

            with torch.no_grad():
                val_probs_rots = np.zeros((len(y_test), self.n_rots))
                for i in range(0, len(x_test), self.batch_size):
                    batch_range = min(self.batch_size, len(x_test) - i)
                    idx = np.arange(batch_range) + i
                    xs = torch.from_numpy(x_test[idx]).float().cuda()
                    zs, fs = self.netC(xs)
                    zs = zs.permute(0, 2, 1)
                    diffs = ((zs.unsqueeze(2) - means) ** 2).sum(-1)

                    diffs_eps = self.eps * torch.ones_like(diffs)
                    diffs = torch.max(diffs, diffs_eps)
                    logp_sz = torch.nn.functional.log_softmax(-diffs, dim=2)

                    val_probs_rots[idx] = -torch.diagonal(logp_sz, 0, 1, 2).cpu().data.numpy()

                val_probs_rots = val_probs_rots.sum(1)
                f1_score = f_score(val_probs_rots, y_test, ratio)
                print("Epoch:", epoch, ", fscore: ", f1_score)
        return f1_score

# Training:

In [9]:
val_real, val_fake = test_data[test_data.fraud == 0][train_columns], test_data[test_data.fraud == 1][train_columns]

import numpy as np
#from data_loader import Data_Loader
#import opt_tc_tabular as tc
import argparse

def load_trans_data(args):
    
    train_real = X_train
    y_test_fscore = np.concatenate([np.zeros(len(val_real)), np.ones(len(val_fake))])
    ratio = 100.0 * len(val_real) / (len(val_real) + len(val_fake))

    n_train, n_dims = train_real.shape
    rots = np.random.randn(args.n_rots, n_dims, args.d_out)

    print('Calculating transforms')
    x_train = np.stack([train_real.dot(rot) for rot in rots], 2)
    val_real_xs = np.stack([val_real.dot(rot) for rot in rots], 2)
    val_fake_xs = np.stack([val_fake.dot(rot) for rot in rots], 2)
    x_test = np.concatenate([val_real_xs, val_fake_xs])
    return x_train, x_test, y_test_fscore, ratio


def train_anomaly_detector(args):
    x_train, x_test, y_test, ratio = load_trans_data(args)
    tc_obj = TransClassifierTabular(args)
    f_score = tc_obj.fit_trans_classifier(x_train, x_test, y_test, ratio)
    return f_score

# if __name__ == '__main__':
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--lr', default=0.001, type=float)
#     parser.add_argument('--n_rots', default=32, type=int)
#     parser.add_argument('--batch_size', default=64, type=int)
#     parser.add_argument('--n_epoch', default=25, type=int)
#     parser.add_argument('--d_out', default=4, type=int)
#     parser.add_argument('--dataset', default='thyroid', type=str)
#     parser.add_argument('--exp', default='affine', type=str)
#     parser.add_argument('--c_pr', default=0, type=int)
#     parser.add_argument('--true_label', default=1, type=int)
#     parser.add_argument('--ndf', default=8, type=int)
#     parser.add_argument('--m', default=1, type=float)
#     parser.add_argument('--lmbda', default=0.1, type=float)
#     parser.add_argument('--eps', default=0, type=float)
#     parser.add_argument('--n_iters', default=500, type=int)

#     args = parser.parse_args()
#     print("Dataset: ", args.dataset)

#     if args.dataset == 'thyroid' or args.dataset == 'arrhythmia':
#         n_iters = args.n_iters
#         f_scores = np.zeros(n_iters)
#         for i in range(n_iters):
#             f_scores[i] = train_anomaly_detector(args)
#         print("AVG f1_score", f_scores.mean())
#     else:
#         train_anomaly_detector(args)

# A solution is to create an Args class 

# this classe is just to replace argsparse 
class Args():
  def __init__(self, depth, widen, batch_size,lr, n_epoch, type_trans, lmbda, m, reg, eps, class_ind,n_rots,d_out,ndf,n_iters,c_pr, dataset):
    self.depth, self.widen, self.batch_size,self.lr, self.n_epoch, self.type_trans, self.lmbda, self.m, self.reg, self.eps, self.class_ind, self.n_rots, self.d_out,self.ndf,self.n_iters,self.c_pr, self.dataset= depth, widen, batch_size,lr, n_epoch, type_trans, lmbda, m, reg, eps, class_ind,n_rots,d_out,ndf,n_iters,c_pr,dataset
args = Args(10, 4, 288,0.001,16,'complicated', 0.1,1,True,0,1,32,4,8,500,0, 'chora_zaml')

train_anomaly_detector(args)



Calculating transforms


RuntimeError: ignored