In [3]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F
import PreResNet
import math
import torchvision.models as models
import random
import os
import numpy as np
from matplotlib import pyplot as plt
import sys
sys.path.append('../')
from utils import *

In [48]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# CIFAR meta: per-channel mean / std handed to Normalize below.
mean = [0.4914, 0.4822, 0.4465]
std = [0.2023, 0.1994, 0.2010]

# Tail common to both pipelines: convert to a tensor, then normalise.
to_normalized_tensor = [transforms.ToTensor(), transforms.Normalize(mean, std)]

# Training pipeline: random 32x32 crop (after 4 px padding) and random
# horizontal flip, followed by the common tail.
transform_train = transforms.Compose(
    [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()]
    + to_normalized_tensor
)

# Test pipeline: the common tail only, no random augmentation.
transform_test = transforms.Compose(list(to_normalized_tensor))

In [49]:
# Number of target classes for the CIFAR-10 data built in this cell.
num_classes = 10

# Arguments shared by the two handles on the training split.
train_split = dict(root='.', train=True, transform=transform_train)

# Only this first constructor asks torchvision to download the data.
trainset = datasets.CIFAR10(download=True, **train_split)
# Second, independent handle on the same split with the same transform.
trainset_track = datasets.CIFAR10(**train_split)
# Held-out split, using the augmentation-free test transform.
testset = datasets.CIFAR10(root='.', train=False, transform=transform_test)

Files already downloaded and verified


In [50]:
# Options common to all three loaders; only `shuffle` differs between them.
loader_opts = dict(batch_size=32, num_workers=1, pin_memory=True)

# Shuffled loader over the training set.
train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_opts)
# Fixed-order loader over the second training-set handle.
train_loader_track = torch.utils.data.DataLoader(trainset_track, shuffle=False, **loader_opts)
# Fixed-order loader over the test set.
test_loader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_opts)

In [51]:
# Smoke test: build the network and push one training batch through it to
# check that the forward pass runs on the current data.
model = PreResNet.ResNet18(num_classes=num_classes).to(device)
for batch_idx, (inputs, targets) in enumerate(train_loader):
    print(inputs.size(), targets.size())
    # The model was moved to `device`, so the batch has to follow it;
    # passing CPU tensors to a CUDA model raises a device-mismatch error.
    inputs, targets = inputs.to(device), targets.to(device)
    # Only the forward pass is of interest here, so skip autograd tracking.
    with torch.no_grad():
        model(inputs)
    # One batch is enough for a smoke test.
    break

python(30165) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


torch.Size([32, 3, 32, 32]) torch.Size([32])


In [27]:
# Rebuild the three loaders with a batch size of 128; only `shuffle` differs.
loader_opts = dict(batch_size=128, num_workers=1, pin_memory=True)

train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_opts)
train_loader_track = torch.utils.data.DataLoader(trainset_track, shuffle=False, **loader_opts)
test_loader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_opts)

# Fresh network with one output per class, placed on the selected device.
model = PreResNet.ResNet18(num_classes=num_classes)
model = model.to(device)

In [13]:
# Pull only the first batch yielded by train_loader.
batch_idx, (data, target) = next(enumerate(train_loader))
data = data.to(device)
target = target.to(device)

# `prediction` ends up holding log-probabilities (log_softmax over dim 1).
prediction = F.log_softmax(model(data), dim=1)
# reduction='none' keeps one loss value per sample instead of averaging.
idx_loss = F.nll_loss(prediction, target, reduction='none')

In [16]:
# Size of the per-sample loss tensor computed in the previous cell.
idx_loss.size()

torch.Size([128])

In [10]:
# One pass over train_loader collecting per-sample statistics, followed by a
# two-component mixture fitted to the trimmed, rescaled per-sample losses.

# Per-batch results are gathered in lists and concatenated once after the
# loop; concatenating inside the loop re-copies everything seen so far on
# every iteration.
loss_chunks = []
log_prob_chunks = []
argmax_xentropy_chunks = []

# Declared but never filled by this cell; kept so the name stays defined.
all_predictions = torch.Tensor()

# No gradients are needed for these statistics, so the forward passes run
# under no_grad instead of building (and then detaching) an autograd graph
# for every batch.
# NOTE(review): the model is left in whatever train/eval mode it is already
# in, and the shuffled train_loader is used, so the row order of the
# collected tensors follows the shuffle — confirm both are intended
# (model.eval() / train_loader_track may be what is wanted).
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Log-probabilities over the classes for every sample in the batch.
        prediction = F.log_softmax(model(data), dim=1)

        # Per-sample negative log-likelihood of the given target.
        idx_loss = F.nll_loss(prediction, target, reduction='none')
        loss_chunks.append(idx_loss.cpu())

        # Despite the name, `probs` holds log-probabilities.
        probs = prediction.clone()
        log_prob_chunks.append(probs.cpu())

        # Per-sample negative log-probability of the model's own top-1 class.
        arg_entr = torch.max(prediction, dim=1)[1]
        arg_entr = F.nll_loss(prediction.float(), arg_entr.to(device), reduction='none')
        argmax_xentropy_chunks.append(arg_entr.cpu())

# Each concatenation starts from an empty tensor, mirroring the accumulators
# this cell used to grow batch by batch (and still valid for zero batches).
all_losses = torch.cat([torch.Tensor(), *loss_chunks])
all_probs = torch.cat([torch.Tensor(), *log_prob_chunks])
all_argmaxXentropy = torch.cat([torch.Tensor(), *argmax_xentropy_chunks])

loss_tr = all_losses.numpy()

# outliers detection: keep only losses between the 5th and 95th percentile.
max_perc = np.percentile(loss_tr, 95)
min_perc = np.percentile(loss_tr, 5)
loss_tr = loss_tr[(loss_tr <= max_perc) & (loss_tr >= min_perc)]

# Normalisation bounds, kept as tensors on `device`.
# Note: the literal 10e-6 is 1e-5 (not 1e-6).
gmm_model_maxLoss = torch.FloatTensor([max_perc]).to(device)
gmm_model_minLoss = torch.FloatTensor([min_perc]).to(device) + 10e-6

# Bring the bounds back to NumPy once, then rescale the losses to roughly
# [0, 1]; the extra 1e-6 guards against a zero-width range.
min_loss = gmm_model_minLoss.cpu().numpy()
max_loss = gmm_model_maxLoss.cpu().numpy()
loss_tr = (loss_tr - min_loss) / (max_loss - min_loss + 1e-6)

# Pull values at or beyond the ends strictly inside (0, 1).
# Note: the literal 10e-4 is 1e-3, so the replacement values are 0.999 and 0.001.
loss_tr[loss_tr >= 1] = 1 - 10e-4
loss_tr[loss_tr <= 0] = 10e-4

# Fit a two-component mixture to the rescaled losses as a single-feature column.
# NOTE(review): GMM is not imported explicitly in this notebook; presumably
# it arrives via `from utils import *` — confirm.
gmm_model = GMM(n_components=2)
gmm_model.fit(loss_tr.reshape(-1, 1))

In [20]:
# Index of the fitted mixture component whose mean is largest.
gmm_model.means_.ravel().argmax()

0

In [52]:
# Replaces the earlier transforms with 224x224 pipelines.
# NOTE(review): the saved output of the shape-check cells further down
# reports 32x32 batches, which does not match a 224 crop — the stored
# outputs may be stale; re-run to confirm which size is intended.
crop_size = 224

# Normalisation shared by both pipelines (these look like the commonly used
# ImageNet statistics — confirm they are appropriate for this data).
channel_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training: random resized crop and random horizontal flip.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(crop_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    channel_normalize,
])

# Test: deterministic resize to 256 followed by a centre crop.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(crop_size),
    transforms.ToTensor(),
    channel_normalize,
])

In [53]:
# Folder-based datasets rooted at ./tiny-imagenet-200.
tiny_imagenet_root = os.path.join('.', 'tiny-imagenet-200')
train_dir = os.path.join(tiny_imagenet_root, 'train')
val_dir = os.path.join(tiny_imagenet_root, 'val')

# Two independent handles on the training folder, both with the training transform.
trainset = datasets.ImageFolder(root=train_dir, transform=transform_train)
trainset_track = datasets.ImageFolder(root=train_dir, transform=transform_train)

# NOTE(review): ImageFolder takes labels from sub-directory names — verify
# that val/ is organised into the same class folders as train/.
testset = datasets.ImageFolder(root=val_dir, transform=transform_test)

In [54]:
# Options common to all three loaders; only `shuffle` differs between them.
loader_opts = dict(batch_size=32, num_workers=1, pin_memory=True)

# Shuffled loader over the training set.
train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_opts)
# Fixed-order loader over the second training-set handle.
train_loader_track = torch.utils.data.DataLoader(trainset_track, shuffle=False, **loader_opts)
# Fixed-order loader over the test set.
test_loader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_opts)

In [55]:
# Fetch the first batch from train_loader and report its input / target sizes.
batch_idx, (inputs, targets) = next(enumerate(train_loader))
print(inputs.shape, targets.shape)

python(30882) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


torch.Size([32, 3, 32, 32]) torch.Size([32])


In [56]:
# Network with 200 outputs for the tiny-imagenet-200 folders, placed on the
# selected device.
model = PreResNet.ResNet18(num_classes=200)
model = model.to(device)

In [57]:
# Smoke test: run one training batch through the current model to check
# that the forward pass works on this data.
for batch_idx, (inputs, targets) in enumerate(train_loader):
    print(inputs.size(), targets.size())
    # The model was moved to `device`, so the batch has to follow it;
    # passing CPU tensors to a CUDA model raises a device-mismatch error.
    inputs, targets = inputs.to(device), targets.to(device)
    # Only the forward pass is of interest here, so skip autograd tracking.
    with torch.no_grad():
        model(inputs)
    # One batch is enough for a smoke test.
    break

python(30914) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


torch.Size([32, 3, 32, 32]) torch.Size([32])
