In [None]:
# !pip install torch torchvision sentencepiece
# !pip install git+https://github.com/openai/CLIP.git

import torch
from PIL import Image
from torchvision import transforms
from clip import clip
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import numpy as np
import PIL
from tqdm import tqdm
import argparse
import pickle

class load_np_dataset(torch.utils.data.Dataset):
    """Dataset over images and labels stored in two ``.npy`` files.

    Args:
        imgs_path: Path of the ``.npy`` file holding the images; every entry is
            handed to ``PIL.Image.fromarray``.
        targets_path: Path of the ``.npy`` file holding the labels, indexed in
            parallel with the images.
        transform: Optional callable applied to each PIL image. With ``None``
            the PIL image is returned unchanged.
    """

    def __init__(self, imgs_path, targets_path, transform=None):
        self.data = np.load(imgs_path)
        self.targets = np.load(targets_path)
        self.transform = transform

    def __len__(self):
        """Return the number of entries in the image array."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for ``idx``, transformed when a transform is set."""
        img, target = self.data[idx], self.targets[idx]

        img = PIL.Image.fromarray(img)
        # Consult the instance's own transform. The previous check read the
        # notebook-level global `transform`, which fails with NameError (or
        # silently looks at an unrelated object) outside this kernel state.
        if self.transform is not None:
            img = self.transform(img)

        return img, target

def parsing(argv=None):
    """Build the experiment's argument parser and parse the given arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) keeps
            the previous behaviour of letting argparse read ``sys.argv[1:]``.
            Passing a list (e.g. ``[]``) lets a notebook call this without
            overwriting ``sys.argv`` first.

    Returns:
        argparse.Namespace with the attributes ``aug``, ``batch_size``,
        ``seed``, ``num_workers`` and ``transform``.
    """
    parser = argparse.ArgumentParser(description='Tunes a CIFAR Classifier with OE',
                                    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--aug', type=str, default='gaussian_noise',
                        help='select noise.')
    parser.add_argument('--batch_size', '-b', type=int,
                        default=64, help='Batch size.')
    parser.add_argument('--seed', type=int, default=1,
                        help='seed')
    parser.add_argument('--num_workers', type=int,
                        default=0, help='number of workers')
    # Integer flag: any non-zero value is treated as true by the callers.
    parser.add_argument('--transform', type=int,
                        default=0, help='use transformation dataset')

    args = parser.parse_args(argv)
    return args


In [None]:
# Overwrite sys.argv so that parsing(), which calls parser.parse_args() with no
# arguments, parses these options instead of whatever the process was started with.
import sys
sys.argv = ['', '--aug', 'gaussian_noise']

In [None]:

# Parse the options, then pin the corruption type used for this run.
args = parsing()
args.aug = 'spatter'

# Load CLIP (and its preprocessing transform) on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, transform = clip.load("ViT-B/32", device=device)

cifar10_path = '/storage/users/makhavan/CSI/finals/datasets/data/'
cifar10_dataset = CIFAR10(root=cifar10_path, train=True, download=True, transform=transform)

# The two corrupted-set variants differ only by the A/C suffix that appears in
# both the folder name and the label file name.
set_suffix = 'A' if args.transform else 'C'
cifar_train_cor_img_path = f'/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-{set_suffix}/{args.aug}.npy'
cifar_train_cor_target_path = f'/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-{set_suffix}/labels-{set_suffix}.npy'
train_aug_dataset = load_np_dataset(cifar_train_cor_img_path, cifar_train_cor_target_path, transform=transform)

# Both loaders share the same unshuffled settings.
loader_options = dict(shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)
cifar10_loader = DataLoader(cifar10_dataset, **loader_options)
aug_loader = DataLoader(train_aug_dataset, **loader_options)



In [None]:
# Take the first batch from each loader for visual inspection.
# next(iter(...)) raises on an empty loader instead of silently leaving the
# names undefined, which the previous for/break form did.
imgs, target = next(iter(cifar10_loader))
imgs_noisy, target_noisy = next(iter(aug_loader))

In [None]:
import matplotlib.pyplot as plt

In [None]:
idx = 12
img_ = imgs[idx]
noisy_img_ = imgs_noisy[idx]

# Both batches were produced with CLIP's preprocessing transform, which
# normalises each channel. Undo that so imshow receives values in [0, 1]
# rather than clipping out-of-range floats.
# NOTE(review): constants are the ones used by CLIP's published preprocessing — confirm
# they match the `transform` returned by clip.load.
clip_mean = torch.tensor((0.48145466, 0.4578275, 0.40821073)).view(3, 1, 1)
clip_std = torch.tensor((0.26862954, 0.26130258, 0.27577711)).view(3, 1, 1)

fig, axis = plt.subplots(1, 2, figsize=(20, 10))
for ax, tensor_img in zip(axis, (img_, noisy_img_)):
    shown = (tensor_img.detach().cpu().float() * clip_std + clip_mean).clamp(0, 1)
    # imshow expects channels last.
    ax.imshow(shown.permute(1, 2, 0).numpy())
    ax.axis('off')

In [None]:
import pickle
import numpy as np

In [None]:
# Read the two pickled arrays written earlier: the per-image values first, then
# their labels. Only unpickle files you produced yourself.
loaded = []
for pkl_path in ("./tensors/diffs_gaussian_noise.pkl", "./tensors/diffs_target_gaussian_noise.pkl"):
    with open(pkl_path, 'rb') as handle:
        loaded.append(pickle.load(handle))
g, t = loaded

In [None]:
# Order the (label, value) pairs by label (ties broken by value), then split
# them back into two parallel tuples.
pairs_sorted = sorted(zip(t, g))
t, g = zip(*pairs_sorted)

In [None]:
# Mean of the first 5000 entries of g.
# NOTE(review): presumably one class's worth of samples after sorting by label — confirm.
np.mean(g[:5000])

In [None]:
# Report the mean of g over ten consecutive blocks of 5000 entries.
for class_idx in range(10):
    start = class_idx * 5000
    class_mean = np.mean(g[start:start + 5000])
    print(f"class {class_idx}: {class_mean}")

In [None]:
# Toy example of sorting two parallel lists together: pair them up, sort the
# pairs (by score, then by name), and unzip back into two tuples.
name = ['Megan', 'Harriet', 'Henry', 'Beth', 'George']
score_list = [9, 6, 5, 6, 10]
ranked = sorted(zip(score_list, name))
score, name = zip(*ranked)

In [None]:
# Display the two tuples produced by the zip/sort example.
score, name

# CLIP CHECK VALUES

In [None]:
import os
# Folder whose files are summarised by the loop that lists `dir`.
# NOTE(review): the name `dir` shadows the Python builtin of the same name for
# the rest of the kernel session; renaming it requires updating its users too.
dir = "/storage/users/makhavan/CSI/exp09/clip_vec/outputs"

In [None]:
# Summarise every output file: each line after the first is expected to look
# like "<label>: <number>"; print mean, min, max and range of those numbers.
for file_name in os.listdir(dir):
    file_path = os.path.join(dir, file_name)
    # Sub-directories cannot be opened as text files; skip them.
    if not os.path.isfile(file_path):
        continue
    with open(file_path, 'r') as f:
        # The first line is skipped, as before. float() tolerates the
        # surrounding whitespace and trailing newline. Lines without a ':'
        # (e.g. blank lines) are ignored instead of raising IndexError.
        values = [float(line.split(':')[1]) for line in f.readlines()[1:] if ':' in line]

    if not values:
        # np.min / np.max raise on an empty sequence; report and move on.
        print(file_name, "no values found")
        continue

    lowest, highest = np.min(values), np.max(values)
    print(file_name, np.mean(values), "Min: ", lowest, "Max: ", highest, "Max diff: ", highest - lowest)


In [None]:
import torch
from PIL import Image
from torchvision import transforms
from clip import clip
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import numpy as np
import PIL
from tqdm import tqdm
import argparse
import pickle


device = "cuda" if torch.cuda.is_available() else "cpu"
model, transform = clip.load("ViT-B/32", device=device)


cifar10_path = '/storage/users/makhavan/CSI/finals/datasets/data/'
cifar10_dataset = CIFAR10(root=cifar10_path, train=True, download=True, transform=transform)
cifar10_loader = DataLoader(cifar10_dataset, shuffle=False, batch_size=16)

# One scalar per image: the mean over the feature dimension of its CLIP image embedding.
diffs = []
# Inference only: disabling autograd avoids building a graph for every batch,
# which the previous version did and then discarded via .detach().
with torch.no_grad():
    for imgs_n, _targets in cifar10_loader:
        imgs_n_features = model.encode_image(imgs_n.to(device))
        diffs.extend(torch.mean(imgs_n_features, dim=1).cpu().numpy())


In [None]:
# Persist the per-image values so later cells can reload them without re-encoding.
normal_data_path = './tensors/normal_data.pkl'
with open(normal_data_path, 'wb') as out_file:
    pickle.dump(diffs, out_file)

In [None]:
i = 1
# Take the i-th block of 5000 entries and scale it by the block's maximum.
# NOTE(review): assumes entries i*5000 .. i*5000+4999 belong to one class — confirm;
# an unshuffled loader yields samples in dataset order.
class_block = np.asarray(diffs[i*5000:i*5000 + 5000], dtype=np.float32)
class_diff = torch.as_tensor(class_block / np.max(class_block))
# `diffs` holds one scalar per image, so the block is 1-D and there is no
# second axis to average over; the previous unconditional mean over dim=1
# raised IndexError. Reduce only when per-image vectors were stored instead.
if class_diff.dim() > 1:
    class_diff = torch.mean(class_diff, dim=1)
# Standardise to zero mean and unit standard deviation.
class_diff_normalized = (class_diff - torch.mean(class_diff)) / torch.std(class_diff)

In [None]:
# Compute the 95th-percentile cut-off once instead of once per element.
percentile_95 = np.percentile(class_diff_normalized, 95)
# NOTE(review): despite its name, `idices` collects the values below the
# cut-off, not their positions — confirm which one is intended.
idices = [element for element in class_diff_normalized if element < percentile_95]

In [None]:
# Largest and smallest value of the scaled block, shown as a (max, min) pair.
class_diff.max(), class_diff.min()

In [1]:
import pandas as pd
import os

# Read every report in `root` into its own DataFrame, tagging each row with the
# file's base name (the part before the first '.') in a leading 'noise' column.
all_dfs = []
names = []
root = './csv_results/report_class_1/'
for filename in os.listdir(root):
    # os.path.join does not depend on `root` ending with a slash.
    report_path = os.path.join(root, filename)
    # Sub-directories cannot be read by pd.read_csv; skip them.
    if not os.path.isfile(report_path):
        continue
    print(filename)
    noise_name = filename.split('.')[0]
    df = pd.read_csv(report_path)
    df.insert(loc=0, column='noise', value=noise_name)
    all_dfs.append(df)
    names.append(noise_name)

# combined_df = pd.concat(all_dfs, ignore_index=True)
# combined_df.to_csv(root + "combined.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: './csv_results/report_class_1/'

In [28]:
# !pip install torch torchvision sentencepiece
# !pip install git+https://github.com/openai/CLIP.git

import torch
from PIL import Image
from torchvision import transforms
from clip import clip
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import numpy as np
import PIL
from tqdm import tqdm
import argparse
import pickle

class load_np_dataset(torch.utils.data.Dataset):
    """Dataset over images and labels stored in two ``.npy`` files.

    Args:
        imgs_path: Path of the ``.npy`` file holding the images; every entry is
            handed to ``PIL.Image.fromarray``.
        targets_path: Path of the ``.npy`` file holding the labels, indexed in
            parallel with the images.
        transform: Optional callable applied to each PIL image. With ``None``
            the PIL image is returned unchanged.
    """

    def __init__(self, imgs_path, targets_path, transform=None):
        self.data = np.load(imgs_path)
        self.targets = np.load(targets_path)
        self.transform = transform

    def __len__(self):
        """Return the number of entries in the image array."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for ``idx``, transformed when a transform is set."""
        img, target = self.data[idx], self.targets[idx]

        img = PIL.Image.fromarray(img)
        # Consult the instance's own transform. The previous check read the
        # notebook-level global `transform`, which fails with NameError (or
        # silently looks at an unrelated object) outside this kernel state.
        if self.transform is not None:
            img = self.transform(img)

        return img, target

def parsing(argv=None):
    """Build the experiment's argument parser and parse the given arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) keeps
            the previous behaviour of letting argparse read ``sys.argv[1:]``.
            Passing a list (e.g. ``[]``) lets a notebook call this without
            overwriting ``sys.argv`` first.

    Returns:
        argparse.Namespace with the attributes ``aug``, ``batch_size``,
        ``seed``, ``num_workers`` and ``transform``.
    """
    parser = argparse.ArgumentParser(description='Tunes a CIFAR Classifier with OE',
                                    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--aug', type=str, default='gaussian_noise',
                        help='select noise.')
    parser.add_argument('--batch_size', '-b', type=int,
                        default=64, help='Batch size.')
    parser.add_argument('--seed', type=int, default=1,
                        help='seed')
    parser.add_argument('--num_workers', type=int,
                        default=0, help='number of workers')
    # Integer flag: any non-zero value is treated as true by the callers.
    parser.add_argument('--transform', type=int,
                        default=0, help='use transformation dataset')

    args = parser.parse_args(argv)
    return args

# Clear the command line so parsing() falls back to every option's default.
import sys
sys.argv = ['']
args = parsing()



# Load CLIP ViT-B/32 and its preprocessing transform, on the GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, transform = clip.load("ViT-B/32", device=device)


# cifar10_path = '/storage/users/makhavan/CSI/finals/datasets/data/'
# cifar10_dataset = CIFAR10(root=cifar10_path, train=True, download=True, transform=transform)

# if args.transform:
#     cifar_train_cor_img_path = f'/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-A/{args.aug}.npy'
#     cifar_train_cor_target_path = '/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-A/labels-A.npy'
#     train_aug_dataset = load_np_dataset(cifar_train_cor_img_path, cifar_train_cor_target_path, transform=transform)
# else:
#     cifar_train_cor_img_path = f'/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-C/{args.aug}.npy'
#     cifar_train_cor_target_path = '/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-C/labels-C.npy'
#     train_aug_dataset = load_np_dataset(cifar_train_cor_img_path, cifar_train_cor_target_path, transform=transform)

# cifar10_loader = DataLoader(cifar10_dataset, shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)
# aug_loader = DataLoader(train_aug_dataset, shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)


# loader = zip(cifar10_loader, aug_loader)
# diffs = []
# targets_list = []
# for i, data in enumerate(tqdm(loader)):
#     data_normal, data_aug = data
#     imgs_n, targets = data_normal
#     imgs_aug, _ = data_aug
#     # imgs_n, imgs_aug = transform(imgs_n).to(device), transform(imgs_aug).to(device)
#     if len(imgs_n) != len(imgs_aug): # if len imgs_aug was larger than imgs_normal
#         imgs_aug = imgs_aug[:len(imgs_n)]
#         imgs_n, imgs_aug = imgs_n.to(device), imgs_aug.to(device)
#         imgs_n_features = model.encode_image(imgs_n)
#         imgs_aug_features = model.encode_image(imgs_aug)
#         diffs.extend(torch.sum(torch.pow((imgs_n_features - imgs_aug_features), 2), dim=1).detach().cpu().numpy())
#         targets_list.extend(targets.detach().cpu().numpy())
#         break

#     imgs_n, imgs_aug = imgs_n.to(device), imgs_aug.to(device)
#     imgs_n_features = model.encode_image(imgs_n)
#     imgs_aug_features = model.encode_image(imgs_aug)
#     diffs.extend(torch.sum(torch.pow((imgs_n_features - imgs_aug_features), 2), dim=1).detach().cpu().numpy())
#     targets_list.extend(targets.detach().cpu().numpy())

# diffs = np.asarray(diffs)
# targets_list = np.asarray(targets_list)
# with open(f'./tensors/diffs_{args.aug}.pkl', 'wb') as f:
#     pickle.dump(diffs, f)
# with open(f'./tensors/diffs_target_{args.aug}.pkl', 'wb') as f:
#     pickle.dump(targets_list, f)


# t, g = zip(*sorted(zip(targets_list, diffs)))
# for i in range(10):
#     print(f"class {i}: {np.mean(g[i*5000:(i+1)*5000])}")


# vals_softmax=[]
# for i in range(10):
#     vals = torch.tensor(np.mean(g[i*5000:(i+1)*5000]))
#     vals_softmax.append(torch.nn.functional.softmax(vals).detach().cpu().numpy())

# with open(f'./softmax/{args.aug}.out', 'w') as file:
#     for val in vals_softmax:
#         file.write(str(val)+'\n')

In [29]:
import torchvision
def load_svhn(svhn_path):
    """Return the SVHN ``(train, test)`` datasets rooted at ``svhn_path``.

    Both splits use the notebook-level ``transform``; the data is expected to
    be present under ``svhn_path`` already, since no download is requested.
    """
    # Earlier alternative preprocessing, kept for reference:
    # mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    # std = [x / 255 for x in [63.0, 62.1, 66.7]]
    # transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
    print('loading SVHN')
    # Build the train split first, then the test split.
    train_data, test_data = (
        torchvision.datasets.SVHN(root=svhn_path, split=split_name, transform=transform)
        for split_name in ("train", "test")
    )

    # train_data.targets = train_data.targets.astype('int64')
    # test_data.targets = test_data.targets.astype('int64')

    return train_data, test_data


In [30]:
# Build unshuffled loaders over both SVHN splits with the parsed batch settings.
svhn_path = '/storage/users/makhavan/CSI/finals/datasets/data/'
train_data, test_data = load_svhn(svhn_path)
svhn_loader_options = dict(shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)
svh_train = DataLoader(train_data, **svhn_loader_options)
svh_test = DataLoader(test_data, **svhn_loader_options)

loading SVHN


In [31]:
# Image and label files of the corrupted SVHN set live in the same folder.
svhn_cor_dir = '/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/SVHN-R-C'
svhn_train_cor_img_path = f'{svhn_cor_dir}/{args.aug}.npy'
svhn_train_cor_target_path = f'{svhn_cor_dir}/labels-C.npy'

In [32]:
# Wrap the corrupted SVHN arrays in a dataset and an unshuffled loader.
train_aug_dataset = load_np_dataset(
    imgs_path=svhn_train_cor_img_path,
    targets_path=svhn_train_cor_target_path,
    transform=transform,
)
aug_loader = DataLoader(
    train_aug_dataset,
    shuffle=False,
    batch_size=args.batch_size,
    num_workers=args.num_workers,
)

In [33]:
# Number of paired batches to process. The previous version of this cell ended
# with an unconditional `break`, i.e. it always stopped after the first batch;
# that default is kept here but made explicit. Set to None for the full dataset.
max_batches = 1

# Walk the clean and corrupted loaders in lock-step and record, per image, the
# squared Euclidean distance between the two CLIP image embeddings.
loader = zip(svh_train, aug_loader)
diffs = []
targets_list = []
# Inference only: no autograd graph is needed.
with torch.no_grad():
    for batch_idx, (data_normal, data_aug) in enumerate(tqdm(loader)):
        imgs_n, targets = data_normal
        imgs_aug, _aug_targets = data_aug

        # The final batches can differ in size when the datasets have different
        # lengths. Trim BOTH sides (and the labels) to the common length; the
        # previous code trimmed only the corrupted batch.
        is_ragged = len(imgs_n) != len(imgs_aug)
        if is_ragged:
            n_common = min(len(imgs_n), len(imgs_aug))
            imgs_n, imgs_aug, targets = imgs_n[:n_common], imgs_aug[:n_common], targets[:n_common]

        imgs_n, imgs_aug = imgs_n.to(device), imgs_aug.to(device)
        imgs_n_features = model.encode_image(imgs_n)
        imgs_aug_features = model.encode_image(imgs_aug)
        squared_dist = torch.sum(torch.pow(imgs_n_features - imgs_aug_features, 2), dim=1)
        diffs.extend(squared_dist.cpu().numpy())
        targets_list.extend(targets.cpu().numpy())

        # A size mismatch means one loader has reached its last batch.
        if is_ragged:
            break
        if max_batches is not None and batch_idx + 1 >= max_batches:
            break

0it [00:00, ?it/s]

0it [00:00, ?it/s]


In [35]:
# Display the collected per-image squared feature distances.
# NOTE(review): this prints the whole list; consider diffs[:10] once more than one batch is processed.
diffs

[55.75,
 45.0,
 42.94,
 45.62,
 37.16,
 49.1,
 68.56,
 67.75,
 25.5,
 18.84,
 27.53,
 49.38,
 42.38,
 27.55,
 53.8,
 50.03,
 42.03,
 33.53,
 34.7,
 28.47,
 52.75,
 36.6,
 35.16,
 21.19,
 49.75,
 49.22,
 70.75,
 45.0,
 64.4,
 57.16,
 40.62,
 46.62,
 40.8,
 49.47,
 33.3,
 31.42,
 26.31,
 21.14,
 77.8,
 96.75,
 91.8,
 36.8,
 50.7,
 53.62,
 31.94,
 37.3,
 52.8,
 34.2,
 23.22,
 60.88,
 52.53,
 35.38,
 20.02,
 42.25,
 55.12,
 32.2,
 31.52,
 44.56,
 26.98,
 35.9,
 25.44,
 46.56,
 45.7,
 33.8]