In [None]:
# !pip install torch torchvision sentencepiece
# !pip install git+https://github.com/openai/CLIP.git

import torch
from PIL import Image
from torchvision import transforms
from clip import clip
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import numpy as np
import PIL
from tqdm import tqdm
import argparse
import pickle

class load_np_dataset(torch.utils.data.Dataset):
    """Dataset backed by two ``.npy`` files: one with images, one with targets.

    Args:
        imgs_path: Path of the ``.npy`` file holding the images; the loaded
            array is indexed along its first axis.
        targets_path: Path of the ``.npy`` file holding the targets, indexed
            the same way as the images.
        transform: Callable applied to each PIL image before it is returned.
            A falsy value (e.g. ``None``) returns the PIL image unchanged.
    """

    def __init__(self, imgs_path, targets_path, transform):
        self.data = np.load(imgs_path)
        self.targets = np.load(targets_path)
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair stored at position ``idx``."""
        img, target = self.data[idx], self.targets[idx]

        img = PIL.Image.fromarray(img)
        # Use the transform stored on this instance, not a module-level name
        # that merely happens to be called `transform`.
        if self.transform:
            img = self.transform(img)

        return img, target

def parsing(argv=None):
    """Build the command-line parser and return the parsed arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``,
            exactly as before; passing an explicit list lets a notebook call
            this function without overwriting ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``aug``, ``batch_size``,
        ``seed``, ``num_workers`` and ``transform``.
    """
    parser = argparse.ArgumentParser(description='Tunes a CIFAR Classifier with OE',
                                    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--aug', type=str, default='gaussian_noise',
                        help='select noise.')
    parser.add_argument('--batch_size', '-b', type=int,
                        default=64, help='Batch size.')
    parser.add_argument('--seed', type=int, default=1,
                        help='seed')
    parser.add_argument('--num_workers', type=int,
                        default=0, help='number of workers')
    # Integer flag: any non-zero value is treated as "enabled" by callers.
    parser.add_argument('--transform', type=int,
                        default=0, help='use transformation dataset')

    return parser.parse_args(argv)


In [None]:
import sys
# Replace the process arguments with a minimal list.
# NOTE(review): presumably so that parsing(), which reads sys.argv, does not
# trip over the arguments the notebook kernel was started with — confirm.
sys.argv = ['', '--aug', 'gaussian_noise']

In [None]:

# Parse the arguments, then pin the corruption type inspected in this notebook
# (this overrides whatever --aug was parsed).
args = parsing()
args.aug = 'spatter'

# clip.load returns the model together with the transform that is applied to
# both datasets below.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, transform = clip.load("ViT-B/32", device=device)

cifar10_path = '/storage/users/makhavan/CSI/finals/datasets/data/'
cifar10_dataset = CIFAR10(root=cifar10_path, train=True, download=True, transform=transform)

# A non-zero --transform selects the "A" dataset variant, otherwise "C".
# The two variants differ only in this suffix, so the paths are built once.
dataset_variant = 'A' if args.transform else 'C'
cifar_train_cor_dir = f'/storage/users/makhavan/CSI/finals/datasets/generalization_repo_dataset/CIFAR-10-Train-R-{dataset_variant}'
cifar_train_cor_img_path = f'{cifar_train_cor_dir}/{args.aug}.npy'
cifar_train_cor_target_path = f'{cifar_train_cor_dir}/labels-{dataset_variant}.npy'
train_aug_dataset = load_np_dataset(cifar_train_cor_img_path, cifar_train_cor_target_path, transform=transform)

# shuffle=False keeps both loaders in dataset order.
cifar10_loader = DataLoader(cifar10_dataset, shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)
aug_loader = DataLoader(train_aug_dataset, shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)



In [None]:
# Take only the first batch from each loader. next(iter(...)) raises
# immediately if a loader is empty, instead of silently leaving the names
# below undefined.
imgs, target = next(iter(cifar10_loader))
imgs_noisy, target_noisy = next(iter(aug_loader))

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Show sample `idx` of the clean batch (left) next to the same position of the
# augmented batch (right).
idx = 12
img_, noisy_img_ = imgs[idx], imgs_noisy[idx]
fig, axis = plt.subplots(1, 2, figsize=(20, 10))
for panel, picture in zip(axis, (img_, noisy_img_)):
    # Tensors are channel-first; imshow expects the channel axis last.
    panel.imshow(picture.permute(1, 2, 0).detach().numpy())
axis[0].axis('off')
axis[1].axis('off')

In [None]:
import pickle
import numpy as np

In [None]:
# Load two pickled objects from ./tensors into `g` and `t`.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open files that were produced by a trusted source.
with open("./tensors/diffs_gaussian_noise.pkl", 'rb') as f:
    g = pickle.load(f)

with open("./tensors/diffs_target_gaussian_noise.pkl", 'rb') as f:
    t = pickle.load(f)

In [None]:
# Sort the (t, g) pairs and unzip them back into two tuples, so both sequences
# end up ordered by t; pairs with equal t are ordered by comparing their g values.
t, g = zip(*sorted(zip(t, g)))

In [None]:
# Mean of the first 5000 entries of g.
# NOTE(review): presumably one class's worth of samples after sorting by t — confirm.
np.mean(g[:5000])

In [None]:
# Print the mean of g over ten consecutive, non-overlapping slices of 5000 entries.
# NOTE(review): assumes g is ordered by class and every class has exactly
# 5000 entries — confirm.
for i in range(10):
    print(f"class {i}: {np.mean(g[i*5000:(i+1)*5000])}")

In [None]:
# Toy example: order two parallel lists by score (ties broken by name).
name = ['Megan', 'Harriet', 'Henry', 'Beth', 'George']
score_list = [9, 6, 5, 6, 10]
ranked_pairs = sorted(zip(score_list, name))
score = tuple(pair[0] for pair in ranked_pairs)
name = tuple(pair[1] for pair in ranked_pairs)

In [None]:
# Display the sorted scores and the names reordered alongside them.
score, name

# CLIP CHECK VALUES

In [None]:
import os
# Directory whose files are summarised by the following cell.
# NOTE(review): the name `dir` shadows the Python builtin of the same name.
dir = "/storage/users/makhavan/CSI/exp09/clip_vec/outputs"

In [None]:
# Summarise every file in `dir`: each line after the first is expected to look
# like "<label>:<number>"; print the mean, min, max and range of those numbers.
for file_name in sorted(os.listdir(dir)):  # sorted: os.listdir order is arbitrary
    file_path = os.path.join(dir, file_name)
    if not os.path.isfile(file_path):
        # Sub-directories cannot be opened as text files.
        continue

    with open(file_path, 'r') as f:
        lines = f.readlines()

    # The first line is skipped; blank lines carry no value and are ignored.
    # float() tolerates the trailing newline left on each line.
    values = [float(line.split(':')[1]) for line in lines[1:] if line.strip()]
    if not values:
        # np.min / np.max raise on an empty sequence.
        print(file_name, "no values found")
        continue

    lowest, highest = np.min(values), np.max(values)
    print(file_name, np.mean(values), "Min: ", lowest, "Max: ", highest, "Max diff: ", highest - lowest)


In [None]:
import torch
from PIL import Image
from torchvision import transforms
from clip import clip
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import numpy as np
import PIL
from tqdm import tqdm
import argparse
import pickle


# clip.load returns the model together with the transform applied to the dataset.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, transform = clip.load("ViT-B/32", device=device)


cifar10_path = '/storage/users/makhavan/CSI/finals/datasets/data/'
cifar10_dataset = CIFAR10(root=cifar10_path, train=True, download=True, transform=transform)
cifar10_loader = DataLoader(cifar10_dataset, shuffle=False, batch_size=16)

# Collect one scalar per image: the mean over dim=1 of its encoded features.
diffs = []
# Inference only: no_grad avoids building an autograd graph for every batch,
# which would otherwise waste memory and time.
with torch.no_grad():
    for imgs_n, targets in cifar10_loader:
        imgs_n = imgs_n.to(device)
        imgs_n_features = model.encode_image(imgs_n)
        diffs.extend(torch.mean(imgs_n_features, dim=1).detach().cpu().numpy())


In [None]:
# Pickle the `diffs` list to ./tensors/normal_data.pkl. Opening in 'wb' mode
# does not create directories, so ./tensors must already exist.
with open(f'./tensors/normal_data.pkl', 'wb') as f:
    pickle.dump(diffs, f)

In [None]:
# Normalise the values of the i-th block of 5000 entries in `diffs`.
# NOTE(review): this assumes `diffs` is grouped by class in blocks of 5000;
# a loader built with shuffle=False yields dataset order — confirm the dataset
# is class-sorted before relying on this slice.
i = 1
class_values = np.asarray(diffs[i*5000:i*5000 + 5000])
# Scale by the largest value in the block.
class_diff = torch.tensor(class_values / np.max(class_values))
# `diffs` may hold one scalar per image (1-D) or one vector per image (2-D).
# Reducing over dim=1 is only valid, and only needed, in the 2-D case;
# applying it to 1-D data raises IndexError.
if class_diff.dim() > 1:
    class_diff = torch.mean(class_diff, dim=1)
# Standardise to zero mean and unit standard deviation.
class_diff_normalized = (class_diff - torch.mean(class_diff)) / torch.std(class_diff)

In [None]:
# Keep the values that lie below the 95th percentile. The threshold is computed
# once rather than once per element.
# NOTE(review): despite its name, `idices` holds the values themselves, not
# their positions.
percentile_95 = np.percentile(class_diff_normalized, 95)
idices = [element for element in class_diff_normalized if element < percentile_95]

In [None]:
# Show the largest and smallest values in class_diff.
torch.max(class_diff), torch.min(class_diff)

In [1]:
import pandas as pd
import os

# Load every CSV report in `root` into a DataFrame, tagging each with a
# leading 'noise' column taken from the file name (text before the first dot).
all_dfs = []
names = []
root = './csv_results/report_class_1/'

# `root` is relative to the current working directory, so report the resolved
# location when it is missing.
if not os.path.isdir(root):
    raise FileNotFoundError(f"Report directory not found: {os.path.abspath(root)}")

# sorted(): os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(root)):
    if not filename.lower().endswith('.csv'):
        # Skip sub-directories and other non-CSV entries.
        continue
    print(filename)
    noise_name = filename.split('.')[0]
    df = pd.read_csv(os.path.join(root, filename))
    df.insert(loc=0, column='noise', value=noise_name)
    all_dfs.append(df)
    names.append(noise_name)

# NOTE(review): if re-enabled, combined.csv is written into `root` and would be
# picked up by the loop above on the next run.
# combined_df = pd.concat(all_dfs, ignore_index=True)
# combined_df.to_csv(root + "combined.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: './csv_results/report_class_1/'