In [1]:
import torch
import torch.nn as nn
MAX_GRAD_NORM = 1.2
EPSILON = 50.0
DELTA = 1e-10
EPOCHS = 3

LR = 1e-3

BATCH_SIZE = 120
MAX_PHYSICAL_BATCH_SIZE = 256
from torchvision.models.mobilenet import mobilenet_v2

In [2]:
from torchvision.datasets import MNIST, CIFAR10
import torchvision.transforms as transforms



DATA_ROOT = '../mnist'

transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = CIFAR10(
    root=DATA_ROOT, train=True, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
)

test_dataset = MNIST(
    root=DATA_ROOT, train=False, download=True, transform=transform)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

print(len(train_dataset))
dataiter = iter(train_loader)
images, labels = next(dataiter)
images[0].size()

Files already downloaded and verified
50000


torch.Size([3, 32, 32])

In [3]:
import numpy as np
import torch.optim as optim
from opacus.utils.batch_memory_manager import BatchMemoryManager


def train(model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = nn.CrossEntropyLoss()

    losses = []
    top1_acc = []
    
    with BatchMemoryManager(
        data_loader=train_loader, 
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE, 
        optimizer=optimizer
    ) as memory_safe_data_loader:

        for i, (images, target) in enumerate(memory_safe_data_loader):   
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # measure accuracy and record loss
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

            loss.backward()
            optimizer.step()
            

            if (i+1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                    f"(ε = {epsilon:.5f}, δ = {DELTA})"
                )
    return np.mean(top1_acc), epsilon
    

In [4]:
def test(model, test_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

    top1_avg = np.mean(top1_acc)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return np.mean(top1_acc)


from tqdm.notebook import tqdm

report = []

def accuracy(preds, labels):
    return (preds == labels).mean()

#for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
#    results = train(model, train_loader, optimizer, epoch + 1, device)
#    report.append(results)

In [5]:
from opacus import PrivacyEngine
import torch
from torch import nn
from torchvision import models
from opacus.validators import ModuleValidator
model = models.resnet18(num_classes=10)

In [6]:
import pandas as pd
G_h =['eps_check','distortion','clip','q','k','theta']
GU_h =['eps_check','distortion','clip','q', 'a', 'b', 'k','theta', 'M']
#G250 = pd.read_csv("../arguments/G250_values.csv", names = G_h)
#GU250 = pd.read_csv("../arguments/GU_10Evalues.csv", names=GU_h)
#N250 = pd.read_csv("../arguments/N250_values.csv")

In [7]:
EPSILON = 5
import pandas as pd
GU_h =['eps_check','distortion','clip','q', 'a', 'b', 'k','theta', 'M']
GU250 = pd.read_csv("../arguments/GUEvalues.csv", names=GU_h)
GU250 = GU250.sort_values("eps_check")
GU250.replace([np.inf, -np.inf], np.nan, inplace=True)
GU250 = GU250.dropna()
GU250 = GU250.reset_index()
GU250.loc[(GU250['eps_check']  <= EPSILON) & (GU250['eps_check']  > EPSILON-0.1)].sort_values("distortion")

Unnamed: 0,index,eps_check,distortion,clip,q,a,b,k,theta,M
598,12695,4.944675,0.3215399,0.1,0.005,1e-05,1.1e-05,4.37243,0.922194,1322.354424
599,31391,4.944952,0.3215399,0.1,0.005,1e-05,1.3e-05,4.37243,0.922194,1322.354866
606,15386,4.969609,0.5262761,0.1,0.005,1e-05,1.1e-05,19.393939,0.103303,68.912626
602,12692,4.961639,0.6670193,0.1,0.005,1e-05,1.1e-05,4.37243,0.444548,82.611636
603,31388,4.961948,0.6670193,0.1,0.005,1e-05,1.3e-05,4.37243,0.444548,82.611955
597,22043,4.942968,5980840.0,0.1,0.005,0.0,1.1e-05,4.37243,0.922194,1322.351692
605,24734,4.967708,5980840.0,0.1,0.005,0.0,1.1e-05,19.393939,0.103303,68.910675
601,22040,4.959732,5980840.0,0.1,0.005,0.0,1.1e-05,4.37243,0.444548,82.609668
596,3347,4.942726,6815280.0,0.1,0.005,0.0,1e-05,4.37243,0.922194,1322.351305
604,6038,4.967439,6815280.0,0.1,0.005,0.0,1e-05,19.393939,0.103303,68.910399


In [8]:
GU250['eps_check'][69313]

KeyError: 69313

In [9]:
GU250.loc[(GU250['eps_check']  <= 0.5) & (GU250['eps_check']  > 0.49)].sort_values("distortion")

Unnamed: 0,index,eps_check,distortion,clip,q,a,b,k,theta,M


In [10]:
acc_plrv_2 = []
from torchvision.models.mobilenet import mobilenet_v2
from transformers import ViTForImageClassification, ViTConfig
for i in [598]:
    torch.cuda.empty_cache()
    args ={
        "a1":1,
        "a3":1,
        "a4":1,
        "lam":1,
        "moment":1,
        "theta":GU250['theta'][i],
        'k':GU250['k'][i],
        'mu':0,
        'sigma':0.5,
        'a':GU250['a'][i],
        'b':GU250['b'][i],
        'u':1,
        'l':0.1,
        'epsilon':1,
        'max_grad_norm': GU250['clip'][i],
        'gamma':True,
        'uniform':True,
        'truncnorm':False,
    }
    print(GU250['theta'][i])
    EPOCHS = 10
    BATCH_SIZE = 200
    #print()
    from torchvision.datasets import MNIST
    import torchvision.transforms as transforms



    DATA_ROOT = '../mnist'

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_dataset = MNIST(
        root=DATA_ROOT, train=True, download=True, transform=transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
    )

    test_dataset = MNIST(
        root=DATA_ROOT, train=False, download=True, transform=transform)

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
    )
    
    #gen_args = find_values(15, i)
    runs = []
    trun = []

    model = models.resnet18(num_classes=10)
    #model.classifier[1] = torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=10)
    model.conv1 = nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1, bias=False)
    #model.load_state_dict(dic)
    #model.train()


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ModuleValidator.fix(model)
    model = model.to(device)

    privacy_engine = PrivacyEngine(accountant = 'rdp_plrv')

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    print("Begin training " + str(i))
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            noise_multiplier = 1,
            epochs=EPOCHS,
            target_epsilon=3,
            target_delta=DELTA,
            max_grad_norm=GU250['clip'][i],
            PLRV_args=args,
    )

    plrv_report_acc = []
    plrv_report_ep = []

    for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
        acc, ep = train(model, train_loader, optimizer, epoch + 1, device)
        plrv_report_acc.append(acc)
        plrv_report_ep.append(ep)

    acc_plrv_2.append(plrv_report_ep)
    del model
    del optimizer
    #del results
#    torch.cuda.empty_cache()

0.922193863671697
Begin training 598




Epoch:   0%|          | 0/10 [00:00<?, ?epoch/s]



RuntimeError: Expected number of channels in input to be divisible by num_groups, but got input of shape [208, 28, 28, 28] and num_groups=32

In [None]:
acc_plrv_2 = []
from torchvision.models.mobilenet import mobilenet_v2
for i in [65546, 38231]:
    torch.cuda.empty_cache()
    args ={
        "a1":1,
        "a3":1,
        "a4":1,
        "lam":1,
        "moment":1,
        "theta":GU250['theta'][i],
        'k':GU250['k'][i],
        'mu':0,
        'sigma':0.5,
        'a':GU250['a'][i],
        'b':GU250['b'][i],
        'u':1,
        'l':0.1,
        'epsilon':1,
        'max_grad_norm': GU250['clip'][i],
        'gamma':True,
        'uniform':True,
        'truncnorm':False,
    }
    print(GU250['theta'][i])
    EPOCHS = 5
    BATCH_SIZE = 119
    #print()
    from torchvision.datasets import MNIST
    import torchvision.transforms as transforms



    DATA_ROOT = '../mnist'

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_dataset = CIFAR10(
        root=DATA_ROOT, train=True, download=True, transform=transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
    )

    test_dataset = CIFAR10(
        root=DATA_ROOT, train=False, download=True, transform=transform)

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
    )
    
    #gen_args = find_values(15, i)
    runs = []
    trun = []

    model = mobilenet_v2(num_classes=10)
    #model.classifier[1] = torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=10)
    #model.conv1 = nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1, bias=False)
    #model.load_state_dict(dic)
    #model.train()


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ModuleValidator.fix(model)
    model = model.to(device)

    privacy_engine = PrivacyEngine(accountant = 'rdp_plrv')

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    print("Begin training " + str(i))
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            noise_multiplier = 1,
            epochs=EPOCHS,
            target_epsilon=3,
            target_delta=DELTA,
            max_grad_norm=GU250['clip'][i],
            PLRV_args=args,
    )

    plrv_report_acc = []
    plrv_report_ep = []
    privacy_engine.accountant.sample_rate = 1/len(train_loader)
    epsis = []
    for j in range(len(train_loader)*5):
        privacy_engine.accountant.history = [[args, j]]
        epsis.append(privacy_engine.get_epsilon(10e-10))
    acc_plrv_2.append(epsis)

In [None]:
acc_plrv_2 = [[(10.827088,0.96580), 
              (27.407932,1.96362),
              (41.499464,2.96142),
              (47.529392,3.95914),
              (52.126053,4.95723),
              ], [(10.192589,0.10905),
              (16.807797,0.20513),
              (27.030008,0.30121),
              (38.087848,0.39730),
              (46.760616,0.49338),]
             ]

In [None]:
max(acc_plrv_2[0])

In [None]:
acc_plrv = []
for i in range(1, len(GU250), int(len(GU250)/10)):
    torch.cuda.empty_cache()
    args ={
        "a1":1,
        "a3":1,
        "a4":1,
        "lam":1,
        "moment":1,
        "theta":GU250['theta'][i],
        'k':GU250['k'][i],
        'mu':0,
        'sigma':0.5,
        'a':GU250['a'][i],
        'b':GU250['b'][i],
        'u':1,
        'l':0.1,
        'epsilon':1,
        'max_grad_norm': GU250['b'][i],
        'gamma':True,
        'uniform':False,
        'truncnorm':False,
    }
    
    #gen_args = find_values(15, i)
    runs = []
    trun = []

    model = mobilenet_v2(num_classes=10)
    #model.classifier[1] = torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=10)
    #model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    #model.load_state_dict(dic)
    #model.train()


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ModuleValidator.fix(model)
    model = model.to(device)

    privacy_engine = PrivacyEngine(accountant = 'rdp_plrv')

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    print("Begin training " + str(i))
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            noise_multiplier = 1,
            epochs=10,
            target_epsilon=0.15,
            target_delta=DELTA,
            max_grad_norm=GU250['clip'][i],
            PLRV_args=args,
    )

    plrv_report_acc = []
    plrv_report_ep = []

    for epoch in tqdm(range(5), desc="Epoch", unit="epoch"):
        acc, ep = train(model, train_loader, optimizer, epoch + 1, device)
        plrv_report_acc.append(acc)
        plrv_report_ep.append(ep)

    acc_plrv.append((G250['distortion'][i], test(model, test_loader, device)))
    del model
    del optimizer
    #del results
#    torch.cuda.empty_cache()

In [None]:
acc_plrv

In [None]:
g_eps = pd.read_csv('../arguments/gaussian_eps.csv', names = ['eps'])
g_dist = pd.read_csv('../arguments/gaussian_dist.csv', names = ['distortion'])
g_clip = pd.read_csv('../arguments/gaussian_clip.csv', names = ['clip'])

In [None]:
g_clip = g_clip.dropna()
g_clip = g_clip.reset_index()

In [None]:
acc_rdp = []
from torchvision.models.mobilenet import mobilenet_v2
for i in [0.5, 5]:
    print(i)
    #gen_args = find_values(15, i)
    runs = []
    trun = []

    model = mobilenet_v2(num_classes=10)
    #model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    #model.load_state_dict(dic)
    #model.train()


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ModuleValidator.fix(model)
    model = model.to(device)
    
    privacy_engine = PrivacyEngine(accountant = 'rdp')

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    print("Begin training " + str(i))
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            #noise_multiplier = 1,
            epochs=5,
            target_epsilon=i,
            target_delta=1e-10,
            max_grad_norm=10,
            #PLRV_args=convert_params(gen_args),
    )

    plrv_report_acc = []
    plrv_report_ep = []

    for epoch in tqdm(range(5), desc="Epoch", unit="epoch"):
        acc, ep = train(model, train_loader, optimizer, epoch + 1, device)
        plrv_report_acc.append(acc)
        plrv_report_ep.append(ep)

    acc_rdp.append((plrv_report_acc, plrv_report_ep))
    del model
    del optimizer
    #del results
    torch.cuda.empty_cache()
    
    privacy_engine.accountant.sample_rate = 1/len(train_loader)
    epsis = []
    for i in range(len(train_loader)*5):
        privacy_engine.accountant.history = [[args, i]]
        epsis.append(privacy_engine.get_epsilon(10e-10))
    acc_rdp.append(epsis)

In [None]:
from opacus.accountants.utils import get_noise_multiplier
acc_rdp = []
from torchvision.models.mobilenet import mobilenet_v2
for i in [0.5, 5]:
    print(i)
    #gen_args = find_values(15, i)
    runs = []
    trun = []

    model = mobilenet_v2(num_classes=10)
    #model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    #model.load_state_dict(dic)
    #model.train()


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ModuleValidator.fix(model)
    model = model.to(device)
    
    privacy_engine = PrivacyEngine(accountant = 'rdp')

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    print("Begin training " + str(i))
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            #noise_multiplier = 1,
            epochs=5,
            target_epsilon=i,
            target_delta=1e-10,
            max_grad_norm=10,
            #PLRV_args=convert_params(gen_args),
    )

    plrv_report_acc = []
    plrv_report_ep = []
    
    nm = noise_multiplier=get_noise_multiplier(
                target_epsilon=i,
                target_delta=DELTA,
                sample_rate=1/len(train_loader),
                epochs=EPOCHS,
                accountant=privacy_engine.accountant.mechanism(),
            )
    print(nm)
    #privacy_engine.accountant.sample_rate = BATCH_SIZE/50000
    epsis = []
    for j in range(len(train_loader)*5):
        privacy_engine.accountant.history = [(nm, 1/len(train_loader), j)]
        epsis.append(privacy_engine.get_epsilon(DELTA))
    acc_rdp.append(epsis)

In [None]:
 acc_rdp

In [None]:
acc_rdp = [[(10.116650,0.43046), 
              (10.711590,0.44728),
              (10.492322,0.46410),
              (10.288952,0.48092),
              (10.566023,0.49774),
              ], [(10.417261,4.40004),
              (10.726745,4.60714),
              (11.139498,4.75362),
              (12.236695,4.87810),
              (12.436983,4.99017),]
             ]

In [None]:

model = models.resnet18(num_classes=10)
model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model = ModuleValidator.fix(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
privacy_engine = PrivacyEngine(accountant = 'rdp_plrv')

optimizer = optim.RMSprop(model.parameters(), lr=LR)
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            noise_multiplier = 1,
            epochs=10,
            target_epsilon=0.15,
            target_delta=DELTA,
            max_grad_norm=GU250['clip'][i],
            PLRV_args=args,
    )
dic = model.state_dict()

In [None]:
del model

In [None]:
dist_x = [i[0] for i in acc_plrv_2]
acc_y = [i[1] for i in acc_plrv_2]
dist_g = [i[0] for i in acc_rdp]
acc_g = [i[1] for i in acc_rdp]
import matplotlib.pyplot as plt
plt.xlabel("distortion")
plt.ylabel("Accuracy")
plt.title("distortion vs accuracy")
plt.yscale('log')
fig, ax1 = plt.subplots()
ax1.plot(dist_x, acc_y, 'bo-', label="PLRV")
ax1.set_ylabel("Accuracy")
#ax1.set_yscale('log')
ax2 = ax1.twiny()
ax2.plot(dist_g, acc_g, 'ro-', label="Guassian")
#ax2.set_yscale('log')
plt.legend(loc="lower right")
plt.show()

In [None]:
acc_plrv_2.sort()
acc_rdp.sort()
dist_x = [i[2] for i in acc_plrv_2]
acc_y = [i[1] for i in acc_plrv_2]
dist_g = [i[0] for i in acc_rdp]
acc_g = [i[1] for i in acc_rdp]
import matplotlib.pyplot as plt
plt.xlabel("distortion")
plt.ylabel("Accuracy")
plt.title("distortion vs accuracy")
plt.yscale('log')
fig, ax1 = plt.subplots()
ax1.plot(dist_x, acc_y, 'bo-', label="PLRV")
ax1.set_ylabel("Accuracy")
ax1.set_xscale('log')
ax2 = ax1.twiny()
ax2.plot(dist_g, acc_g, 'ro-', label="Guassian")
#ax2.set_yscale('log')
plt.legend(loc="lower right")
plt.show()

In [None]:
import matplotlib.ticker as ticker
#acc_plrv_2.sort()
#acc_rdp.sort()
x_vals = range(1,6)
print(acc_plrv_2)
plrv1 = [i[1] for i in acc_plrv_2[0]]
plrv2 = [i[1] for i in acc_plrv_2[1]]
print(acc_rdp)
g1 = [i[1] for i in acc_rdp[0]]
g2 = [i[1] for i in acc_rdp[1]]
import matplotlib.pyplot as plt
plt.xlabel("Epochs")
plt.ylabel("Epsilon")
plt.title("Epochs vs Epsilon")
#plt.plot(x_vals, plrv1, 'b+-', label="PLRV 45% accuracy")
#plt.plot(x_vals, plrv2, 'bo--', label="PLRV 10% accuracy")
plt.plot(x_vals, g1, 'ro-', label="gaussian ϵ=0.5")
#plt.plot(x_vals, g2, 'r+-', label="gaussian ϵ=5")
plt.plot(x_vals, plrv2, 'bo-', label="PLRV  ϵ=0.5")
#plt.plot(x_vals, plrv1, 'b+-', label="PLRV  ϵ=5")
#plt.yticks(np.logspace(0, 200, 5)) 
#plt.yscale("log")
#plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:.2f}'))
plt.legend(loc="best")
plt.show()

In [None]:
import matplotlib.ticker as ticker
#acc_plrv_2.sort()
#acc_rdp.sort()
x_vals = range(1,2106)

plrv1 = acc_plrv_2[0]
plrv2 = acc_plrv_2[1]
g2 = acc_rdp[1]
g1 = acc_rdp[0]
import matplotlib.pyplot as plt
plt.xlabel("Steps")
plt.ylabel("Epsilon")
plt.title("Steps vs Epsilon")
#plt.plot(x_vals, plrv1, 'b+-', label="PLRV 45% accuracy")
#plt.plot(x_vals, plrv2, 'bo--', label="PLRV 10% accuracy")
plt.plot(x_vals, g1, 'r--', label="gaussian ϵ=0.5")
plt.plot(x_vals, g2, 'r-', label="gaussian ϵ=5")
plt.plot(x_vals, plrv2, 'b--', label="PLRV  ϵ=0.5")
plt.plot(x_vals, plrv1, 'b-', label="PLRV  ϵ=5")
#plt.yticks(np.logspace(0, 200, 5)) 
#plt.yscale("log")
#plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:.2f}'))
plt.legend(loc="best")
plt.show()

In [None]:
import matplotlib.ticker as ticker
#acc_plrv_2.sort()
#acc_rdp.sort()
x_vals = range(1,len(train_loader)+1)

plrv1 = acc_plrv_2[0][0:len(train_loader)]
plrv2 = acc_plrv_2[1][0:len(train_loader)]
g2 = acc_rdp[1][0:len(train_loader)]
g1 = acc_rdp[0][0:len(train_loader)]
import matplotlib.pyplot as plt
plt.xlabel("Steps")
plt.ylabel("Epsilon")
plt.title("Steps vs Epsilon")
#plt.plot(x_vals, plrv1, 'b+-', label="PLRV 45% accuracy")
#plt.plot(x_vals, plrv2, 'bo--', label="PLRV 10% accuracy")
plt.plot(x_vals, g1, 'r--', label="gaussian ϵ=0.5")
plt.plot(x_vals, g2, 'r-', label="gaussian ϵ=5")
plt.plot(x_vals, plrv2, 'b--', label="PLRV  ϵ=0.5")
plt.plot(x_vals, plrv1, 'b-', label="PLRV  ϵ=5")
#plt.yticks(np.logspace(0, 200, 5)) 
#plt.yscale("log")
#plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:.2f}'))
plt.legend(loc="best")
plt.show()

In [None]:
import matplotlib.ticker as ticker
#acc_plrv_2.sort()
#acc_rdp.sort()
x_vals = range(1,len(train_loader)+1)

plrv1 = acc_plrv_2[0][0:len(train_loader)]
plrv2 = acc_plrv_2[1][0:len(train_loader)]
g2 = acc_rdp[1][0:len(train_loader)]
g1 = acc_rdp[0][0:len(train_loader)]
import matplotlib.pyplot as plt
plt.xlabel("Steps")
plt.ylabel("Epsilon")
plt.title("Steps vs Epsilon")
#plt.plot(x_vals, plrv1, 'b+-', label="PLRV 45% accuracy")
#plt.plot(x_vals, plrv2, 'bo--', label="PLRV 10% accuracy")
plt.plot(x_vals, g1, 'r--', label="gaussian ϵ=0.5")
#plt.plot(x_vals, g2, 'r-', label="gaussian ϵ=5")
plt.plot(x_vals, plrv2, 'b--', label="PLRV  ϵ=0.5")
#plt.plot(x_vals, plrv1, 'b-', label="PLRV  ϵ=5")
#plt.yticks(np.logspace(0, 200, 5)) 
#plt.yscale("log")
#plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:.2f}'))
plt.legend(loc="best")
plt.show()

In [None]:
acc_plrv = []
for i in range(1, 11):
    gen_args = find_values(13, i)
    runs = []
    trun = []

    #model = models.resnet18(num_classes=10)
    #model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    model.load_state_dict(dic)
    model.train()


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = model.to(device)

    privacy_engine = PrivacyEngine(accountant = 'rdp_plrv')

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    print("Begin training " + str(i))
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            noise_multiplier = 1,
            epochs=EPOCHS,
            target_epsilon=0.15,
            target_delta=DELTA,
            max_grad_norm=i+1,
            PLRV_args=convert_params(gen_args),
    )

    plrv_report_acc = []
    plrv_report_ep = []

    for epoch in tqdm(range(1), desc="Epoch", unit="epoch"):
        acc, ep = train(model, train_loader, optimizer, epoch + 1, device)
        plrv_report_acc.append(acc)
        plrv_report_ep.append(ep)

    acc_plrv.append(test(model, test_loader, device))
#    del model
    del optimizer
    #del results
#    torch.cuda.empty_cache()

In [None]:
3**False

In [None]:
import sklearn.linear_model as lm

lm.LinearRegression().fit(X, plrv1)

In [None]:
X = [50000/120*1,50000/120*2,50000/120*3,50000/120*4,50000/120*5]

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate the slope and intercept of the linear regression line
def linear_regression(x, y):
    # Using the formula for linear regression: y = mx + b
    # m = (N * Σ(xy) - Σx * Σy) / (N * Σ(x^2) - (Σx)^2)
    # b = (Σy - m * Σx) / N

    N = len(x)
    Σx = np.sum(x)
    Σy = np.sum(y)
    Σxy = np.sum(x * y)
    Σx2 = np.sum(x ** 2)

    # Calculate slope (m) and intercept (b)
    m = (N * Σxy - Σx * Σy) / (N * Σx2 - Σx ** 2)
    b = (Σy - m * Σx) / N

    return m, b

# Function to predict y values based on the linear model
def predict(x, m, b):
    return m * x + b

# Example input data (x and y)
x = np.array(X)
y = np.array(plrv1)

# Calculate the slope and intercept
m, b = linear_regression(x, y)

# Display the result
print(f"Linear regression equation: y = {m:.2f}x + {b:.2f}")

# Predict y values based on the regression line
y_pred = predict(x, m, b)

# Plot the data points and the regression line
plt.scatter(x, y, color='blue')
plt.plot(x, y_pred, color='red', label=f'Regression line: y = {m:.2E}x + {b:.2f}')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('PLRV Linear Regression')
plt.legend()
plt.show()

In [None]:
x = np.array(X)
y = np.array(plrv2)

# Calculate the slope and intercept
m, b = linear_regression(x, y)

# Display the result
print(f"Linear regression equation: y = {m:.2f}x + {b:.2f}")

# Predict y values based on the regression line
y_pred = predict(x, m, b)

# Plot the data points and the regression line
plt.scatter(x, y, color='blue', label='Data points')
plt.plot(x, y_pred, color='red', label=f'Regression line: y = {m:.2E}x + {b:.2f}')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear Regression')
plt.legend()
plt.show()

In [None]:
x = np.array(X)
y = np.log(np.array(g1))

# Calculate the slope and intercept
m, b = linear_regression(x, y)

# Display the result
print(f"Linear regression equation: y = {m:.2E}x + {b:.2f}")

# Predict y values based on the regression line
y_pred = predict(x, m, b)

# Plot the data points and the regression line
plt.scatter(x, y, color='blue', label='Data points')
plt.plot(x, y_pred, color='red', label=f'Regression line: y = {m:.2f}ln(x) + {b:.2f}')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear Regression')
plt.legend()
plt.show()

In [None]:
x = np.log(np.array(X))
y = np.array(g2)

# Calculate the slope and intercept
m, b = linear_regression(x, y)

# Display the result
print(f"Linear regression equation: y = {m:.2E}ln(x) + {b:.2f}")

# Predict y values based on the regression line
y_pred = predict(x, m, b)

# Plot the data points and the regression line
plt.scatter(x, y, color='blue', label='Data points')
plt.plot(x, y_pred, color='red', label=f'Regression line: y = {m:.2f}ln(x) + {b:.2f}')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear Regression')
plt.legend()
plt.show()

In [None]:
len(acc_plrv_2[1])

In [None]:
import torch
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large')

In [None]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import torch
from datasets import load_dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load the Question NLI dataset
dataset = load_dataset("snli")
# Load the Roberta tokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Preprocess the data
def preprocess_function(examples):
    return tokenizer(examples['premise'], examples['hypothesis'], truncation=True, padding=True)

# Apply preprocessing to train, validation, and test sets
tokenized_datasets = dataset.map(preprocess_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets = tokenized_datasets.filter(lambda example: example['labels'] != -1)

# Load the Roberta model for sequence classification
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=3)

# Define the metric for evaluation
def compute_metrics(p):
    predictions, labels = p
    predictions = torch.argmax(torch.tensor(predictions), dim=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",# Evaluate after every epoch
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    logging_dir="./logs",
    load_best_model_at_end=True,
)

device = torch.device("cpu")
model.to(device)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=lambda data: tokenizer.pad(data, padding=True, return_tensors="pt")  # Use dynamic padding
)

trainer.args.device = "cpu"

# Train the model
trainer.train()

# Evaluate on the test set
test_results = trainer.evaluate(tokenized_datasets["test"])

# Print the final test results
print(f"Test results: {test_results}")


In [None]:
print(set(tokenized_datasets['train']['labels']))  # Check label values in the training set
print(set(tokenized_datasets['validation']['labels']))  # Check label values in the validation set

In [None]:
max(tokenized_datasets['train']['labels'])

In [None]:
import os
from datasets import load_dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score
import torch

# Ensure that the model runs on the CPU
device = torch.device("cpu")

# Load the Question NLI dataset
dataset = load_dataset("snli")

# Load the Roberta tokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Preprocess the data
def preprocess_function(examples):
    # Tokenize the premise and hypothesis
    return tokenizer(
        examples['premise'], 
        examples['hypothesis'], 
        truncation=True, 
        padding='max_length', 
        max_length=512  # Ensure input size consistency
    )

# Apply preprocessing to train, validation, and test sets
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# Ensure that the labels are correctly passed as "labels" in the dataset
# In SNLI, the labels are already present in the 'label' field. We rename it to 'labels'
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

# Remove samples with label -1 (if they exist)
tokenized_datasets = tokenized_datasets.filter(lambda example: example['labels'] != -1)

# Check to ensure there are no remaining -1 labels
print(set(tokenized_datasets['train']['labels']))  # Check the label values after filtering

# Load the Roberta model for sequence classification
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=3)

# Move the model to CPU
#model.to(device)

# Define the metric for evaluation
def compute_metrics(p):
    predictions, labels = p
    predictions = torch.argmax(torch.tensor(predictions), dim=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Set up training arguments to run on CPU
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",  # Evaluate after every epoch
    save_strategy="epoch",        # Save after every epoch
    per_device_train_batch_size=8,  # Lower batch size for CPU
    per_device_eval_batch_size=8,   # Lower batch size for CPU
    num_train_epochs=3,
    logging_dir="./logs",
    load_best_model_at_end=True,
    use_cpu=False,  # Disable CUDA to force CPU
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Evaluate on the test set
test_results = trainer.evaluate(tokenized_datasets["test"])

# Print the final test results
print(f"Test results: {test_results}")


In [None]:
!pip install datasets transformers[torch] scikit-learn