In [1]:
import torch
import os
import numpy as np
# Make the project directory the working directory; the relative
# 'explanations/...' paths loaded below are resolved against it
# (presumably the local `utils` package import relies on it as well).
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
os.chdir('C:/Users/weckbecker/DualView/')

from sklearn.linear_model import LogisticRegression as LR
from sklearn.svm import LinearSVC as SVC
from sklearn.metrics.pairwise import cosine_similarity
from random import sample as samp
from random import seed

In [2]:
# Which explanation dump to analyse: 'std' for the standard dataset,
# 'corrupt' for the corrupted one. Changing this single value selects the
# matching pair of tensor files, so no lines need to be commented in or out.
# NOTE(review): the model checkpoint loaded further down is selected by its
# own hard-coded path and does not follow this switch.
dataset_variant = 'std'
explanation_dir = (
    f'explanations/MNIST/{dataset_variant}/'
    f'basic_conv_{dataset_variant}/dualview_0.001'
)

# Paths are relative to the current working directory.
sample = torch.load(f'{explanation_dir}/samples_tensor')
label = torch.load(f'{explanation_dir}/labels_tensor')

In [3]:
# Show both tensor shapes side by side to check that the row counts agree.
tuple(tensor.shape for tensor in (sample, label))

(torch.Size([60000, 100]), torch.Size([60000]))

In [4]:
# Use the entire dataset for training; the test split is therefore empty.
n_total = len(sample)
train_size = int(1 * n_total)
test_size = n_total - train_size

# Rows before the cut form the training split, rows from the cut onwards the
# test split.
sample_train, sample_test = sample[:train_size], sample[train_size:]
label_train, label_test = label[:train_size], label[train_size:]

In [5]:
# Fit two L2-penalised linear classifiers on the training split and keep
# their coefficient matrices for the comparisons below.
tight_tol = 1e-7

lr = LR(penalty='l2', tol=tight_tol, C=1.0, max_iter=1000).fit(sample_train, label_train)
lr_w = lr.coef_

# SVC is the LinearSVC alias from the import cell; dual=False asks it to solve
# the primal problem.
svc = SVC(penalty='l2', tol=tight_tol, C=1.0, max_iter=10000, dual=False).fit(sample_train, label_train)
svc_w = svc.coef_

In [6]:
# Display the shape of the logistic-regression coefficient matrix.
np.shape(lr_w)

(10, 100)

In [7]:
from utils.models import load_model

# Settings that identify the network and the location of its checkpoint.
model_name, dataset_name, num_classes = 'basic_conv', 'MNIST', 10
model_path = 'C:/Users/weckbecker/DualView/checkpoints/MNIST/std/basic_conv_std/MNIST_basic_conv'
device = 'cpu'

# Build the architecture first, then restore the weights stored under the
# checkpoint's "model_state" key.
model = load_model(model_name, dataset_name, num_classes)
model = model.to(device)
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint["model_state"])

# Put the model into evaluation mode; as the cell's last expression this also
# displays the model structure.
model.to(device).eval()

BasicConvModel(
  (features): Sequential(
    (conv-0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
    (relu-0): ReLU()
    (conv-1): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (relu-1): ReLU()
    (conv-2): Conv2d(10, 5, kernel_size=(3, 3), stride=(1, 1))
    (relu-2): ReLU()
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (fc-0): Linear(in_features=2420, out_features=500, bias=True)
    (relu-3): ReLU()
    (fc-1): Linear(in_features=500, out_features=100, bias=True)
    (relu-4): ReLU()
  )
  (classifier): Linear(in_features=100, out_features=10, bias=False)
)

In [8]:
# Weight matrix of the network's final linear layer, as a NumPy array.
cnn_w = model.classifier.weight.detach().numpy()

# cosine_similarity returns the full pairwise matrix; its diagonal holds the
# similarity between rows with the same index, which is then averaged.
for first_w, second_w in ((cnn_w, svc_w), (cnn_w, lr_w), (lr_w, svc_w)):
    matched_rows = np.diag(cosine_similarity(first_w, second_w))
    print(np.average(matched_rows))

0.7107294380608492
0.850249240329069
0.8049249654039301


In [9]:
import torchvision
from torchvision import transforms

# MNIST images read from the local dataset folder; the only transform applied
# is the conversion to tensors.
trainset = torchvision.datasets.MNIST(
    'C:/Users/weckbecker/DualView/src/datasets',
    transform=transforms.Compose([transforms.ToTensor()]),
)

# One unshuffled batch containing the whole dataset, so the rows of `logits`
# follow the dataset order.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=len(trainset),
    shuffle=False,
)
data, labels = next(iter(train_loader))

# torch.no_grad() has to be entered as a context manager (or used as a
# decorator). Calling it as a bare statement only creates and discards the
# object, leaving gradient tracking switched on for the forward pass.
with torch.no_grad():
    logits = model(data)

In [10]:
# Least-squares regression from the sample features onto the network's logits.
# The textbook closed-form solution is inv(X.T @ X) @ X.T @ y; scikit-learn's
# LinearRegression is used here rather than computing it by hand.
# NOTE(review): LinearRegression fits an intercept unless told otherwise, so
# coef_ need not equal the intercept-free closed form exactly - confirm.
from sklearn.linear_model import LinearRegression

logits_train = logits.detach().numpy()

lin_reg = LinearRegression().fit(sample_train, logits_train)
linreg_w = lin_reg.coef_
linreg_w.shape

(10, 100)

In [11]:
# Compare the regression weights with each of the other three weight sets.
for other_w in (cnn_w, svc_w, lr_w):
    print(np.average(np.diag(cosine_similarity(other_w, linreg_w))))

0.8021429
0.6237550319703999
0.7496541667140237


In [12]:
# Similarity of each fitted model's weights to the network's classifier weights.
for fitted_w in (linreg_w, svc_w, lr_w):
    print(np.average(np.diag(cosine_similarity(fitted_w, cnn_w))))

0.8021429
0.7107294380608492
0.850249240329069


In [13]:
# Remove the last 1000 datapoints.
# The cut position is derived from the actual number of rows instead of a
# hard-coded 59000, so the statement above holds for any dataset size
# (with 60000 rows the cut is still at 59000).
n_removed = 1000
n_kept = max(len(sample_train) - n_removed, 0)

sample_train_cut = sample_train[:n_kept, :]
label_train_cut = label_train[:n_kept]
logits_train_cut = logits_train[:n_kept]

In [14]:
# Refit the same three models, this time on the truncated training data.
shared_tol = 1e-7

lr_2 = LR(penalty='l2', tol=shared_tol, C=1.0, max_iter=1000).fit(sample_train_cut, label_train_cut)
lr_w_2 = lr_2.coef_

svc_2 = SVC(penalty='l2', tol=shared_tol, C=1.0, max_iter=10000, dual=False).fit(sample_train_cut, label_train_cut)
svc_w_2 = svc_2.coef_

lin_reg_2 = LinearRegression().fit(sample_train_cut, logits_train_cut)
linreg_w_2 = lin_reg_2.coef_

In [15]:
# Pairwise similarities for the fits on all data, then (after an empty line)
# the same pairs for the fits on the truncated data.
full_fit = (svc_w, lr_w, linreg_w)
cut_fit = (svc_w_2, lr_w_2, linreg_w_2)

for group_index, (svc_mat, lr_mat, linreg_mat) in enumerate((full_fit, cut_fit)):
    if group_index:
        print('')
    for left_w, right_w in ((svc_mat, lr_mat), (svc_mat, linreg_mat), (lr_mat, linreg_mat)):
        print(np.average(np.diag(cosine_similarity(left_w, right_w))))

0.8049249654039301
0.6237550319703999
0.7496541667140237

0.801581701103179
0.6264896380054895
0.7515252965494627


# Which regularisation stays more faithful to the weight vector of the perceptron?

In [16]:
# Sweep the regularisation parameter C over 10^-6 ... 10^3 and print, for each
# value, the average cosine similarity between the fitted weights and the
# network's classifier weights.
#
# The messages use a plain '\n' followed by a space. A backslash followed by a
# space is not a valid escape sequence: it prints a stray backslash and newer
# Python versions warn about it.
#
# NOTE(review): both loops rebind `lr`, `lr_w`, `svc` and `svc_w`, which were
# first assigned by the C=1.0 fits earlier in the notebook; once this cell has
# run they refer to the C=1000 fits.
for power in range(-6, 4, 1):
    c_value = 10**power
    lr = LR(penalty='l2', tol=0.00001, C=c_value, max_iter=1000)
    lr.fit(sample_train, label_train)
    lr_w = lr.coef_
    similarity = np.average(np.diag(cosine_similarity(lr_w, cnn_w)))
    print(f'Logistic regression, C = {c_value}\n Cosine similarity: {similarity}')

for power in range(-6, 4, 1):
    c_value = 10**power
    svc = SVC(penalty='l2', tol=0.00001, C=c_value, max_iter=10000, dual=False)
    svc.fit(sample_train, label_train)
    svc_w = svc.coef_
    similarity = np.average(np.diag(cosine_similarity(svc_w, cnn_w)))
    print(f'SVC, C = {c_value}\n Cosine similarity: {similarity}')

  print(f'Logistic regression, C = {10**power}\n\ Cosine similarity: {np.average(np.diag(cosine_similarity(lr_w, cnn_w)))}')
  print(f'SVC, C = {10**power}\n\ Cosine similarity: {np.average(np.diag(cosine_similarity(svc_w, cnn_w)))}')


Logistic regression, C = 1e-06
\ Cosine similarity: 0.581425150057394
Logistic regression, C = 1e-05
\ Cosine similarity: 0.6129486973707714
Logistic regression, C = 0.0001
\ Cosine similarity: 0.6938531577662418
Logistic regression, C = 0.001
\ Cosine similarity: 0.7940323525656501
Logistic regression, C = 0.01
\ Cosine similarity: 0.8759043531233697
Logistic regression, C = 0.1
\ Cosine similarity: 0.9005550148086428
Logistic regression, C = 1
\ Cosine similarity: 0.8473164223742602
Logistic regression, C = 10
\ Cosine similarity: 0.7617532344357535
Logistic regression, C = 100
\ Cosine similarity: 0.69447742643438
Logistic regression, C = 1000
\ Cosine similarity: 0.671170134718768
SVC, C = 1e-06
\ Cosine similarity: 0.6246915479825653
SVC, C = 1e-05
\ Cosine similarity: 0.6464378787129859
SVC, C = 0.0001
\ Cosine similarity: 0.6443789172761785
SVC, C = 0.001
\ Cosine similarity: 0.6937400559517685
SVC, C = 0.01
\ Cosine similarity: 0.7222426014456499
SVC, C = 0.1
\ Cosine similarit