In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import sklearn
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from scipy.special import softmax
from tqdm import tqdm 
from collections import Counter
import warnings
from sklearn.exceptions import ConvergenceWarning
# Report every ConvergenceWarning occurrence rather than only the first per location.
warnings.simplefilter("always", ConvergenceWarning)

In [2]:
from maml.datasets.miniimagenet import MiniimagenetMetaDataset
from maml.models.gated_conv_net_original import ImpRegConvModel
from maml.models.conv_embedding_model import RegConvEmbeddingModel
from maml.logistic_regression_utils import logistic_regression_grad_with_respect_to_w, logistic_regression_hessian_pieces_with_respect_to_w, logistic_regression_hessian_with_respect_to_w, logistic_regression_mixed_derivatives_with_respect_to_w_then_to_X
from maml.logistic_regression_utils import logistic_regression_mixed_derivatives_with_respect_to_w_then_to_X_left_multiply

In [3]:
# Make CUDA device index 0 the current device for subsequent CUDA work.
torch.cuda.set_device(0)

In [4]:
# Channel count handed to the model constructor further down.
num_channels = 64
# Build the miniImageNet meta-dataset. Argument semantics are defined by
# MiniimagenetMetaDataset (not visible in this notebook); the inline comments
# below are the author's own descriptions.
# NOTE(review): the saved output of this cell reads "MiniImagenet val" although
# split='train' is passed — confirm which split is actually loaded.
dataset = MiniimagenetMetaDataset(
    root='data',
    img_side_len=84,
    num_classes_per_batch=16,
    num_samples_per_class=600, # num train samples per class
    num_total_batches=1,
    num_val_samples=0, # num test samples per class
    meta_batch_size=1,
    split='train', # meta train/val/test
    num_workers=4,
    device='cuda')

MiniImagenet val


In [5]:
# Instantiate the network with input/output sizes taken from the dataset.
model = ImpRegConvModel(
        input_channels=dataset.input_size[0],
        output_size=dataset.output_size,
        num_channels=num_channels,
        img_side_len=dataset.input_size[1],
        use_max_pool=False, # currently not used
        verbose=False,
        use_group_norm=True,
        retain_activation=False)
# Deserialize onto the CPU first. Without map_location, torch.load restores each
# tensor onto the device it was saved from, which fails when that device does not
# exist on this machine. The weights reach the GPU via model.to('cuda') below.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
state_dict = torch.load(
    './train_dir/impregmaml_minim_5w1s_sans_modulation_10_groupnorm/maml_impregconv_52000.pt',
    map_location='cpu')
model.load_state_dict(state_dict['model'])
# Last expression of the cell: .to() returns the module, so its summary is displayed.
model.to('cuda')

ImpRegConvModel(
  (features): Sequential(
    (layer1_conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (layer1_norm): GroupNorm(32, 64, eps=1e-05, affine=True)
    (layer1_lrelu): LeakyReLU(negative_slope=0.1)
    (layer1_max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (layer2_conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (layer2_norm): GroupNorm(32, 64, eps=1e-05, affine=True)
    (layer2_lrelu): LeakyReLU(negative_slope=0.1)
    (layer2_max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (layer3_conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (layer3_norm): GroupNorm(32, 64, eps=1e-05, affine=True)
    (layer3_lrelu): LeakyReLU(negative_slope=0.1)
    (layer3_max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (layer4_conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding

In [6]:
# Take the first item yielded by the dataset and unpack it into its train and test parts.
train_task_batch, test_task_batch = next(iter(dataset))

In [7]:
# First task of the meta-batch.
train_task = train_task_batch[0]
# Host-side NumPy copy of the inputs with axes reordered to (0, 2, 3, 1),
# i.e. the original axis 1 moved last — the layout used by plt.imshow later on.
images = np.transpose(train_task.x.cpu().numpy(), (0, 2, 3, 1))

In [8]:
# Sanity check: there is exactly one label per input sample.
assert train_task.x.size(0) == len(train_task.y)
# Number of samples per label value; shown as the cell output.
Counter(train_task.y.detach().cpu().numpy())

Counter({3: 600,
         7: 600,
         9: 600,
         12: 600,
         0: 600,
         1: 600,
         6: 600,
         5: 600,
         8: 600,
         11: 600,
         15: 600,
         10: 600,
         13: 600,
         4: 600,
         14: 600,
         2: 600})

In [9]:
# Run the task through the model in fixed-size chunks and collect the outputs
# (and matching labels) as NumPy arrays on the host.
batch_size = 100  # samples per forward pass
X = []
y = []
# Inference only: no_grad() keeps autograd from recording a graph for every forward
# pass, which would otherwise hold activations in GPU memory for no benefit.
with torch.no_grad():
    for i in tqdm(range(0, len(train_task.x), batch_size)):
        # Slicing past the end is safe; the last chunk is simply shorter.
        X.append(model(train_task.x[i:i + batch_size], modulation=None).detach().cpu().numpy())
        y.append(train_task.y[i:i + batch_size].detach().cpu().numpy())

 19%|█▉        | 18/96 [00:00<00:43,  1.77it/s]

Before Modulation
tensor([320.9669, 306.1135, 309.2906, 315.7489, 319.2601, 374.0381, 294.7958,
        308.0747, 352.2834, 332.7489, 353.4807, 306.4835, 304.0098, 322.7579,
        353.4281, 323.0651, 315.7542, 311.6960, 322.7111, 319.1454, 384.0452,
        323.6081, 348.5122, 350.9153, 326.7862, 366.6815, 370.7954, 317.5385,
        361.9706, 305.5706, 375.3707, 354.7528, 321.2337, 341.3530, 346.0609,
        314.1640, 305.9853, 307.0463, 324.1154, 363.3754, 331.4963, 323.4962,
        318.1687, 300.4783, 334.7975, 318.1122, 313.7548, 312.7085, 348.3203,
        323.2894, 292.4219, 288.0349, 368.3343, 348.8164, 338.6063, 303.9785,
        347.2462, 325.0019, 355.7615, 368.3315, 350.5046, 311.4275, 341.8011,
        377.8944, 345.1176, 327.3841, 345.5174, 322.8875, 353.0949, 320.6765,
        370.5136, 353.2720, 307.4763, 305.5596, 349.3232, 350.5654, 382.0580,
        337.1615, 345.2189, 359.6410, 364.0529, 305.2354, 367.1454, 336.4810,
        310.9576, 350.8200, 364.6143, 333.1251

100%|██████████| 96/96 [00:01<00:00, 69.81it/s]


In [10]:
# Join the per-chunk arrays along the sample axis.
# X and y are rebound from list to ndarray here, so the isinstance guards make the
# cell safe to re-run: concatenating an already-joined 2-D X would flatten it to 1-D,
# and concatenating a 1-D y would raise on its zero-dimensional elements.
if isinstance(X, list):
    X = np.concatenate(X, axis=0)
if isinstance(y, list):
    y = np.concatenate(y, axis=0)

In [11]:
# Feature matrix dimensions: (number of samples, features per sample).
X.shape

(9600, 1601)

In [12]:
# Label vector length; should equal the first dimension of X.
y.shape

(9600,)

In [21]:
# L2 penalty weight; LogisticRegression receives it as C = 1 / l2_lambda.
l2_lambda = 0.001
# Fit a multinomial logistic regression without intercept on the extracted features.
# record=True diverts any warning raised inside the block into the list `wn`
# instead of displaying it.
with warnings.catch_warnings(record=True) as wn:
    lr_model = LogisticRegression(solver='lbfgs', penalty='l2',
        C=1/l2_lambda, # now use _l2_lambda instead of 2 * _l2_lambda
        tol=1e-6, max_iter=1000,
        multi_class='multinomial', fit_intercept=False)
    lr_model.fit(X, y)
# Surface what was captured: otherwise a ConvergenceWarning (solver stopped at
# max_iter) would be swallowed and the fit would look clean when it is not.
for caught in wn:
    print(f"{caught.category.__name__}: {caught.message}")

In [22]:
# Training accuracy in percent. Normalise by the number of labels actually compared;
# `images` is a separate array that is rebound elsewhere in the notebook, so using
# its length would give a wrong rate whenever it differs from len(y).
print(f"Accuracy : {np.sum(lr_model.predict(X) == y)*100./len(y)}")

Accuracy : 94.65625


## Old (earlier scratch cells, left unexecuted)

In [None]:
# get features 
# Get features for the first training task in a single forward pass.
# NOTE(review): no torch.no_grad() and no chunking here, unlike the chunked
# extraction loop earlier in this notebook — memory use may be much higher.
X = model(train_task_batch[0].x, modulation=None)

In [None]:
# Inspect the shape of the model output computed in the previous cell.
X.shape

In [None]:
# Fetch the first meta-batch. next(iter(...)) is the form used earlier in this
# notebook; unlike a for/break loop it raises StopIteration on an empty dataset
# instead of silently leaving the two names unset or stale.
train_task_batch, test_task_batch = next(iter(dataset))

In [None]:
# First task from each half of the meta-batch.
train_task, test_task = train_task_batch[0], test_task_batch[0]
# Shape of the training inputs, for a quick sanity check.
print(train_task.x.shape)

In [None]:
# Host-side NumPy copy of the test inputs with axes reordered to (0, 2, 3, 1).
# NOTE(review): this rebinds `images`, which held the training images earlier.
images = test_task.x.cpu().numpy().transpose(0,2,3,1)

In [None]:
# Show the image at index 0.
plt.imshow(images[0])

In [None]:
# Show the image at index 595 (presumably the start of the second class — TODO confirm).
plt.imshow(images[595])

In [None]:
# Show the image at index 1190 (2 * 595).
plt.imshow(images[1190])

In [None]:
# Re-create the network with sizes taken from the dataset.
# NOTE(review): same constructor call as the model cell near the top of the
# notebook; running this rebinds `model` to a freshly initialised instance.
model = ImpRegConvModel(
        input_channels=dataset.input_size[0],
        output_size=dataset.output_size,
        num_channels=num_channels,
        img_side_len=dataset.input_size[1],
        use_max_pool=False, # currently not used
        verbose=False,
        use_group_norm=True,
        retain_activation=False)

In [None]:
# Deserialize onto the CPU first. Without map_location, torch.load restores each
# tensor onto the device it was saved from, which fails when that device does not
# exist on this machine. The weights reach the GPU via model.to('cuda') below.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
state_dict = torch.load(
    './train_dir/impregmaml_minim_5w1s_sans_modulation_10_groupnorm/maml_impregconv_52000.pt',
    map_location='cpu')
model.load_state_dict(state_dict['model'])
# Last expression of the cell: .to() returns the module, so its summary is displayed.
model.to('cuda')

In [None]:
# List the direct sub-modules of the feature extractor, one (name, module) pair per line.
for named_child in model.features.named_children():
    print(named_child)

In [None]:
# model.eval()

In [None]:
# Forward the whole training task through the model in one call.
# NOTE(review): no torch.no_grad() and no chunking here — memory use may be high
# for large tasks.
X = model(train_task.x, modulation=None)

In [None]:
# result = []
# with torch.no_grad():
#     for i in range(192):
#         result.append(model(train_task.x[i * 50: (i+1) * 50], modulation=None))

In [None]:
# print(train_task.x.shape)
# X = torch.cat(result, dim=0)
# print(X.shape)

In [None]:
# Convert the model output and the labels to NumPy arrays on the host.
# NOTE(review): X is rebound from a tensor to an ndarray, so running this cell a
# second time fails (ndarray has no .detach()).
X = X.detach().cpu().numpy()
y = (train_task.y).cpu().numpy()

In [None]:
# Dump the label array for a visual check.
print(y)

In [None]:
# L2 penalty weight; the fit in the next cell passes C = 1/l2_lambda.
l2_lambda = 10

In [None]:
# Fit a multinomial logistic regression without intercept on the extracted features.
# record=True diverts any warning raised inside the block into the list `wn`
# instead of displaying it.
with warnings.catch_warnings(record=True) as wn:
    lr_model = LogisticRegression(solver='lbfgs', penalty='l2',
        C=1/l2_lambda, # now use _l2_lambda instead of 2 * _l2_lambda
        tol=1e-6, max_iter=1000,
        multi_class='multinomial', fit_intercept=False)
    lr_model.fit(X, y)
# Surface what was captured: otherwise a ConvergenceWarning (solver stopped at
# max_iter) would be swallowed and the fit would look clean when it is not.
for caught in wn:
    print(f"{caught.category.__name__}: {caught.message}")

In [None]:
# Number of training samples classified correctly (a raw count, not a rate).
np.sum(lr_model.predict(X) == y)

In [None]:
# Positions 0..4 within each of five consecutive strides of width 595
# (presumably the first five test samples of five classes — TODO confirm).
indices = [595 * block + offset for block in range(5) for offset in range(5)]
print(indices)

In [None]:
# Model outputs for the selected test samples, moved to the host as a NumPy array.
X_test = model(test_task.x[indices], modulation=None).detach().cpu().numpy()
# Labels for the same selection.
y_test = test_task.y[indices].cpu().numpy()

In [None]:
# test_result = []
# test_result.append(model(test_task.x[0:5], modulation=None))
# test_result.append(model(test_task.x[595:600], modulation=None))

In [None]:
# X_test = model(test_task.x, modulation=None)
# X_test = X_test.detach().cpu().numpy()
# y_test = (test_task.y).cpu().numpy()

In [None]:
# Number of test samples that were featurised.
len(X_test)

In [None]:
# Concatenate a list of per-chunk test outputs and convert features/labels to NumPy.
# NOTE(review): `test_result` is only assigned inside a commented-out cell in the
# visible part of this notebook, so on a fresh kernel torch.cat raises NameError.
# NOTE(review): this rebinds X_test/y_test from the earlier indexed subset; y_test
# here takes every test label, so its length must match whatever test_result holds.
print(test_task.x.shape)
X_test = torch.cat(test_result, dim=0)
print(X_test.shape)
X_test = X_test.detach().cpu().numpy()
y_test = (test_task.y).cpu().numpy()

In [None]:
# Shape of the test label array; should match the number of rows in X_test.
print(y_test.shape)

In [None]:
# Number of test samples classified correctly (a raw count, not a rate).
np.sum(lr_model.predict(X_test) == y_test)

In [None]:
import matplotlib.pyplot as plt
from maml.datasets.task import plot_task

In [None]:
# Host-side NumPy copy of the training inputs with axes reordered to (0, 2, 3, 1).
images = train_task.x.cpu().numpy().transpose(0,2,3,1)

In [None]:
# Show the training image at index 0.
plt.imshow(images[0])

In [None]:
# Show the training image at index 600 (presumably a class boundary at 600 per class — TODO confirm ordering).
plt.imshow(images[600])

In [None]:
# Show the training image at index 1200.
plt.imshow(images[1200])

In [None]:
# Show the training image at index 1207.
plt.imshow(images[1207])

In [None]:
# Show the training image at index 1800.
plt.imshow(images[1800])

In [None]:
# Show the training image at index 1801.
plt.imshow(images[1801])

In [None]:
# Show the training image at index 2400.
plt.imshow(images[2400])

In [None]:
# Show the training image at index 2401.
plt.imshow(images[2401])