In [3]:
import torch
import torch.nn as nn
import pickle
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm as tqdm
from PIL import Image as img

np.set_printoptions(suppress=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Prepare CIFAR-10 Data (10K)

In [5]:
# !gunzip cifar-10-python.tar.gz
# !tar -xf cifar-10-python.tar
# !rm cifar-10-python.tar

In [6]:
files = os.listdir('cifar-10-batches-py/')
files.remove('readme.html')
files.remove('batches.meta')
files

['data_batch_4',
 'data_batch_5',
 'data_batch_1',
 'data_batch_3',
 'test_batch',
 'data_batch_2']

In [12]:
path = 'cifar-10-batches-py/'
images, labels = [], []
for i, file in enumerate(files):
    data = pickle.load(open(path + file, 'rb'), encoding='bytes')
    images.append(data[b'data'])
    labels.append(data[b'labels'])
    images = np.concatenate(images, axis=0)
    labels = np.concatenate(labels, axis=0)

labels = np.array(labels, dtype=np.int8)
images.shape, labels.shape

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [8]:
classes = np.unique(labels)
classes

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int8)

In [10]:
class_map = dict()
for c in classes:
    class_map[c] = np.arange(labels.shape[0])[labels == c]
class_map

{0: array([    0,     2,    29, ..., 59974, 59982, 59988]),
 1: array([    6,     9,    14, ..., 59971, 59983, 59989]),
 2: array([    3,     5,     7, ..., 59981, 59985, 59998]),
 3: array([   13,    15,    22, ..., 59922, 59929, 59994]),
 4: array([    8,    20,    37, ..., 59957, 59980, 59990]),
 5: array([   10,    16,    17, ..., 59972, 59993, 59999]),
 6: array([    1,    11,    12, ..., 59967, 59970, 59984]),
 7: array([    4,    30,    32, ..., 59978, 59986, 59997]),
 8: array([   18,    26,    34, ..., 59991, 59995, 59996]),
 9: array([   27,    38,    43, ..., 59953, 59976, 59992])}

In [None]:
samples = np.empty(0, dtype=np.int8)
for c in class_map:
    temp = np.random.choice(class_map[c], 1000, replace=False)
    samples = np.concatenate((samples, temp))

samples.sort()
samples

array([    0,     8,    11, ..., 59986, 59993, 59995])

In [None]:
images_10k = images[samples]
labels_10k = labels[samples]

images_10k.shape, labels_10k.shape

((10000, 3072), (10000,))

In [None]:
images_10k = images_10k.astype(np.float32)
images_10k = images_10k / 255.0
images_10k = images_10k.reshape((-1, 3, 32, 32))
images_10k.shape

(10000, 3, 32, 32)

In [None]:
images_10k = torch.FloatTensor(images_10k)
labels_10k = torch.IntTensor(labels_10k)

In [None]:
data_10k = {'images' : images_10k, 'labels' : labels_10k}

In [None]:
with open('data_10k.pickle', 'wb') as f:
    pickle.dump(data_10k, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
!cp /content/data_10k.pickle /content/drive/MyDrive/GNN_On_Image

## Prepare CIFAR-10 Data (Complete)

In [None]:
!cp /content/drive/MyDrive/GNN_On_Image/cifar-10-python.tar.gz .

In [None]:
!gunzip /content/cifar-10-python.tar.gz
!tar -xf /content/cifar-10-python.tar
!rm /content/cifar-10-python.tar

In [None]:
files = os.listdir('/content/cifar-10-batches-py/')
files.remove('readme.html')
files.remove('batches.meta')
files

['test_batch',
 'data_batch_5',
 'data_batch_2',
 'data_batch_3',
 'data_batch_1',
 'data_batch_4']

In [None]:
path = '/content/cifar-10-batches-py/'
for i, file in enumerate(files):
    data = pickle.load(open(path + file, 'rb'), encoding='bytes')
    if not i:
        images = data[b'data']
        labels = data[b'labels']
    else:
        images = np.concatenate((images, data[b'data']), axis=0)
        labels = labels + data[b'labels']

labels = np.array(labels, dtype=np.int8)
images.shape, labels.shape

((60000, 3072), (60000,))

In [None]:
images = images.astype(np.float32)
images = images / 255.0
images = images.reshape((-1, 3, 32, 32))
images.shape

(60000, 3, 32, 32)

In [None]:
images = torch.FloatTensor(images)
labels = torch.IntTensor(labels)

In [None]:
data = {'images' : images, 'labels' : labels}

In [None]:
with open('data_complete.pickle', 'wb') as f:
    pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
!cp /content/data_complete.pickle /content/drive/MyDrive/GNN_On_Image

## Code [Pytorch CIFAR Models](https://github.com/chenyaofo/pytorch-cifar-models)

In [None]:
from pprint import pprint
pprint(torch.hub.list("chenyaofo/pytorch-cifar-models", force_reload=True))

Downloading: "https://github.com/chenyaofo/pytorch-cifar-models/archive/master.zip" to /root/.cache/torch/hub/master.zip


['cifar100_mobilenetv2_x0_5',
 'cifar100_mobilenetv2_x0_75',
 'cifar100_mobilenetv2_x1_0',
 'cifar100_mobilenetv2_x1_4',
 'cifar100_repvgg_a0',
 'cifar100_repvgg_a1',
 'cifar100_repvgg_a2',
 'cifar100_resnet20',
 'cifar100_resnet32',
 'cifar100_resnet44',
 'cifar100_resnet56',
 'cifar100_shufflenetv2_x0_5',
 'cifar100_shufflenetv2_x1_0',
 'cifar100_shufflenetv2_x1_5',
 'cifar100_shufflenetv2_x2_0',
 'cifar100_vgg11_bn',
 'cifar100_vgg13_bn',
 'cifar100_vgg16_bn',
 'cifar100_vgg19_bn',
 'cifar100_vit_b16',
 'cifar100_vit_b32',
 'cifar100_vit_h14',
 'cifar100_vit_l16',
 'cifar100_vit_l32',
 'cifar10_mobilenetv2_x0_5',
 'cifar10_mobilenetv2_x0_75',
 'cifar10_mobilenetv2_x1_0',
 'cifar10_mobilenetv2_x1_4',
 'cifar10_repvgg_a0',
 'cifar10_repvgg_a1',
 'cifar10_repvgg_a2',
 'cifar10_resnet20',
 'cifar10_resnet32',
 'cifar10_resnet44',
 'cifar10_resnet56',
 'cifar10_shufflenetv2_x0_5',
 'cifar10_shufflenetv2_x1_0',
 'cifar10_shufflenetv2_x1_5',
 'cifar10_shufflenetv2_x2_0',
 'cifar10_vgg11_bn

In [None]:
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_mobilenetv2_x0_5", pretrained=True).to(device)
model

Using cache found in /root/.cache/torch/hub/chenyaofo_pytorch-cifar-models_master


MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(8, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(48, eps=1e-05, momentum

In [None]:
del model.classifier[1]
model.classifier[0] = nn.Dropout(0.0)
model

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(8, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(48, eps=1e-05, momentum

## Loading GoogleNet [(Git Repo)](https://github.com/huckiyang/PyTorch-CIFAR10/archive/refs/heads/master.zip)   [(PreTrained - GDrive)](https://drive.google.com/file/d/17fmN8eQdLpq2jIMQ_X0IXDPXfI9oVWgq/view?usp=sharing)

In [None]:
import sys

path = r"/content/drive/MyDrive/GNN_On_Image/PyTorch-CIFAR10-master"
sys.path.append(path)

In [None]:
from cifar10_models import *

In [None]:
gnet = googlenet(pretrained=True)
gnet

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 16, kernel

In [None]:
gnet.dropout = nn.Dropout(0.0)
gnet.fc = nn.Dropout(0.0)

In [None]:
gnet

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 16, kernel

## Embeddings (10K)

In [None]:
with open('/content/drive/MyDrive/GNN_On_Image/data_10k.pickle', 'rb') as f:
    data = pickle.load(f)
images = data['images']
labels = data['labels']

In [None]:
images.shape, labels.shape

(torch.Size([10000, 3, 32, 32]), torch.Size([10000]))

In [None]:
for batch in range(0, 10000, 128):
    emb = model(images[batch : batch+128, :, :, :]).detach().numpy()
    if not batch: embedding = emb
    else: embedding = np.concatenate((embedding, emb), axis=0)
embedding.shape

(10000, 1280)

In [None]:
output_10k = {'embeddings': embedding, 'labels': labels}
with open('output_10k.pickle', 'wb') as f:
    pickle.dump(output_10k, f, protocol=pickle.HIGHEST_PROTOCOL)
!cp /content/output_10k.pickle /content/drive/MyDrive/GNN_On_Image

## Embedding (Complete)

In [None]:
with open('/content/drive/MyDrive/GNN_On_Image/data_complete.pickle', 'rb') as f:
    data = pickle.load(f)
images = data['images']
labels = data['labels']

In [None]:
images.shape, labels.shape

(torch.Size([60000, 3, 32, 32]), torch.Size([60000]))

In [None]:
for batch in range(0, 60000, 128):
    emb = model(images[batch : batch+128, :, :, :]).detach().numpy()
    if not batch: embedding = emb
    else: embedding = np.concatenate((embedding, emb), axis=0)
embedding.shape

(60000, 1280)

In [None]:
output_complete = {'embeddings': embedding, 'labels': labels}
with open('output_complete.pickle', 'wb') as f:
    pickle.dump(output_complete, f, protocol=pickle.HIGHEST_PROTOCOL)
!cp /content/output_complete.pickle /content/drive/MyDrive/GNN_On_Image

## GNet Embeddings Complete

In [None]:
with open('/content/drive/MyDrive/GNN_On_Image/data_complete.pickle', 'rb') as f:
    data = pickle.load(f)
images = data['images']
labels = data['labels']

In [None]:
images.shape, labels.shape

(torch.Size([60000, 3, 32, 32]), torch.Size([60000]))

In [None]:
for batch in tqdm(range(0, 60000, 128)):
    emb = gnet(images[batch : batch+128, :, :, :]).detach().numpy()
    if not batch: embedding = emb
    else: embedding = np.concatenate((embedding, emb), axis=0)
embedding.shape

100%|██████████| 469/469 [28:39<00:00,  3.67s/it]


(60000, 1024)

In [None]:
gnet_cifar10_emb_complete = {'embeddings': embedding, 'labels': labels}
with open('gnet_cifar10_emb_complete.pickle', 'wb') as f:
    pickle.dump(gnet_cifar10_emb_complete, f, protocol=pickle.HIGHEST_PROTOCOL)
!cp /content/gnet_cifar10_emb_complete.pickle /content/drive/MyDrive/GNN_On_Image

## Data Prepare For Accuracy Test

In [None]:
with open('/content/drive/MyDrive/GNN_On_Image/data_complete.pickle', 'rb') as f:
    data = pickle.load(f)

images = data['images']
labels = data['labels']

images.shape, labels.shape

(torch.Size([60000, 3, 32, 32]), torch.Size([60000]))

In [None]:
num_nodes = labels.shape[0]
np.random.seed(1)
rand_indices = np.random.permutation(num_nodes)

In [None]:
test = rand_indices[: int(0.4 * num_nodes)]
# val = rand_indices[int(0.4 * num_nodes) : int(0.5 * num_nodes)]
# train = rand_indices[int(0.5 * num_nodes) :]

In [None]:
test_images = images[test]
# val_images = images[val]
# train_images = images[train]

In [None]:
test_labels = labels[test]
# val_labels = labels[val]
# train_labels = labels[train]

## GNet CIFAR 10 Base Accuracy

In [None]:
gnet_test_pred = []
gnet = googlenet(pretrained=True)
for batch in tqdm(range(0, test_labels.shape[0], 128)):
    img = test_images[batch : batch+128, :, :, :]
    out = list(gnet(img).detach().numpy().argmax(axis=1))
    gnet_test_pred += out
len(gnet_test_pred)

100%|██████████| 188/188 [12:20<00:00,  3.94s/it]


24000

In [None]:
from sklearn.metrics import f1_score

print("Micro Score of test set >>>", f1_score(test_labels, gnet_test_pred, average="micro"))
print("Macro Score of test set >>>", f1_score(test_labels, gnet_test_pred, average="macro"))

Micro Score of test set >>> 0.9686666666666667
Macro Score of test set >>> 0.9687476262663314


## MobileNet CIFAR 10 Base accuracy

In [None]:
mnet_test_pred = []
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_mobilenetv2_x0_5", pretrained=True).to(device)
for batch in tqdm(range(0, test_labels.shape[0], 128)):
    img = test_images[batch : batch+128, :, :, :]
    out = list(model(img).detach().numpy().argmax(axis=1))
    mnet_test_pred += out
len(mnet_test_pred)

Downloading: "https://github.com/chenyaofo/pytorch-cifar-models/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/chenyaofo/pytorch-cifar-models/releases/download/mobilenetv2/cifar10_mobilenetv2_x0_5-ca14ced9.pt" to /root/.cache/torch/hub/checkpoints/cifar10_mobilenetv2_x0_5-ca14ced9.pt


  0%|          | 0.00/2.85M [00:00<?, ?B/s]

100%|██████████| 188/188 [01:50<00:00,  1.71it/s]


24000

In [None]:
from sklearn.metrics import f1_score

print("Micro Score of test set >>>", f1_score(test_labels, mnet_test_pred, average="micro"))
print("Macro Score of test set >>>", f1_score(test_labels, mnet_test_pred, average="macro"))

Micro Score of test set >>> 0.9324166666666667
Macro Score of test set >>> 0.9325228159506367


# F-MNIST Dataset Load

In [None]:
import pandas as pd
import numpy as np

import torch

In [None]:
df = pd.read_csv('/content/drive/MyDrive/fashion-mnist_train.csv')
df.shape

(60000, 785)

In [None]:
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
labels = np.array(df['label']).reshape(-1,)
labels

array([2, 9, 6, ..., 8, 8, 7])

In [None]:
images = np.array(df.iloc[:, 1:], dtype=np.float32)
images = images / 255.0
images = images.reshape((-1, 1, 28, 28))
images.shape 

(60000, 1, 28, 28)

In [None]:
images = torch.FloatTensor(images)
labels = torch.IntTensor(labels)

In [None]:
data = {'images' : images, 'labels' : labels}

In [None]:
model

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(8, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(48, eps=1e-05, momentum

In [None]:
model.features[0][0] = nn.Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

In [None]:
model.features[0]

ConvBNActivation(
  (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU6(inplace=True)
)

In [None]:
for batch in tqdm(range(0, 60000, 128)):
    emb = model(images[batch : batch+128, :, :, :]).detach().numpy()
    if not batch: embedding = emb
    else: embedding = np.concatenate((embedding, emb), axis=0)
embedding.shape

100%|██████████| 469/469 [03:35<00:00,  2.18it/s]


(60000, 1280)

In [None]:
fmnist_complete = {'embeddings': embedding, 'labels': labels}
with open('fmnist_complete.pickle', 'wb') as f:
    pickle.dump(fmnist_complete, f, protocol=pickle.HIGHEST_PROTOCOL)
!cp /content/fmnist_complete.pickle /content/drive/MyDrive/GNN_On_Image

# STL - 10

In [None]:
!pip install kaggle
!mkdir ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download jessicali9530/stl10

In [None]:
!unzip /content/stl10.zip

In [None]:
len(os.listdir('/content/train_images'))

5000

In [None]:
np.asarray(img.open('/content/train_images/train_image_png_1.png')).reshape(3, 96, 96).shape

(3, 96, 96)