# This is the notebook for the project "Contextual Privacy Policy for Mobile Apps". Ultimately, we aim to detect and classify privacy-related icons.

## We first adapt an existing icon classifier.

In [None]:
import torch
import numpy as np
import torchvision
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import time
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/gdrive; the .npy datasets and saved models used
# by the cells below are read from and written to paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# device = "cpu"
print(device)

In [None]:
# check CPU
!cat /proc/cpuinfo

In [None]:
!nvidia-smi

In [None]:
# Load the four .npy arrays from Drive. The files named "validation_*" are bound
# to the x_test / y_test variables used throughout the notebook.
_data_root = "/content/gdrive/MyDrive/mobile-semantics-classification/"
x_train = np.load(_data_root + "training_x.npy")
y_train = np.load(_data_root + "training_y.npy")
x_test = np.load(_data_root + "validation_x.npy")
y_test = np.load(_data_root + "validation_y.npy")

# Report the raw shapes before any preprocessing.
for _name, _array in (("x_train", x_train), ("y_train", y_train),
                      ("x_test", x_test), ("y_test", y_test)):
    print(_name + ' shape:', _array.shape)

print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# Cast to float32 and divide by 255 (presumably mapping 8-bit intensities to [0, 1]).
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# num_classes = np.unique(y_train).shape[0]
# Print Unique Icon Classes, 99 classes
# print(np.unique(y_train))
# print(num_classes, ' classes')

# Convert class vectors to binary class matrices.
# y_train = keras.utils.to_categorical(y_train, num_classes)
# y_test = keras.utils.to_categorical(y_test, num_classes)

#print(y_train[0])
# y_train = torch.tensor(y_train)
# y_train = F.one_hot(y_train.to(torch.int64), num_classes=num_classes)
# y_train = np.array(y_train)
#print(y_train[0])

# idx_train = np.where((y_train == [72]) | (y_train == [42])
#  | (y_train == [91]) | (y_train == [6]) | (y_train == [40])
#   | (y_train == [43]) | (y_train == [82]) | (y_train == [3]) | (y_train == [68])
#    )

# x_train = x_train[idx_train[0]]
# y_train = y_train[idx_train[0]]

# print(np.where(y_train == [6]))

# change the selected labels to 0-11
# y_train[np.where(y_train==[3])] = [7]
# y_train[np.where(y_train==[72])] = [0]
# y_train[np.where(y_train==[42])] = [1]

# y_train[np.where(y_train==[91])] = [2]
# y_train[np.where(y_train==[6])] = [3]

# y_train[np.where(y_train==[40])] = [4]
# y_train[np.where(y_train==[43])] = [5]
# y_train[np.where(y_train==[82])] = [6]
# y_train[np.where(y_train==[68])] = [8]


# print("idx_train length: ", idx_train[0].shape)
# print('x_train_selected shape:', x_train.shape)
# print('y_train_selected shape:', y_train.shape)



# idx_test = np.where((y_test == [72]) | (y_test == [42])
#  | (y_test == [91]) | (y_test == [6]) | (y_test == [40])
#   | (y_test == [43]) | (y_test == [82]) | (y_test == [3]) | (y_test == [68])
#    )

# x_test = x_test[idx_test[0]]
# y_test = y_test[idx_test[0]]

# change the selected labels to 0-11
# y_test[np.where(y_test==[3])] = [7]
# y_test[np.where(y_test==[72])] = [0]
# y_test[np.where(y_test==[42])] = [1]

# y_test[np.where(y_test==[91])] = [2]
# y_test[np.where(y_test==[6])] = [3]

# y_test[np.where(y_test==[40])] = [4]
# y_test[np.where(y_test==[43])] = [5]
# y_test[np.where(y_test==[82])] = [6]
# y_test[np.where(y_test==[68])] = [8]


# print("idx_test length: ", idx_test[0].shape)
# print('x_test_selected shape:', x_test.shape)
# print('y_test_selected shape:', y_test.shape)

print("reshape: ")
# Move the last axis to position 1, i.e. (N, H, W, C) -> (N, C, H, W)
# (assumes the arrays are stored channels-last, as the original reshape implied).
# np.transpose permutes the axes; reshape merely reinterprets the flat buffer,
# which matches a true axis move only when the channel axis has length 1.
# For single-channel data the result is element-for-element identical to the
# previous reshape; for multi-channel data it is now correct instead of scrambled.
x_train = np.ascontiguousarray(np.transpose(x_train, (0, 3, 1, 2)))
x_test = np.ascontiguousarray(np.transpose(x_test, (0, 3, 1, 2)))

print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
print('x_test shape:', x_test.shape)
print('y_test shape:', y_test.shape)

# Number of distinct label values present in the training labels.
num_classes = np.unique(y_train).shape[0]
print(num_classes, ' classes')
# print(np.unique(y_train))

# num_classes = np.unique(y_test).shape[0]
# print(num_classes, ' classes')
# print(np.unique(y_test))

In [None]:
# use dataloader

# print(x_train[0])
# print(x_train.mean())

# Dataset-wide statistics over every value of the training array (one scalar
# each); both pipelines below standardise with these same two numbers.
mean, std = x_train.mean(), x_train.std()

# mean = mean/len(x_train)
# std = std/len(x_train)

print("mean: ", mean)
print("std: ", std)

# Training pipeline: tensor conversion followed by standardisation.
# The augmentation steps that were tried are kept below, disabled.
_train_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),  # featurewise centering

    # data augmentation
    # transforms.RandomAffine(degrees=0, translate=(0.1,0.1)), # randomly moved along the x- and y-axis by up to 10%

    # add more operations
    # transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation(15)
]
transform_train = transforms.Compose(_train_steps)

# Validation pipeline: the same two steps, no augmentation.
_val_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),  # featurewise centering
]
transform_val = transforms.Compose(_val_steps)

class CustomDataset(Dataset):
    """Map-style dataset over in-memory image and label arrays.

    Labels are converted once to a ``torch.LongTensor``. Images are stored as
    given and only passed through ``transform`` when an item is requested.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = torch.LongTensor(labels)
        self.transform = transform

    def __len__(self):
        # One item per label.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = self.images[idx]
        target = self.labels[idx]

        if self.transform is None:
            return sample, target

        # The first two axes of the transformed tensor are swapped.
        # NOTE(review): presumably this compensates for ToTensor treating the
        # (C, H, W) array as (H, W, C) — confirm before changing either side.
        return self.transform(sample).permute(1, 0, 2), target

batch_size = 16

# Wrap the first 113840 training items and the first 4800 held-out items,
# each with its own transform pipeline.
_train_set = CustomDataset(x_train[:113840], y_train[:113840], transform_train)
_val_set = CustomDataset(x_test[0:4800], y_test[0:4800], transform_val)

train_loader = DataLoader(_train_set, batch_size=batch_size, shuffle=True)
# validation_loader = DataLoader(CustomDataset(x_train[9600:11200],y_train[9600:11200]), batch_size=batch_size, shuffle=True)
# test_loader = DataLoader(CustomDataset(x_test[:1600],y_test[:1600]), batch_size=batch_size, shuffle=True)
# Note that the validation loader is shuffled as well.
validation_loader = DataLoader(_val_set, batch_size=batch_size, shuffle=True)

In [None]:
# choose model

# -------------- use our own model -------------
# model = Net().to(device)

# --------------- use resnet18 -------------------

# from torchvision import models

# model = models.resnet18().to(device)
# in_feature_num = model.fc.in_features
# model.fc = nn.Linear(in_feature_num, 99)
# # model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=(3,3), stride=(2,2), bias=False)
# model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5,5), padding=(3,3), stride=(2,2), bias=False)

# --------------- use resnet50 -------------------

# from torchvision import models

# model = models.resnet50().to(device)
# in_feature_num = model.fc.in_features
# model.fc = nn.Linear(in_feature_num, 99)
# # model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=(3,3), stride=(2,2), bias=False)
# model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5,5), padding=(3,3), stride=(2,2), bias=False)

# -------------- mobilenet -----------------------

# from torchvision import models

# # model = models.mobilenet_v2().to(device)
# model = models.mobilenet_v3_small().to(device)

# model.features[0][0] = torch.nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)

# num_features = model.classifier[1].in_features
# model.classifier[1] = torch.nn.Linear(num_features, 99)

# -------------- mobilenet_v3 -----------------------

from torchvision import models

# MobileNetV3-Small, constructed without any weights argument.
# model = models.mobilenet_v2().to(device)
model = models.mobilenet_v3_small().to(device)

# Swap the first convolution for one that takes a single input channel.
_stem_conv = torch.nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1, bias=False)
model.features[0][0] = _stem_conv

# Replace the last classifier layer with a 99-way linear head.
_head = nn.Linear(1024, 99)
model.classifier[-1] = _head

# --------------- end ----------------------------

# The two replacement layers were created after the first .to(device) call,
# so the whole model is moved once more.
model = model.to(device)

In [None]:
# train the model
#
# Trains `model` for `epoch_num` epochs, evaluates on `validation_loader` after
# every epoch, prints overall and per-class validation accuracy, and keeps a
# snapshot of the best-scoring model in `best_model`.

import copy

# Put all hyperparameters together
initial_leanring_rate = 0.001
learning_rate_weight_decay = 3e-4
epoch_num = 20

loss = nn.CrossEntropyLoss()

optimizer = torch.optim.RMSprop(params=model.parameters(), lr=initial_leanring_rate, weight_decay=learning_rate_weight_decay)
# optimizer = torch.optim.Adam(params=model.parameters(), lr=initial_leanring_rate, weight_decay=learning_rate_weight_decay)

best_model = model

# best_accuracy is stored as (validation accuracy * batch_size) because the
# save cell prints best_accuracy / batch_size; keep the two in sync.
best_accuracy = 0

best_epoch = 0

train_accuracy_all = []

validation_accuracy_all = []

# Lower bounds (in percent) of the per-class accuracy buckets reported each epoch.
accuracy_thresholds = (90, 80, 70, 60, 50)

for epoch in range(epoch_num):

    start_time = time.time()

    train_accuracy = 0
    validation_accuracy = 0
    train_loss = 0
    validation_loss = 0
    train_samples = 0
    validation_samples = 0

    # Per-class tallies are reset every epoch so the printed per-class accuracy
    # describes this epoch only instead of a running total over all epochs.
    class_correct = np.zeros(num_classes)
    class_total = np.zeros(num_classes)

    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        # Flatten the labels to one class index per sample.
        labels = labels.view(labels.shape[0]).to(device)

        optimizer.zero_grad()
        train_output = model(images)
        batch_loss = loss(train_output, labels)
        batch_loss.backward()
        optimizer.step()

        batch_count = labels.shape[0]
        train_accuracy = train_accuracy + (train_output.argmax(dim=1) == labels).sum().item()
        # CrossEntropyLoss returns the batch mean; weight it by the batch size
        # so the epoch figure is a true per-sample mean.
        train_loss = train_loss + batch_loss.item() * batch_count
        train_samples = train_samples + batch_count

    model.eval()
    with torch.no_grad():
        for images, labels in validation_loader:
            images = images.to(device)
            labels = labels.view(labels.shape[0]).to(device)

            validation_output = model(images)
            batch_loss = loss(validation_output, labels)

            hits = validation_output.argmax(dim=1) == labels
            batch_count = labels.shape[0]

            validation_accuracy = validation_accuracy + hits.sum().item()
            validation_loss = validation_loss + batch_loss.item() * batch_count
            validation_samples = validation_samples + batch_count

            # Vectorised per-class tally; it uses the actual batch length, so a
            # short final batch no longer raises IndexError.
            labels_np = labels.cpu().numpy()
            np.add.at(class_total, labels_np, 1)
            np.add.at(class_correct, labels_np, hits.cpu().numpy().astype(class_correct.dtype))

    # Guard against empty loaders so the reporting below cannot divide by zero.
    epoch_train_accuracy = train_accuracy / max(train_samples, 1)
    epoch_validation_accuracy = validation_accuracy / max(validation_samples, 1)

    print("epoch: ", epoch + 1)
    print("train accuracy: ", epoch_train_accuracy)
    print("train loss: ", train_loss / max(train_samples, 1))
    print("validation accuracy: ", epoch_validation_accuracy)
    print("validation loss: ", validation_loss / max(validation_samples, 1))

    print("time cost: %2.2f s" % (time.time() - start_time))

    bucket_counts = dict.fromkeys(accuracy_thresholds, 0)
    num_under_50_percent_correct = 0

    for class_index in range(num_classes):

        # A class absent from the validation subset has no defined accuracy;
        # report it and leave it out of the buckets instead of dividing by zero.
        if class_total[class_index] == 0:
            print(f"The accuracy of class {class_index}: n/a (no validation samples)")
            continue

        class_percent_correct = np.round(100 * class_correct[class_index] / class_total[class_index], 2)

        print(f"The accuracy of class {class_index}: {class_percent_correct}%")

        # Each class is counted in the highest bucket it reaches.
        for threshold in accuracy_thresholds:
            if class_percent_correct >= threshold:
                bucket_counts[threshold] += 1
                break
        else:
            num_under_50_percent_correct += 1

    for threshold in accuracy_thresholds:
        print(f"The number of classes with accuracy equal to or more than {threshold}%: ", bucket_counts[threshold])
    print("The number of classes with accuracy lower than 50%: ", num_under_50_percent_correct)

    train_accuracy_all.append(epoch_train_accuracy)
    validation_accuracy_all.append(epoch_validation_accuracy)

    epoch_score = epoch_validation_accuracy * batch_size
    if best_accuracy < epoch_score:
        # Deep-copy the model: a plain assignment would only alias `model`, and
        # later epochs would silently overwrite the "best" weights.
        best_model = copy.deepcopy(model)
        best_accuracy = epoch_score
        best_epoch = epoch + 1



In [None]:
# visualization
# Plot the per-epoch train and validation accuracy recorded by the training cell.

print(train_accuracy_all)
print(validation_accuracy_all)

fig = plt.figure(figsize=(20,10))

# ax1=fig.add_subplot(121)
# ax1.plot(np.arange(1,11),train_accuracy_all)

# ax2=fig.add_subplot(222)
# ax2.plot(np.arange(1,11),validation_accuracy_all)

# Derive the x axis from the recorded history rather than epoch_num, so the
# plot still works when training was interrupted before the last epoch.
# Both lists are appended to together, once per completed epoch.
epochs_run = np.arange(1, len(train_accuracy_all) + 1)

plt.plot(epochs_run, train_accuracy_all, 'r', label='train accuracy')
plt.plot(epochs_run, validation_accuracy_all, 'y', label='validation accuracy')
plt.ylim(0.0, 1.0)
plt.xticks(epochs_run)
plt.title('The train accuracy and validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()

plt.show()

In [None]:
# save the model

# PATH = './saved_model/model-10000-10-resnet18.pkl'
# torch.save(best_model, PATH)

# trained_model = torch.load(PATH)

# save the parameters

# PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-10000-10-resnet18.pkl'
# torch.save(best_model.state_dict(), PATH)

# Persist only the parameters (state_dict) of the best model to Drive.
PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-mobilenetv3.pkl'
_best_state = best_model.state_dict()
torch.save(_best_state, PATH)

# The training cell stores best_accuracy as correct predictions per batch,
# so dividing by batch_size turns it into a fraction.
_best_fraction = best_accuracy / batch_size
print("Accuracy for best model: ", _best_fraction)
print("Epoch: ", best_epoch)

# trained_model = model()
# trained_model.load_state_dict(torch.load(PATH))

In [None]:
# PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18.pkl'
# trained_model = model()
# trained_model.load_state_dict(torch.load(PATH))

from torchvision import models

# Rebuild the ResNet-18 variant the checkpoint belongs to: a 99-way linear head
# and a single-input-channel 5x5 first convolution.
model = models.resnet18()
in_feature_num = model.fc.in_features
model.fc = nn.Linear(in_feature_num, 99)
# model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=(3,3), stride=(2,2), bias=False)
model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5, 5), padding=(3, 3), stride=(2, 2),
                            bias=False)
# PATH = "C:/ANU/2022 s2/honours project/code/UIED-master/model/model-99-resnet18.pkl"
PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18.pkl'
# trained_model = model()
# map_location lets a checkpoint written on a GPU be restored on a CPU-only runtime.
model.load_state_dict(torch.load(PATH, map_location=device))

model = model.to(device)

model.eval()

validation_accuracy = 0
validation_loss = 0
validation_samples = 0

# Per-class tallies (computed for inspection; not printed by this cell).
class_correct = np.zeros(num_classes)
class_total = np.zeros(num_classes)

loss = nn.CrossEntropyLoss()

with torch.no_grad():
    for batch_index, data in enumerate(validation_loader):

        images = data[0].to(device)
        # Flatten the labels to one class index per sample.
        labels = data[1].view(data[1].shape[0]).to(device)

        validation_output = model(images)
        batch_loss = loss(validation_output, labels)

        output_np = validation_output.cpu().numpy()
        labels_np = labels.cpu().numpy()
        predicted_np = np.argmax(output_np, axis=1)
        hits = predicted_np == labels_np

        validation_accuracy = validation_accuracy + np.sum(hits)

        if batch_index == 0:
            print("validation_output for i = 0: ", output_np)
            # print("index 47's' value in output: ", validation_output.cpu().data.numpy()[0][47])
            print("max value of output for i = 0: ", np.max(output_np, axis=1))
            print("label for i == 0: ", predicted_np)
            print("ground truth label: ", labels_np)

        # CrossEntropyLoss returns the batch mean; weight it by the batch size
        # so the final figure is a true per-sample mean.
        validation_loss = validation_loss + batch_loss.item() * len(labels_np)
        validation_samples = validation_samples + len(labels_np)

        # Uses the real batch length, so a short final batch cannot raise
        # IndexError, and no longer reuses the outer loop's index variable.
        np.add.at(class_total, labels_np, 1)
        np.add.at(class_correct, labels_np, hits.astype(class_correct.dtype))

print("validation accuracy: ", validation_accuracy / max(validation_samples, 1))
print("validation loss: ", validation_loss / max(validation_samples, 1))

# Show the first image tensor of one (shuffled) validation batch.
for data in validation_loader:
    print(data[0][0])
    break

In [None]:
# convert .npy file to image

import PIL
from PIL import Image
import matplotlib.pyplot as plt

# Reproduce the loader's preprocessing by hand for one stored sample and run it
# through the currently loaded model.
sample_data = np.load('/content/gdrive/MyDrive/mobile-semantics-classification/validation_x.npy')
# print(sample_data[0,:,:,:].shape)
array = np.reshape(sample_data[10,:,:,:], [32, 32])

# array = np.array([array[1], array[0]])

sample_image = Image.fromarray(array)
# sample_image.show()
plt.imshow(sample_image)
# print("the array of image: ", np.asarray(sample_image))

array = np.asarray(sample_image)
array = array.astype('float32')
array = array / 255
# Standardise with the dataset-wide `mean` / `std` computed in the dataloader
# cell, i.e. the same statistics transform_val applies. Per-image statistics
# would feed the model a differently scaled input than the loaders do, and
# would produce NaNs for a constant image.
array = (array - mean) / std

print("array mean: ", array.mean())
print("array std: ", array.std())

array = array.reshape(1, 1, 32, 32)

array = torch.tensor(array)
# Swap the two spatial axes, matching the axis order CustomDataset produces.
array = array.permute(0,1,3,2)
print("array_tensor: ", array)

# Inference only: evaluation mode and no autograd graph.
model.eval()
with torch.no_grad():
    array_pred_label = model(array.to(device))
print("output: ", array_pred_label)
print("max value in output: ", np.max(array_pred_label.cpu().data.numpy()))
print("index of max value in output: ", np.argmax(array_pred_label.cpu().data.numpy(),axis=1))

In [None]:
# Look at the first sample of the first batch the validation loader yields:
# show it as an image, print its tensor, and print the model's output for it.
for data in validation_loader:
    first_tensor = data[0][0]

    # print("The first image of validation set: ", data[0][0])
    first_image = np.reshape(first_tensor, [32, 32])
    sample_first_image = Image.fromarray(np.array(first_image*255))

    plt.imshow(sample_first_image)
    # print("The array of image: ", np.asarray(sample_first_image))
    # print("The array of image: ", first_image)

    print("The tensorof image: ", first_tensor)

    # The whole batch goes through the model; only row 0 is reported.
    first_image_output = model(data[0].to(device))
    first_row = first_image_output.cpu().data.numpy()[0]
    print("output: ", first_row)

    print("max value in output: ", np.max(first_row))
    print("index of max value in output: ", np.argmax(first_row))

    # Only the first batch is needed.
    break


In [None]:
# test existing model
# NOTE(review): the ".h5" suffix suggests a Keras/HDF5 weights file, whereas
# torch.load expects a file written by torch.save — this call presumably fails;
# confirm the file's actual format before relying on model_small_cnn.

model_small_cnn = torch.load('/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/small_cnn_weights_100_512.h5')

In [None]:
from torchvision import models

# Rebuild the ResNet-18 variant the checkpoint belongs to (99-way head,
# single-input-channel 5x5 first convolution) and restore its weights.
model = models.resnet18()
in_feature_num = model.fc.in_features
model.fc = nn.Linear(in_feature_num, 99)
# model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=(3,3), stride=(2,2), bias=False)
model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5, 5), padding=(3, 3), stride=(2, 2),
                            bias=False)
# PATH = "C:/ANU/2022 s2/honours project/code/UIED-master/model/model-99-resnet18.pkl"
PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18.pkl'
# trained_model = model()
# map_location lets a checkpoint written on a GPU be restored on a CPU-only runtime.
model.load_state_dict(torch.load(PATH, map_location=device))

model = model.to(device)

model.eval()

In [None]:
# efficiency

from torchvision import models

# Rebuild the ResNet-18 variant the checkpoint belongs to: a 99-way linear head
# and a single-input-channel 5x5 first convolution.
model = models.resnet18()
in_feature_num = model.fc.in_features
model.fc = nn.Linear(in_feature_num, 99)
# model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=(3,3), stride=(2,2), bias=False)
model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5, 5), padding=(3, 3), stride=(2, 2),
                            bias=False)
# PATH = "C:/ANU/2022 s2/honours project/code/UIED-master/model/model-99-resnet18.pkl"
PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18.pkl'
# trained_model = model()
# map_location lets a checkpoint written on a GPU be restored on a CPU-only runtime.
model.load_state_dict(torch.load(PATH, map_location=device))

model = model.to(device)

model.eval()

validation_accuracy = 0
validation_loss = 0
validation_samples = 0

# Per-class tallies (computed for inspection; not printed by this cell).
class_correct = np.zeros(num_classes)
class_total = np.zeros(num_classes)

loss = nn.CrossEntropyLoss()

with torch.no_grad():
    for batch_index, data in enumerate(validation_loader):

        images = data[0].to(device)
        # Flatten the labels to one class index per sample.
        labels = data[1].view(data[1].shape[0]).to(device)

        validation_output = model(images)
        batch_loss = loss(validation_output, labels)

        output_np = validation_output.cpu().numpy()
        labels_np = labels.cpu().numpy()
        predicted_np = np.argmax(output_np, axis=1)
        hits = predicted_np == labels_np

        validation_accuracy = validation_accuracy + np.sum(hits)

        if batch_index == 0:
            print("validation_output for i = 0: ", output_np)
            # print("index 47's' value in output: ", validation_output.cpu().data.numpy()[0][47])
            print("max value of output for i = 0: ", np.max(output_np, axis=1))
            print("label for i == 0: ", predicted_np)
            print("ground truth label: ", labels_np)

        # CrossEntropyLoss returns the batch mean; weight it by the batch size
        # so the final figure is a true per-sample mean.
        validation_loss = validation_loss + batch_loss.item() * len(labels_np)
        validation_samples = validation_samples + len(labels_np)

        # Uses the real batch length, so a short final batch cannot raise
        # IndexError, and no longer reuses the outer loop's index variable.
        np.add.at(class_total, labels_np, 1)
        np.add.at(class_correct, labels_np, hits.astype(class_correct.dtype))

print("validation accuracy: ", validation_accuracy / max(validation_samples, 1))
print("validation loss: ", validation_loss / max(validation_samples, 1))

# Show the first image tensor of one (shuffled) validation batch.
for data in validation_loader:
    print(data[0][0])
    break

In [None]:
# efficiency

import pickle
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from torchvision import models

# ------ resnet18 ----------------

# model = models.resnet18()
# in_feature_num = model.fc.in_features
# model.fc = nn.Linear(in_feature_num, 99)
# # model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=(3,3), stride=(2,2), bias=False)
# model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5, 5), padding=(3, 3), stride=(2, 2),
#                             bias=False)
# # PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18-epoch30.pkl'
# PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18.pkl'
# # trained_model = model()
# model.load_state_dict(torch.load(PATH))

# ------- semantic model --------

# model = Net().to(device)
# PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-semantic.pkl'
# model.load_state_dict(torch.load(PATH))

# ------- mobilenetv2 ------------
# model = models.mobilenet_v2().to(device)

# model.features[0][0] = torch.nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)

# num_features = model.classifier[1].in_features
# model.classifier[1] = torch.nn.Linear(num_features, 99)

# PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-mobilenetv2.pkl'
# model.load_state_dict(torch.load(PATH))

# ------- mobilenetv3 ----------

# Rebuild the MobileNetV3-Small variant the checkpoint belongs to: a
# single-input-channel first convolution and a 99-way final classifier layer.
model = models.mobilenet_v3_small()

model.features[0][0] = torch.nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1, bias=False)

model.classifier[-1] = nn.Linear(1024, 99)

PATH = '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-mobilenetv3.pkl'
# map_location lets a checkpoint written on a GPU be restored on a CPU-only runtime.
model.load_state_dict(torch.load(PATH, map_location=device))

# ------- end --------------------

# Move the fully assembled model (including the replaced layers) in one step.
model = model.to(device)

model.eval()

y_true = []
y_pred = []

# Make predictions on the test DataLoader

start_time = time.time()

# Inference only: without no_grad every forward pass would record an autograd
# graph, wasting memory and inflating the measured processing time.
with torch.no_grad():
    for inputs, targets in validation_loader:
        inputs = inputs.to(device)  # Send data to GPU if available

        # Forward pass
        outputs = model(inputs)

        # Get predicted labels
        _, predicted = torch.max(outputs, 1)

        # Store true and predicted labels. The targets never need to visit the
        # device; each stored entry keeps the shape the loader yields per sample.
        y_true.extend(targets.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

print("processing time: %2.5f s" % (time.time() - start_time))

# # Make predictions on the test dataset
# print("len(validation_loader[0]): ", len(validation_loader[0]))
# validation_output = model.predict(validation_loader[0].to(device))
# y_pred = np.argmax(validation_output.cpu().data.numpy(),axis=1)
# print("y_pred: ", y_pred)

# # Calculate the confusion matrix
# conf_matrix = confusion_matrix(y_true, y_pred)

# # Calculate accuracy, precision, and recall
# accuracy = accuracy_score(y_true, y_pred)
# precision = precision_score(y_true, y_pred, average='macro')  # You can set the average parameter as needed ('micro', 'macro', etc.)
# recall = recall_score(y_true, y_pred, average='macro')  # You can set the average parameter as needed ('micro', 'macro', etc.)

# print("Accuracy:", accuracy)
# print("Precision:", precision)
# print("Recall:", recall)

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Reduce every stored target to a single scalar label. np.ravel accepts both a
# length-1 array (labels shaped (N, 1)) and an already-scalar value, so this
# no longer fails when the labels are one-dimensional.
y_true = [np.ravel(label)[0] for label in y_true]

# Classes of interest, in reporting order.
labels_order = [72.0, 42.0, 77.0, 91.0, 6.0, 89.0, 40.0, 43.0, 82.0, 3.0, 68.0, 49.0, 56.0, 51.0]

# Create a crosstab (contingency table) from y_true and y_pred
print("y_true: ", y_true)
print("y_pred: ", y_pred)
contingency_table = pd.crosstab(pd.Series(y_true, name='True'), pd.Series(y_pred, name='Predicted'))

# Print the per-class score for the interested classes.
# Note: TP / (row_sum + col_sum - TP) is the intersection-over-union (Jaccard
# index) of the class, even though it is printed under the label "Accuracy".
for class_name in labels_order:
    # Skip classes that never occur as a true label or never as a prediction.
    if class_name in contingency_table.index and class_name in contingency_table.columns:
        true_positives = contingency_table.loc[class_name, class_name]
        row_sum = contingency_table.loc[class_name, :].sum()
        col_sum = contingency_table.loc[:, class_name].sum()
        per_class_accuracy = true_positives / (row_sum + col_sum - true_positives)
        print(f"Accuracy for class {class_name}: {per_class_accuracy:.3f}")

# Alternatively, you can use the classification_report function to get a more comprehensive report
# print(classification_report(gt_labels, pred_labels))

# Overall figures over every class that occurs.
accuracy = accuracy_score(y_true, y_pred)
average_precision = precision_score(y_true, y_pred, average='macro')
average_recall = recall_score(y_true, y_pred, average='macro')

# Per-class figures, one entry per class in labels_order.
precision = precision_score(y_true, y_pred, average=None, labels=labels_order)

recall = recall_score(y_true, y_pred, average=None, labels=labels_order)

f1 = f1_score(y_true, y_pred, average=None, labels=labels_order)

print("accuracy: ", accuracy)
print("average_precision: ", average_precision)
print("average_recall: ", average_recall)
print("precision: ", precision)
print("recall: ", recall)
# f1 was computed but never reported.
print("f1: ", f1)

In [None]:
# save entire model
# NOTE(review): the file name says "resnet18", but `model` is whatever the most
# recently executed cell assigned (the cell above builds MobileNetV3-Small) —
# confirm the intended architecture before relying on this file.
torch.save(model, "/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-resnet18-entire.pkl")

In [None]:
# need to use transformers for ViT
!pip install transformers

In [None]:
# try ViT on rico

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models
from torch import nn, optim
from transformers import ViTFeatureExtractor, ViTForImageClassification
import time
from PIL import Image

# # Transformation to resize and normalize images
# transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

# # Load your own datasets
# train_data = ImageFolder(root='path/to/train/data', transform=transform)
# test_data = ImageFolder(root='path/to/test/data', transform=transform)

# # Create data loaders
# batch_size = 32
# train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)


# Dataset-wide statistics of the training array. They are printed for
# reference only: the Normalize steps below are disabled.
mean, std = x_train.mean(), x_train.std()

print("mean: ", mean)
print("std: ", std)

# ViT pipeline for training: resize to 224x224, convert to a tensor, then
# repeat the tensor three times along its first axis.
_vit_train_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    # transforms.Normalize(mean=mean, std=std), # featurewise centering
]
transform_train_vit = transforms.Compose(_vit_train_steps)

# Validation uses the same three steps.
_vit_val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    # transforms.Normalize(mean=mean, std=std), # featurewise centering
]
transform_val_vit = transforms.Compose(_vit_val_steps)

class CustomDataset(Dataset):
    """Map-style dataset that pairs an indexable image store with class labels.

    Args:
        images: Indexable collection of images; ``images[idx]`` must be
            something ``np.squeeze`` and ``Image.fromarray`` accept.
        labels: Class labels, converted once to a ``torch.LongTensor``.
        transform: Optional callable applied to the PIL image of each sample.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.transform = transform
        # Stored as a LongTensor so samples come out as integer tensors.
        self.labels = torch.LongTensor(labels)

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``.

        The image has its singleton axes removed and is wrapped in a PIL
        image; if a transform was supplied, the transformed result is
        returned instead of the PIL image.
        """
        target = self.labels[idx]

        # Drop length-1 axes before handing the array to PIL.
        sample = Image.fromarray(np.squeeze(self.images[idx]))

        if self.transform is None:
            return sample, target

        return self.transform(sample), target

batch_size = 16

# Training data: the first 113840 samples of the training arrays.
train_dataset_vit = CustomDataset(x_train[:113840], y_train[:113840], transform_train_vit)
train_loader_vit = DataLoader(train_dataset_vit, batch_size=batch_size, shuffle=True)
# validation_loader = DataLoader(CustomDataset(x_train[9600:11200],y_train[9600:11200]), batch_size=batch_size, shuffle=True)
# test_loader = DataLoader(CustomDataset(x_test[:1600],y_test[:1600]), batch_size=batch_size, shuffle=True)

# Validation data: the first 4800 samples of the x_test / y_test arrays.
validation_dataset_vit = CustomDataset(x_test[0:4800], y_test[0:4800], transform_val_vit)
validation_loader_vit = DataLoader(validation_dataset_vit, batch_size=batch_size, shuffle=True)


In [None]:
# continue ViT on rico

# Hugging Face checkpoint shared by the feature extractor and the model.
vit_checkpoint = 'google/vit-base-patch16-224'
# Size of the replacement classification head.
vit_num_classes = 99

# Feature extractor for the checkpoint. The active training loop in this cell
# passes the loader's tensors to the model directly and does not call it.
feature_extractor = ViTFeatureExtractor.from_pretrained(vit_checkpoint)
# feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224', do_normalize=True, image_mean=[mean.item()], image_std=[std.item()])

# Pretrained backbone, with its classifier replaced by a new linear layer
# that maps the hidden size to the 99 target classes.
model = ViTForImageClassification.from_pretrained(vit_checkpoint)
model.classifier = nn.Linear(model.config.hidden_size, vit_num_classes)

# Training objective and optimizer (SGD with momentum over all parameters).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

num_epochs = 2

# Move the model to the selected device.
model = model.to(device)

# # Training loop
# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     correct_predictions = 0
#     total_predictions = 0

#     start_time = time.time()

#     for images, labels in train_loader_vit:
#         # Zero the gradients
#         optimizer.zero_grad()

#         # Perform forward pass
#         images = images.to(device)
#         labels = labels.to(device)
#         features = feature_extractor(images)
#         outputs = model(**features)

#         # Compute loss
#         loss = criterion(outputs.logits, labels)

#         # Perform backward pass and optimization
#         loss.backward()
#         optimizer.step()

#         # Calculate the running loss and the accuracy for this epoch
#         running_loss += loss.item()
#         _, preds = torch.max(outputs.logits, 1)
#         correct_predictions += torch.sum(preds == labels)
#         total_predictions += labels.shape[0]

#     train_accuracy = correct_predictions.double() / total_predictions
#     train_loss = running_loss / len(train_loader_vit)

#     # Evaluation on test (or validation) data
#     model.eval()
#     running_loss = 0.0
#     correct_predictions = 0
#     total_predictions = 0
#     with torch.no_grad():
#         for images, labels in validation_loader_vit:
#             images = images.to(device)
#             labels = labels.to(device)
#             features = feature_extractor(images)
#             outputs = model(**features)
#             loss = criterion(outputs.logits, labels)

#             # Calculate the running loss and the accuracy for this epoch
#             running_loss += loss.item()
#             _, preds = torch.max(outputs.logits, 1)
#             correct_predictions += torch.sum(preds == labels)
#             total_predictions += labels.shape[0]

#     validation_accuracy = correct_predictions.double() / total_predictions
#     validation_loss = running_loss / len(validation_loader_vit)

#     end_time = time.time()

#     print(f'Epoch: {epoch+1}/{num_epochs}, ',
#           f'Train accuracy: {train_accuracy}, Train loss: {train_loss}, ',
#           f'Validation accuracy: {validation_accuracy}, Validation loss: {validation_loss}, ',
#           f'Time: {end_time - start_time} sec')


# Fine-tune the ViT for `num_epochs` epochs and keep a snapshot of the weights
# that reached the highest validation accuracy.
#
# Reads:  model, criterion, optimizer, num_epochs, device,
#         train_loader_vit, validation_loader_vit
# Writes: best_val_acc (float), best_model (independent copy of the best model)
import copy  # local import: used to snapshot the best-performing model

# Best validation accuracy
best_val_acc = 0
# Fallback so `best_model` is defined even if no epoch improves on 0 accuracy.
best_model = model

# Accuracy must be divided by the number of samples the loaders actually
# iterate over. The loaders are built from slices of x_train / x_test, so the
# full array lengths would under-report accuracy whenever the slice is shorter.
num_train_samples = len(train_loader_vit.dataset)
num_val_samples = len(validation_loader_vit.dataset)

for epoch in range(num_epochs):
    start_time = time.time()
    total_train_loss = 0
    total_val_loss = 0
    total_train_correct = 0
    total_val_correct = 0

    # Training
    model.train()
    for images, labels in train_loader_vit:
        # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Flatten labels to one entry per sample, as CrossEntropyLoss expects
        # class-index targets of shape (batch,).
        labels = labels.view(labels.shape[0])

        # Compute loss
        loss = criterion(outputs.logits, labels)

        # Perform backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        total_train_correct += (predicted == labels).sum().item()

    # Validation
    model.eval()
    with torch.no_grad():
        for images, labels in validation_loader_vit:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)

            labels = labels.view(labels.shape[0])

            val_loss = criterion(outputs.logits, labels)

            total_val_loss += val_loss.item()
            _, predicted = torch.max(outputs.logits, 1)
            total_val_correct += (predicted == labels).sum().item()

    # Losses are averaged per batch, accuracies per sample.
    avg_train_loss = total_train_loss / len(train_loader_vit)
    avg_val_loss = total_val_loss / len(validation_loader_vit)
    train_acc = total_train_correct / num_train_samples
    val_acc = total_val_correct / num_val_samples
    epoch_time = time.time() - start_time

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Deep-copy: a plain assignment would only alias `model`, so later
        # epochs would silently overwrite the "best" weights.
        best_model = copy.deepcopy(model)

    print(f'Epoch: {epoch+1}, Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}, Time: {epoch_time:.2f}s')


In [None]:
# Serialize the whole `best_model` object (not only its state_dict) to the
# mounted Drive folder; the evaluation cell reloads this file with torch.load.
vit_checkpoint_path = "/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-ViT-entire.pkl"
torch.save(best_model, vit_checkpoint_path)

In [None]:
# vit efficiency on validation set


# # Load the pretrained model
# model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

# # Adapt the model for 99 classes
# model.classifier = nn.Linear(model.config.hidden_size, 99)

# # Loss function
# criterion = nn.CrossEntropyLoss()

# # Optimizer
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)


import pickle
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from torchvision import models
import time
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# ------- end --------------------

# Load the model
# The checkpoint is a whole pickled model object (written with
# torch.save(<model>, path), not a state_dict), therefore:
#   * map_location=device lets a checkpoint saved on GPU load on a CPU runtime;
#   * weights_only=False is required on PyTorch releases whose torch.load
#     defaults to weights-only unpickling, which rejects full modules.
#     (The keyword does not exist on very old PyTorch versions; drop it there.)
# SECURITY: full unpickling executes arbitrary code -- only load trusted files.
model = torch.load(
    '/content/gdrive/MyDrive/mobile-semantics-classification/saved_model/model-99-ViT-entire.pkl',
    map_location=device,
    weights_only=False,
)
model = model.to(device)
model.eval()  # Set the model to evaluation mode

# Predictions / ground truth collected over the whole validation loader,
# one scalar per sample.
y_pred = []
y_true = []

start_time = time.time()

# Don't calculate gradients
with torch.no_grad():
    for images, labels in validation_loader_vit:
        images = images.to(device)

        # Forward pass
        outputs = model(images)

        _, predicted = torch.max(outputs.logits, 1)
        y_pred.extend(predicted.cpu().numpy())
        # Flatten labels to one scalar per sample (the training loop does the
        # same with labels.view(...)); otherwise labels of shape (batch, 1)
        # would make y_true a list of 1-element arrays.
        # Labels are only compared on the host, so no device transfer is needed.
        y_true.extend(labels.reshape(-1).cpu().numpy())

# Wall-clock time of the full pass (data loading + inference).
end_time = time.time()
elapsed_time = end_time - start_time

# # Convert to numpy arrays
# all_preds = np.array(all_preds)
# all_labels = np.array(all_labels)

# # Calculate accuracy, precision, recall and F1 score
# accuracy = accuracy_score(all_labels, all_preds)
# precision = precision_score(all_labels, all_preds, average='macro')
# recall = recall_score(all_labels, all_preds, average='macro')
# f1 = f1_score(all_labels, all_preds, average='macro')

# print('Validation Accuracy: ', accuracy)
# print('Precision: ', precision)
# print('Recall: ', recall)
# print('F1 Score: ', f1)
# print('Time cost for evaluation: ', elapsed_time, 'seconds')


In [None]:
#continue ViT evaluation (table4_b)

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Per-class and aggregate metrics for the ViT predictions.
#
# Reads:  y_true, y_pred (one label per validation sample), elapsed_time
# Writes: contingency_table, accuracy, average_precision, average_recall,
#         precision, recall, f1, interested_* summaries
interested_accuracy = []

# print(y_true)

# for i in range(len(y_true)):
#   y_true[i] = y_true[i][0]

# Class ids of interest; precision/recall/F1 are reported in this order.
labels_order = [72.0, 42.0, 77.0, 91.0, 6.0, 89.0, 40.0, 43.0, 82.0, 3.0, 68.0, 49.0, 56.0, 61.0]

# Flatten to 1-D arrays so the metrics work whether each entry is a scalar or
# a 1-element array (labels collected with shape (batch, 1)).
y_true_flat = np.asarray(y_true).reshape(-1)
y_pred_flat = np.asarray(y_pred).reshape(-1)

# Create a crosstab (contingency table) from y_true and y_pred
print("y_true: ", y_true)
print("y_pred: ", y_pred)
contingency_table = pd.crosstab(pd.Series(y_true_flat, name='True'), pd.Series(y_pred_flat, name='Predicted'))

# Print per-class accuracy for the interested classes.
# The score is TP / (TP + FN + FP), i.e. correct hits over every sample that
# either belongs to the class or was predicted as it (identical to
# TP / (row_sum + col_sum - TP) on the contingency table).
for class_name in labels_order:
    is_true = y_true_flat == class_name
    is_pred = y_pred_flat == class_name
    support = np.count_nonzero(is_true | is_pred)
    if support == 0:
        # Class neither present nor predicted: the score is undefined, skip it.
        continue
    # A class that appears in only one of y_true / y_pred scores 0 here
    # instead of being silently dropped, which would inflate the mean below.
    true_positives = np.count_nonzero(is_true & is_pred)
    per_class_accuracy = true_positives / support
    print(f"Accuracy for class {class_name}: {per_class_accuracy:.3f}")
    interested_accuracy.append(per_class_accuracy)

# Alternatively, you can use the classification_report function to get a more comprehensive report
# print(classification_report(gt_labels, pred_labels))

# Overall accuracy and macro averages over all classes.
accuracy = accuracy_score(y_true_flat, y_pred_flat)
average_precision = precision_score(y_true_flat, y_pred_flat, average='macro')
average_recall = recall_score(y_true_flat, y_pred_flat, average='macro')

# Per-class scores restricted to (and ordered by) the interested classes.
precision = precision_score(y_true_flat, y_pred_flat, average=None, labels=labels_order)

recall = recall_score(y_true_flat, y_pred_flat, average=None, labels=labels_order)

f1 = f1_score(y_true_flat, y_pred_flat, average=None, labels=labels_order)

print("accuracy: ", accuracy)
print("average_precision: ", average_precision)
print("average_recall: ", average_recall)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)  # previously computed but never reported
print('Time cost for evaluation: ', elapsed_time, 'seconds')

# Unweighted means over the interested classes.
interested_precision = np.asarray(precision).mean()
interested_recall = np.asarray(recall).mean()
# Explicit NaN (without a NumPy warning) when no interested class was scored.
interested_accuracy_mean = np.mean(interested_accuracy) if interested_accuracy else float("nan")

print("interested accuracy: ", interested_accuracy_mean)
print("interested precision: ", interested_precision)
print("interested recall: ", interested_recall)