# GPU Setting

In [1]:
import tensorflow as tf

# Ask TensorFlow for the visible GPUs once and reuse the answer for both the
# report and the decision below.
USE_GPU = True
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

# NOTE: 'GPU' / 'CPU' are informational labels only; the model-construction
# cell further down assigns its own `device` before any model is moved.
gpu_selected = bool(USE_GPU and visible_gpus)
device = 'GPU' if gpu_selected else 'CPU'
print("Using GPU" if gpu_selected else "Using CPU")

2024-11-20 18:01:16.280862: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-20 18:01:16.335574: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Num GPUs Available:  1
Using GPU


2024-11-20 18:01:17.341658: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-11-20 18:01:17.362875: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-11-20 18:01:17.363039: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


# Loading Dataset

In [2]:
import tarfile
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import random

# Path to the downloaded tgz file
tgz_path = "/home/asko/Documents/workspace/Fall-24/682/project/dataset/imagenette2.tgz"
extract_path = "./imagenette"  # Target folder for extraction

# Extract the archive.
# NOTE(review): this re-extracts on every run and trusts the archive contents;
# only use it with an archive obtained from a trusted source.
with tarfile.open(tgz_path, "r:gz") as tar:
    tar.extractall(path=extract_path)
print("Extraction completed.")


# Preprocessing shared by the training and validation datasets.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to a size suitable for VGG16
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize as per VGG16
])
extract_path_train = "./imagenette/imagenette2/train"
train_dataset = datasets.ImageFolder(
    root=extract_path_train,  # Local folder with one sub-directory per class
    transform=transform
)
extract_path_val = "./imagenette/imagenette2/val"
val_dataset = datasets.ImageFolder(
    root=extract_path_val,  # Local folder with one sub-directory per class
    transform=transform
)

batch_size = 2
sampler_train = sampler.SubsetRandomSampler(range(len(train_dataset)))
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler_train)

# ImageFolder orders its samples class by class, so taking the first 1000
# indices would validate on only the first few classes. Shuffle the indices
# with a fixed seed first so the subset covers every class and is reproducible.
VAL_SUBSET_SIZE = 1000
VAL_SPLIT_SEED = 0
val_indices = list(range(len(val_dataset)))
random.Random(VAL_SPLIT_SEED).shuffle(val_indices)
num_val = min(VAL_SUBSET_SIZE, len(val_indices))  # guard against a short dataset

batch_size = 2
sampler_val = sampler.SubsetRandomSampler(val_indices[:num_val])
val_loader = DataLoader(val_dataset, batch_size=batch_size, sampler=sampler_val)

# Held-out test split (disabled): the remaining shuffled indices, disjoint from
# the validation subset above.
# batch_size = 16
# sampler_test = sampler.SubsetRandomSampler(val_indices[num_val:])
# test_loader = DataLoader(val_dataset, batch_size=batch_size, sampler=sampler_test)


  from .autonotebook import tqdm as notebook_tqdm


Extraction completed.


In [None]:
import tensorflow as tf
from student.VGG_CMTKD_Student import VGG_CMTKD_Student
from student.VGG_CMTKD_Teacher import VGG_CMTKD_Teacher
import torch

# Hyper-parameters that are passed to the (currently disabled) student model
# constructor below.
alpha = 0.5
beta  = 0.5
temperature = 3
pi1=0.6
pi2=0.4

# Load the VGG16 model
teacher_1_model = VGG_CMTKD_Teacher(bit_width=6, num_of_classes=10, teacher_idx=1)
# teacher_2_model = VGG_CMTKD_Teacher(bit_width=4, num_of_classes=10, teacher_idx=2)

# student_model = VGG_CMTKD_Student(alpha=alpha, beta=beta, temperature=temperature, bit_width=2, pi1=pi1, pi2=pi2, num_of_classes=10)

# The models are PyTorch modules, so ask PyTorch (not TensorFlow) whether CUDA
# is usable; the two frameworks can disagree, e.g. with a CPU-only torch build.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
teacher_1_model.to(device)
# teacher_2_model.to(device)
# student_model.to(device)
print(device)



cuda


In [4]:
def check_accuracy_part34(loader, model, model_name, device=None):
    """Evaluate classification accuracy of ``model`` over ``loader`` and print it.

    Args:
        loader: Iterable yielding ``(x, y)`` batches of inputs and integer labels.
        model: Module called as ``model(x)``; the prediction is the argmax of
            its output along dimension 1.
        model_name: Label used only in the printed header line.
        device: Device the batches are moved to. When ``None`` (the default) it
            is taken from the model's first parameter, falling back to CPU for
            a model without parameters, so the function no longer depends on a
            notebook-global ``device`` variable.

    Returns:
        float: Fraction of correctly classified samples in ``[0, 1]``;
        ``0.0`` when the loader yields no samples.

    Note:
        The model is switched to eval mode and left there; callers that keep
        training must call ``model.train()`` again.
    """
    print(f'Checking accuracy on validation set for {model_name}')   
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # .item() keeps the running total a plain Python int
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)
    # An empty loader would otherwise raise ZeroDivisionError
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc


In [None]:
import torch.optim as optim
import torch
import torch.nn.functional as F

import os  # used only to create the cache directory before training starts

teacher_1_optimizer = optim.Adam(teacher_1_model.parameters(), lr=0.01)
torch.set_grad_enabled(True)

num_epochs = 40
# batch_size = 16  
model_weights = None
print_every = 100

# torch.save does not create missing parent directories, so make sure the
# per-batch cache folder exists up front.
teacher_1_cache_dir = 'cache/Teacher1'
os.makedirs(teacher_1_cache_dir, exist_ok=True)

for epoch in range(num_epochs): 
    for batch_idx, (images, labels) in enumerate(train_loader):
        # Re-enable training mode every batch: check_accuracy_part34 below
        # switches the model to eval mode.
        teacher_1_model.train()     
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.long)

        teacher_1_output = teacher_1_model(images)
        # Persist the teacher's `cache` attribute for this batch.
        torch.save(teacher_1_model.cache, f'{teacher_1_cache_dir}/cache_{epoch}_{batch_idx}')
        
        loss = F.cross_entropy(input=teacher_1_output, target=labels)
        
        teacher_1_optimizer.zero_grad()
        loss.backward()
        teacher_1_optimizer.step()
            
        if batch_idx % print_every == 0:
            print('Iteration %d, loss = %.4f' % (batch_idx, loss.item()))
            check_accuracy_part34(val_loader, teacher_1_model, "teacher_1_model")
            print()
            
        # Drop the references first; empty_cache() can only release memory
        # that is no longer referenced by live tensors.
        del teacher_1_output, images, labels, loss
        torch.cuda.empty_cache()
        
    print(f"Epoch {epoch} complete")

In [None]:
# DISABLED CELL: joint training of both teachers and the student.
# It needs `teacher_2_model` and `student_model`, which are commented out in
# the model-construction cell, so it cannot run as-is.
# NOTE(review): the saved output under this cell ends in a CUDA out-of-memory
# error; presumably that is why it was disabled - confirm before re-enabling.
# import torch.optim as optim
# import torch

# student_optimizer = optim.Adam(student_model.parameters(), lr=0.01)
# teacher_1_optimizer = optim.Adam(teacher_1_model.parameters(), lr=0.01)
# teacher_2_optimizer = optim.Adam(teacher_2_model.parameters(), lr=0.01)
# # optimizer = optim.SGD(student_model.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9, nesterov=True)
# torch.set_grad_enabled(True)

# num_epochs = 40
# # batch_size = 16  
# model_weights = None
# print_every = 100

# for epoch in range(num_epochs): 
#     for batch_idx, (images, labels) in enumerate(train_loader):
#         teacher_1_model.train()
#         teacher_2_model.train()
#         student_model.train()           
#         # print('h1')
#         images = images.to(device, dtype=torch.float32)
#         labels = labels.to(device, dtype=torch.long)
#         # print('h2')
#         teacher_1_output = teacher_1_model(images)
#         teacher_2_output = teacher_2_model(images)
        
#         student_output = student_model(images)
#         # print('h3')
#         loss = student_model.loss(labels=labels, teacher_1_output=teacher_1_output, teacher_2_output=teacher_2_output, student_output=student_output)
#         # print('h4')
#         student_optimizer.zero_grad()
#         teacher_1_optimizer.zero_grad()
#         teacher_2_optimizer.zero_grad()
#         loss.backward()
#         student_optimizer.step()
#         teacher_1_optimizer.step()
#         teacher_2_optimizer.step()
            
#         if batch_idx % print_every == 0:
#             print('Iteration %d, loss = %.4f' % (batch_idx, loss.item()))
#             check_accuracy_part34(val_loader, teacher_1_model, "teacher_1_model")
#             check_accuracy_part34(val_loader, teacher_2_model, "teacher_2_model")
#             check_accuracy_part34(val_loader, student_model, "student_model")
#             print()
            
#         torch.cuda.empty_cache()
#         del teacher_1_output, teacher_2_output, student_output, images, labels, loss
        
#     print(f"Epoch {epoch} complete")

  x = self.classifier_activation(x)
  p = F.softmax(z/temperature)
  p_t = F.softmax(z_t/temperature)
  p_s = F.softmax(z_s/temperature)


OutOfMemoryError: CUDA out of memory. Tried to allocate 392.00 MiB (GPU 0; 5.80 GiB total capacity; 5.00 GiB already allocated; 371.06 MiB free; 5.29 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF