In [6]:
import torch
import torchvision.transforms as transforms

# Generate random image dataset
dataset_size = 1000
batch_size = 16
num_epochs = 10
image_shape = (3, 256, 256)

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

data = torch.randn((dataset_size, *image_shape))

# Create dataloader
dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)

# Train model
model = ...
optimizer = ...

for epoch in range(num_epochs):
    for images in dataloader:
        # Forward pass
        outputs = model(images)

        # Backward and optimize
        loss = ...
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

TypeError: 'ellipsis' object is not callable

In [17]:
import torch
from timm.models import ResNet
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
#from models import Teacher
import torchvision.transforms as transforms

# Generate random image dataset
dataset_size = 1000
batch_size = 16
num_epochs = 10
image_shape = (3, 256, 256)

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

data = torch.randn((dataset_size, *image_shape))

# Create dataloader
dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)

class DistillationTraining(object):

    def __init__(self,channel_size,save_path) -> None:
        self.channel_size = channel_size
        self.mean = torch.empty(channel_size)
        self.std = torch.empty(channel_size)
        self.save_path = save_path

    def normalize_channel(self,dataloader):
        for c in range(self.channel_size):
            X = torch.empty(0)
            for iteration in range(10000):
                ldist = next(dataloader)
                ldist = ldist.cuda()
                y = self.pretrain(ldist)
                y = y[:,c,:,:]
                y = y.view(-1)
                X = torch.cat((X,y),0)
            mean = torch.mean(X)
            std = torch.std(X)
            self.mean[c] = mean
            self.std[c] = std

    def load_pretrain(self):
        #self.pretrain=timm.create_model('wide_resnet101', pretrained=True)
        self.pretrain = ResNet(depth=50, num_classes=1000, widen_factor=2, drop_rate=0.3, drop_connect_rate=0.2)
        self.pretrain.load_state_dict(torch.load('pretrained_model.pth'))
        self.pretrain.eval()
        self.pretrain = self.pretrain.cuda()
    
    def compute_mse_loss(self,teacher,ldist):
        y = self.pretrain(ldist)
        y = y.view(y.shape[0],y.shape[1],-1)
        y = torch.transpose(y,1,2)
        y = y.view(-1,y.shape[2])
        y = (y - self.mean)/self.std
        y = y.view(y.shape[0],y.shape[1],1,1)
        y = torch.transpose(y,1,2)
        y = torch.transpose(y,2,3)
        y = y.view(y.shape[0],y.shape[1],y.shape[2],y.shape[3])
        y0 = teacher(ldist)
        loss = F.mse_loss(y,y0)
        return loss

    def train(self,):
        self.load_pretrain()
        imagenet_dataset = ImageNetDataset()
        dataloader = DataLoader(imagenet_dataset, batch_size=32, shuffle=True)
        ldist = next(dataloader)
        ldist = ldist.cuda()
        teacher = Teacher()
        teacher = teacher.cuda()
        self.normalize_channel(dataloader)
        optimizer = torch.optim.Adam(teacher.parameters(), lr=0.0001, weight_decay=0.00001)
        for iteration in range(60000):
            ldist = next(dataloader)
            ldist = ldist.cuda()
            optimizer.zero_grad()
            loss = self.compute_mse_loss(teacher,ldist)
            loss.backward()
            optimizer.step()
            if iteration% 10 ==0:
                print('iter:{},loss:{}'.format(iteration,loss.item()))

        # save teacher
        torch.save(teacher.state_dict(), '{}/teacher.pth'.format(self.save_path))