In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
class MyDataset(Dataset):
    
    def __init__(self, x_data, y_data, transform=None):
        
        self.x_data = x_data # torch.floattensor로 들어옴
        self.y_data = y_data#.view(-1,1) # torch.longtensor로 들어옴
        self.transform = transform
        self.len = len(y_data)
    
    def __getitem__(self, index):
        sample = self.x_data[index], self.y_data[index]
        
        if self.transform:
            sample = self.transform(sample) #self.transform이 None이 아니라면 전처리를 작업한다.
        
        return sample 
    
    def __len__(self):
        return self.len       

class TrainTransform:
    
    def __call__(self, sample):
        inputs, labels = sample
        #labels = labels.float()

        transf = transforms.Compose([
                    transforms.ToPILImage(),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor()
                    ])
        final_output = transf(inputs)      
        
        return final_output, labels 

In [4]:
def balanced_subset(data, labels, num_cls, num_data): # numpy
    num_data_per_class = num_data // num_cls
    data1 = torch.tensor([],dtype=torch.float)
    data2 = torch.tensor([],dtype=torch.float)
    labels1 = torch.tensor([],dtype=torch.long)
    labels2 = torch.tensor([],dtype=torch.long)
    for cls in range(num_cls):
        idx = np.where(labels.numpy() == cls)[0]
        shuffled_idx = np.random.choice(len(idx), len(idx), replace=False)
        data1 = torch.cat([data1, data[shuffled_idx[:num_data_per_class]]], dim=0)
        data2 = torch.cat([data2, data[shuffled_idx[num_data_per_class:]]], dim=0)     
        labels1 = torch.cat([labels1, labels[shuffled_idx[:num_data_per_class]]], dim=0)
        labels2 = torch.cat([labels2, labels[shuffled_idx[num_data_per_class:]]], dim=0)

    return data1, data2, labels1, labels2

In [5]:
torch.tensor([],dtype=torch.float)

tensor([])

In [7]:
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True) # 50000

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 9867267.35it/s] 


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<?, ?it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 8100823.09it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [8]:
labeled_data, unlabeled_data, labels, unlabels = balanced_subset(trainset.data, trainset.targets, num_cls=10, num_data=2000)
train_images, val_images, train_labels, val_labels = balanced_subset(labeled_data, labels, num_cls=10, num_data=1000)

In [11]:
labeled_data.shape, unlabeled_data.shape, labels, unlabels 

(torch.Size([2000, 28, 28]),
 torch.Size([58000, 28, 28]),
 tensor([6, 7, 8,  ..., 5, 4, 1]),
 tensor([5, 6, 0,  ..., 9, 2, 9]))

In [14]:
labeled_data[0][0]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])

In [15]:
train_images = train_images.unsqueeze(1)
val_images = val_images.unsqueeze(1)
trainset = MyDataset(train_images, train_labels, transform=TrainTransform())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)       
validationset = MyDataset(val_images, val_labels)
valloader = torch.utils.data.DataLoader(validationset, batch_size=128, shuffle=False)    

In [16]:
unlabeled_images = unlabeled_data.unsqueeze(1)
unlabeledset = MyDataset(unlabeled_images, unlabels)
unlabeledloader = torch.utils.data.DataLoader(unlabeledset, batch_size=256, shuffle=True)   

In [17]:
# 데이터 불러오기 및 전처리 작업
transform = transforms.Compose([transforms.ToTensor()])
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100,shuffle=False)

In [18]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.features = nn.Sequential(
                        nn.Conv2d(1, 64, 3), nn.ReLU(),
                        nn.MaxPool2d(2, 2),
                        nn.Conv2d(64, 192, 3, padding=1), nn.ReLU(),
                        nn.MaxPool2d(2, 2))       
        self.classifier = nn.Sequential(
                        nn.Dropout(0.5),
                        nn.Linear(192*6*6, 1024), nn.ReLU(),
                        nn.Dropout(0.5),
                        nn.Linear(1024, 512), nn.ReLU(),
                        nn.Linear(512, 10))          
    def forward(self, x):
        x = self.features(x)
        x = x.view(-1, 192*6*6)
        x = self.classifier(x)    
        return x

model = Net().to(device) # 모델 선언

In [19]:
from torchsummary import summary

In [20]:
summary(model, input_size=(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 26, 26]             640
              ReLU-2           [-1, 64, 26, 26]               0
         MaxPool2d-3           [-1, 64, 13, 13]               0
            Conv2d-4          [-1, 192, 13, 13]         110,784
              ReLU-5          [-1, 192, 13, 13]               0
         MaxPool2d-6            [-1, 192, 6, 6]               0
           Dropout-7                 [-1, 6912]               0
            Linear-8                 [-1, 1024]       7,078,912
              ReLU-9                 [-1, 1024]               0
          Dropout-10                 [-1, 1024]               0
           Linear-11                  [-1, 512]         524,800
             ReLU-12                  [-1, 512]               0
           Linear-13                   [-1, 10]           5,130
Total params: 7,720,266
Trainable param

In [22]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
#scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100,200], gamma=0.1)

In [23]:
def accuracy(dataloader):
    correct = 0
    total = 0
    with torch.no_grad():
        model.eval()
        for data in dataloader:
            images, labels = data[0].to(device), data[1].to(device)       
            outputs = model(images)
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)      
            correct += (predicted == labels).sum().item()

    acc = 100*correct/total
    model.train()
    return acc

### 전통적인 지도 학습(일반적인 분류) 방식
1. 오직 레이블이 있는 데이터만 사용하여 모델을 훈련
2. 훈련 중에 검증 데이터에 대한 정확도를 모니터링하며, 가장 높은 정확도를 달성한 경우 모델을 저장합니다.

In [24]:
best_acc = 0
for epoch in range(101):
    correct = 0
    total = 0
    for traindata in trainloader: 
       
        inputs, labels = traindata[0].to(device), traindata[1].to(device)     
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)      
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    val_acc = accuracy(valloader)
    if val_acc >= best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), './models/cifar_model_for_pseudo_baseline.pth')  
        print('[%d] train acc: %.2f, validation acc: %.2f - Saved the best model' %(epoch, 100*correct/total, val_acc))  
    elif epoch % 10 == 0:
        print('[%d] train acc: %.2f, validation acc: %.2f' %(epoch, 100*correct/total, val_acc))  

[0] train acc: 13.40, validation acc: 13.40 - Saved the best model
[1] train acc: 23.30, validation acc: 46.00 - Saved the best model
[2] train acc: 44.00, validation acc: 52.90 - Saved the best model
[3] train acc: 53.00, validation acc: 56.20 - Saved the best model
[4] train acc: 58.90, validation acc: 64.20 - Saved the best model
[10] train acc: 91.40, validation acc: 56.00
[12] train acc: 93.60, validation acc: 66.60 - Saved the best model
[14] train acc: 95.90, validation acc: 71.50 - Saved the best model
[20] train acc: 98.50, validation acc: 64.10
[30] train acc: 99.50, validation acc: 56.70
[40] train acc: 99.90, validation acc: 52.50
[50] train acc: 99.70, validation acc: 57.90
[58] train acc: 100.00, validation acc: 72.10 - Saved the best model
[60] train acc: 99.90, validation acc: 70.80
[70] train acc: 99.90, validation acc: 57.60
[76] train acc: 99.60, validation acc: 72.60 - Saved the best model
[80] train acc: 99.90, validation acc: 66.90
[85] train acc: 98.90, validatio

In [25]:
# model.load_state_dict(torch.load('./models/cifar_model_for_pseudo_baseline.pth'))
accuracy(testloader)

71.98

예측값을 기준으로하는 의사라벨과 예측값을 비교하여 손실 함수를 계산한다.

In [26]:
model = Net().to(device) # 모델 선언
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

###  반지도 학습(Semi-Supervised Learning)의 일종인 "일관성 정규화(Consistency Regularization)"

1. unlabeledloader에서 비지도 학습 데이터인 "의사 레이블(pseudo-label)"을 사용합니다.
2. 모델의 출력과 의사 레이블 간의 일관성을 유지하기 위해 손실 함수에 추가적인 항 (alpha * criterion(poutputs, plabels))을 사용합니다.
3. T1 에폭 이후부터 T2 에폭까지 alpha 값을 선형적으로 증가시킵니다. 이것은 일관성 정규화의 가중치를 조절하는 데 사용됩니다.

alpha 값(즉, pseudo-labeled data의 중요도)에 따라 loss 계산 방식이 달라집니다. alpha가 0보다 큰 경우에만 pseudo-labeled data의 loss가 포함되며, 그렇지 않은 경우 labeled data만으로 loss를 계산

In [27]:
alpha = 0
alpha_t = 1e-4
T1 = 100
T2 = 450
best_acc = 0

for epoch in range(101):
    correct = 0
    total = 0
    for traindata, pseudodata in zip(trainloader, unlabeledloader): 
       
        inputs, labels = traindata[0].to(device), traindata[1].to(device)     
        pinputs = pseudodata[0].to(device) 
        optimizer.zero_grad()
        outputs = model(inputs)

        if alpha > 0:            
            poutputs = model(pinputs)  
            _, plabels = torch.max(poutputs.detach(), 1)     
            loss = criterion(outputs, labels)  + alpha * criterion(poutputs, plabels)   
        else:    
            loss = criterion(outputs, labels)    
              
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    if (epoch > T1) and (epoch < T2):
        alpha = alpha_t*(epoch - T1)/(T2 - T1)
    elif epoch >= T2:    
        alpha = alpha_t

    val_acc = accuracy(valloader)
    if val_acc >= best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), './models/cifar_model_for_pseudo_label.pth')    
        print('[%d] train acc: %.2f, validation acc: %.2f - Saved the best model' %(epoch, 100*correct/total, val_acc))  

    elif epoch % 10 == 0:
        print('[%d] train acc: %.2f, validation acc: %.2f' %(epoch, 100*correct/total, val_acc))  

[0] train acc: 17.30, validation acc: 26.90 - Saved the best model
[1] train acc: 31.40, validation acc: 48.50 - Saved the best model
[3] train acc: 54.80, validation acc: 48.50 - Saved the best model
[4] train acc: 68.90, validation acc: 51.20 - Saved the best model
[10] train acc: 93.70, validation acc: 42.70
[20] train acc: 99.30, validation acc: 35.00
[30] train acc: 100.00, validation acc: 33.10
[40] train acc: 100.00, validation acc: 35.00
[50] train acc: 100.00, validation acc: 34.40
[60] train acc: 99.80, validation acc: 36.80
[70] train acc: 100.00, validation acc: 40.50
[79] train acc: 99.60, validation acc: 56.70 - Saved the best model
[80] train acc: 99.60, validation acc: 43.60
[84] train acc: 99.50, validation acc: 59.50 - Saved the best model
[86] train acc: 99.70, validation acc: 60.20 - Saved the best model
[87] train acc: 99.70, validation acc: 64.10 - Saved the best model
[90] train acc: 99.90, validation acc: 51.00
[100] train acc: 99.90, validation acc: 38.00


In [28]:
# model.load_state_dict(torch.load('./models/cifar_model_for_pseudo_label.pth'))
accuracy(testloader)

66.47

학습 데이터로만 학습한 모델을 가지고 의사라벨을 만들어 데이터로 활용한다.

In [30]:
model = Net().to(device) # 모델 선언
model.load_state_dict(torch.load('./models/cifar_model_for_pseudo_baseline.pth'))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [31]:
pseudo_threshold = 0.99
pseudo_images = torch.tensor([], dtype=torch.float)
pseudo_labels = torch.tensor([], dtype=torch.long)

with torch.no_grad():
    for data in tqdm(unlabeledloader):
        model.eval()
        images = data[0].to(device)
        outputs = model(images)
        #print(outputs.size())
        outputs = torch.nn.functional.softmax(outputs, dim=1)
        
        max_val, predicted = torch.max(outputs.detach(), 1)
        idx = np.where(max_val.cpu().numpy() >= pseudo_threshold)[0]
        if len(idx) > 0:
            pseudo_images = torch.cat((pseudo_images, images.cpu()[idx]), 0) 
            pseudo_labels = torch.cat((pseudo_labels, predicted.cpu()[idx]), 0)

#print(pseudo_images.size(), pseudo_labels.size())        

100%|██████████| 227/227 [00:10<00:00, 22.63it/s]


In [32]:
print(pseudo_images.size(), pseudo_labels.size())   

torch.Size([57911, 1, 28, 28]) torch.Size([57911])


In [33]:
pseudo_dataset = MyDataset(pseudo_images, pseudo_labels)
pseudoloader = torch.utils.data.DataLoader(pseudo_dataset, batch_size=256, shuffle=True)   

 alpha 값과 관계없이 항상 labeled data와 pseudo-labeled data 모두에 대한 loss를 계산

In [34]:
alpha = 0
alpha_t = 1e-4
T1 = 20
T2 = 450
best_acc = 0

for epoch in range(101):
    correct = 0
    total = 0
    for traindata, pseudodata in zip(trainloader, pseudoloader): 
       
        inputs, labels = traindata[0].to(device), traindata[1].to(device)     
        pinputs, plabels = pseudodata[0].to(device), pseudodata[1].to(device)    
        optimizer.zero_grad()
        outputs = model(inputs)
        poutputs = model(pinputs)
        loss = criterion(outputs, labels) + alpha*criterion(poutputs, plabels)         
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    #scheduler.step()
    if (epoch > T1) and (epoch < T2):
        alpha = alpha_t*(epoch - T1)/(T2 - T1)
        
    elif epoch >= T2:    
        alpha = alpha_t

    val_acc = accuracy(valloader)
    if val_acc >= best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), './models/cifar_model_for_pseudo_label2.pth') 
        print('[%d] train acc: %.2f, validation acc: %.2f - Saved the best model' %(epoch, 100*correct/total, val_acc))  

    elif epoch % 10 == 0:
        print('[%d] train acc: %.2f, validation acc: %.2f' %(epoch, 100*correct/total, val_acc))  


[0] train acc: 99.70, validation acc: 59.80 - Saved the best model
[1] train acc: 99.30, validation acc: 66.50 - Saved the best model
[2] train acc: 99.80, validation acc: 80.40 - Saved the best model
[10] train acc: 99.40, validation acc: 64.30
[20] train acc: 99.50, validation acc: 63.00
[26] train acc: 99.60, validation acc: 80.40 - Saved the best model
[30] train acc: 99.80, validation acc: 80.60 - Saved the best model
[40] train acc: 99.70, validation acc: 75.70
[50] train acc: 99.70, validation acc: 57.70
[60] train acc: 99.40, validation acc: 58.70
[70] train acc: 99.70, validation acc: 46.40
[80] train acc: 100.00, validation acc: 39.70
[90] train acc: 99.90, validation acc: 31.00
[100] train acc: 99.90, validation acc: 32.40


In [35]:
# model.load_state_dict(torch.load('./models/cifar_model_for_pseudo_label2.pth'))
accuracy(testloader)

57.94