In [1]:
#각종 라이브러리를 임포트 해줍시다. 

%matplotlib inline
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,Dataset
import matplotlib.pyplot as plt
import torchvision.utils
import numpy as np
import random
from PIL import Image
import torch, gc
from torch.autograd import Variable
import PIL.ImageOps    
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import glob
from PIL import ExifTags
import shutil
from torch.optim.lr_scheduler import ReduceLROnPlateau
import copy

In [2]:
#각종 사용할 함수를 세팅합니다.

def imshow(img,text=None,should_save=False):
    """Display a channels-first image tensor with matplotlib.

    Args:
        img: torch tensor laid out as (C, H, W); it is transposed to
            (H, W, C) before being handed to ``plt.imshow``.
        text: optional caption drawn on top of the figure.
        should_save: accepted for backward compatibility; the function does
            not save anything.
    """
    # detach() + cpu() keep this working for CUDA tensors and for tensors that
    # track gradients; a bare .numpy() raises for both.
    npimg = img.detach().cpu().numpy()
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

def show_plot(iteration,loss):
    """Draw ``loss`` against ``iteration`` on the current axes and show the figure."""
    current_axes = plt.gca()
    current_axes.plot(iteration, loss)
    plt.show()

In [3]:
class Config:
    """Dataset locations and training hyper-parameters used across the notebook."""

    # Root folders handed to ImageFolder for each dataset split.
    training_dir = "D:/training/"
    validation_dir = "D:/validation/"
    testing_dir = "D:/testing/"

    # Optimisation settings read by the training cells.
    train_number_epochs = 50
    train_batch_size = 64

In [4]:
# Split the raw per-identity folders into testing / validation / training sets.
folder_path = 'D:\\Face_data2'
testing_folder = 'D:\\testing'
training_folder = 'D:\\training'
validation_folder = "D:\\validation"

test_samples = 720  # number of folders held out for testing
val_sampes = 360  # number of folders held out for validation
# Fixed seed: the split is repeatable as long as the folder listing order is.
random.seed(2023)


def _copy_folders(names, source_root, destination_root):
    """Copy each named sub-folder of ``source_root`` into ``destination_root``.

    Folders that already exist at the destination are skipped so the cell can
    be re-run; ``shutil.copytree`` would otherwise raise ``FileExistsError``.
    """
    for name in names:
        destination_path = os.path.join(destination_root, name)
        if os.path.exists(destination_path):
            continue
        shutil.copytree(os.path.join(source_root, name), destination_path)


if not os.path.isdir(folder_path):
    # next(os.walk(...)) on a missing directory would raise a bare StopIteration.
    raise FileNotFoundError("Source folder not found: {}".format(folder_path))

# Immediate sub-folders of the source directory (one per class).
folders = next(os.walk(folder_path))[1]
if len(folders) < test_samples + val_sampes:
    raise ValueError(
        "Need at least {} folders for the test/validation split, found {}".format(
            test_samples + val_sampes, len(folders)))

# Draw the test folders first, then the validation folders from what is left.
test_folders = random.sample(folders, test_samples)
test_folder_set = set(test_folders)  # set membership instead of repeated list scans
val_lis = [folder for folder in folders if folder not in test_folder_set]
val_folders = random.sample(val_lis, val_sampes)

# Everything that was not held out is used for training.
held_out_set = test_folder_set | set(val_folders)
train_folders = [folder for folder in folders if folder not in held_out_set]

# Create the destination roots.
os.makedirs(training_folder, exist_ok=True)
os.makedirs(testing_folder, exist_ok=True)
os.makedirs(validation_folder, exist_ok=True)

# Materialise the three splits on disk.
_copy_folders(test_folders, folder_path, testing_folder)
_copy_folders(val_folders, folder_path, validation_folder)
_copy_folders(train_folders, folder_path, training_folder)

In [5]:
# path = "D:\\testing\\**"
# rot_img = []
# for f in glob.glob(path, recursive=True):
#     try:
#         img = Image.open(f)
#         exif_data = img._getexif()
#         if exif_data[274] != 1:
#             print(f, exif_data[274])
#             rot_img.append(img)
#     except:
#         pass

In [5]:
class SiameseNetworkDataset(Dataset):
    """Dataset that serves random image pairs for Siamese-network training.

    Each item is ``(img0, img1, label)`` where ``label`` is a float32 tensor of
    shape (1,) holding 0.0 when both images come from the same class and 1.0
    otherwise. Pairs are drawn with ``random`` on every access, so the
    ``index`` passed to ``__getitem__`` does not select the images.
    """

    def __init__(self, imageFolderDataset, transform=None, should_invert=True):
        """
        Args:
            imageFolderDataset: object exposing ``imgs``, a sequence of
                ``(path, class_index)`` tuples.
            transform: optional callable applied to each grayscale PIL image.
            should_invert: when True, images are inverted after the grayscale
                conversion.
        """
        self.imageFolderDataset = imageFolderDataset
        self.transform = transform
        self.should_invert = should_invert

        # Group the samples by class once so that a same-class partner can be
        # drawn directly instead of rejection-sampling over the whole dataset.
        self._samples_by_class = {}
        for sample in imageFolderDataset.imgs:
            self._samples_by_class.setdefault(sample[1], []).append(sample)

    def rotate(self, img):
        """Return ``img`` turned upright according to its EXIF orientation tag.

        Orientation values 2-8 are undone; 1, a missing tag, or any error while
        reading the metadata leaves the image unchanged (best effort).
        """
        try:
            # 274 is the EXIF "Orientation" tag; getexif() is available for
            # every image format, unlike the JPEG-only private _getexif().
            orientation = img.getexif().get(274, 1)

            if orientation == 2:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)

            elif orientation == 3:
                img = img.rotate(180)

            elif orientation == 4:
                img = img.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)

            elif orientation == 5:
                img = img.rotate(-90, expand=True).transpose(Image.FLIP_LEFT_RIGHT)

            elif orientation == 6:
                img = img.rotate(-90, expand=True)

            elif orientation == 7:
                img = img.rotate(90, expand=True).transpose(Image.FLIP_LEFT_RIGHT)

            elif orientation == 8:
                img = img.rotate(90, expand=True)

            return img
        except Exception:
            # Unreadable metadata must not abort data loading.
            return img

    def _pick_pair(self):
        """Draw two ``(path, class_index)`` samples, same class about half the time."""
        img0_tuple = random.choice(self.imageFolderDataset.imgs)
        should_get_same_class = random.randint(0, 1)

        if should_get_same_class:
            # Uniform over the class of img0 (may return img0 itself).
            img1_tuple = random.choice(self._samples_by_class[img0_tuple[1]])
        else:
            if len(self._samples_by_class) < 2:
                # Searching for a different class would never terminate.
                raise ValueError(
                    "Cannot draw a different-class pair from a dataset with fewer than two classes")
            while True:
                # Keep drawing until an image of another class turns up.
                img1_tuple = random.choice(self.imageFolderDataset.imgs)
                if img0_tuple[1] != img1_tuple[1]:
                    break

        return img0_tuple, img1_tuple

    def _load_image(self, path):
        """Open ``path``, fix its orientation, convert to grayscale, then invert/transform."""
        with Image.open(path) as raw:
            upright = self.rotate(raw)
            # convert() returns a new image, so it remains usable after the
            # file handle is closed on leaving the with-block.
            prepared = upright.convert("L")

        if self.should_invert:
            prepared = PIL.ImageOps.invert(prepared)

        if self.transform is not None:
            prepared = self.transform(prepared)

        return prepared

    def __getitem__(self, index):
        """Return ``(img0, img1, label)`` for a freshly drawn random pair."""
        img0_tuple, img1_tuple = self._pick_pair()

        img0 = self._load_image(img0_tuple[0])
        img1 = self._load_image(img1_tuple[0])

        # 1.0 marks a pair from different classes, 0.0 a same-class pair.
        label = torch.from_numpy(np.array([int(img1_tuple[1] != img0_tuple[1])], dtype=np.float32))
        return img0, img1, label

    def __len__(self):
        """Number of images in the wrapped dataset."""
        return len(self.imageFolderDataset.imgs)

In [6]:
# Index every file under training_dir together with the class of its folder.
folder_dataset = dset.ImageFolder(root=Config.training_dir)

# Resize already yields 100x100 images, so no additional crop is needed; this
# matches the transforms used for the validation and test datasets.
siamese_dataset = SiameseNetworkDataset(
    imageFolderDataset=folder_dataset,
    transform=transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.ToTensor(),
    ]),
    should_invert=False,
)

In [8]:
# # 미니배치 생성기
# vis_dataloader = DataLoader(siamese_dataset,
#                         shuffle=True,
#                         num_workers=0,
#                         batch_size=8)
# # 미니배치 세트 생성
# dataiter = iter(vis_dataloader)

# example_batch = next(dataiter)

# concatenated = torch.cat((example_batch[0],example_batch[1]),0)
# imshow(torchvision.utils.make_grid(concatenated))
# print(example_batch[2].numpy())
# print(torchvision.utils.make_grid(concatenated).shape)

In [9]:
# # SiameseNetwork1
# class SiameseNetwork(nn.Module):
#     def __init__(self):
#         super(SiameseNetwork, self).__init__()
#         self.cnn1 = nn.Sequential(
#             nn.ReflectionPad2d(1),
#             nn.Conv2d(1, 8, kernel_size=3),
#             nn.ReLU(inplace=True),
#             nn.BatchNorm2d(8),
            
#             nn.ReflectionPad2d(1),
#             nn.Conv2d(8, 16, kernel_size=3),
#             nn.ReLU(inplace=True),
#             nn.BatchNorm2d(16),

#             nn.ReflectionPad2d(1),
#             nn.Conv2d(16, 16, kernel_size=3),
#             nn.ReLU(inplace=True),
#             nn.BatchNorm2d(16),

#         )

#         self.fc1 = nn.Sequential(
#             nn.Linear(16*100*100, 500),
#             nn.ReLU(inplace=True),

#             nn.Linear(500, 500),
#             nn.ReLU(inplace=True),

#             nn.Linear(500, 2))

#     def forward_once(self, x):
#         output = self.cnn1(x)
#         output = output.view(output.size()[0], -1)
#         output = self.fc1(output)
#         return output

#     def forward(self, input1, input2):
#         output1 = self.forward_once(input1)
#         output2 = self.forward_once(input2)
#         return output1, output2

In [47]:
# SiameseNetwork2
class SiameseNetwork(nn.Module):
    """Twin network: one shared CNN + MLP encoder applied to both inputs.

    The encoder maps a single-channel image to a 10-dimensional embedding. The
    first linear layer expects 18432 features, i.e. 128 channels at 12x12,
    which is what three 2x max-poolings produce from a 100x100 input.
    """

    def __init__(self):
        super().__init__()

        def conv_unit(in_channels, out_channels):
            # 3x3 same-padding convolution -> ReLU -> batch norm
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_channels),
            ]

        def dense_unit(in_features, out_features):
            # linear -> ReLU -> batch norm -> dropout
            return [
                nn.Linear(in_features, out_features),
                nn.ReLU(inplace=True),
                nn.BatchNorm1d(out_features),
                nn.Dropout(0.5),
            ]

        # Layers are created in the same order and at the same Sequential
        # indices as before, so state_dict keys stay compatible.
        self.cnn1 = nn.Sequential(
            *conv_unit(1, 32),
            nn.MaxPool2d(kernel_size=2),
            *conv_unit(32, 64),
            nn.MaxPool2d(kernel_size=2),
            *conv_unit(64, 128),
            *conv_unit(128, 128),
            nn.MaxPool2d(kernel_size=2),
        )

        self.fc1 = nn.Sequential(
            *dense_unit(18432, 1024),
            *dense_unit(1024, 1024),
            nn.Linear(1024, 10),
        )

    def forward_once(self, x):
        """Encode one batch of images into embeddings."""
        feature_maps = self.cnn1(x)
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.fc1(flattened)

    def forward(self, input1, input2):
        """Encode both inputs with the shared weights and return the two embeddings."""
        return self.forward_once(input1), self.forward_once(input2)

In [16]:
class ContrastiveLoss(torch.nn.Module):
    """Contrastive loss over pairs of embeddings.

    With ``d`` the pairwise distance and ``label`` equal to 0 for similar and
    1 for dissimilar pairs, the loss is the mean of
    ``(1 - label) * d**2 + label * max(margin - d, 0)**2``.
    """

    def __init__(self, margin=2.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        # keepdim=True keeps the distance as a column, like the label tensor.
        distance = F.pairwise_distance(output1, output2, keepdim=True)

        # Similar pairs are pulled together ...
        similar_term = (1 - label) * distance.pow(2)
        # ... dissimilar pairs are pushed apart until they clear the margin.
        margin_gap = (self.margin - distance).clamp(min=0.0)
        dissimilar_term = label * margin_gap.pow(2)

        return (similar_term + dissimilar_term).mean()

In [17]:
# Training data loader: shuffled mini-batches of image pairs.
train_dataloader = DataLoader(
    dataset=siamese_dataset,
    batch_size=Config.train_batch_size,
    shuffle=True,
    num_workers=0,
)
print(len(train_dataloader))

357


In [18]:
# Validation dataset and loader (image pairs are drawn at random by the dataset).
folder_dataset_val = dset.ImageFolder(root=Config.validation_dir)
siamese_dataset_val = SiameseNetworkDataset(
    imageFolderDataset=folder_dataset_val,
    transform=transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.ToTensor(),
    ]),
    should_invert=False,
)
validation_dataloader = DataLoader(siamese_dataset_val, batch_size=8, num_workers=0)
print(len(validation_dataloader))

In [19]:
# Computes the validation loss that drives early stopping.
def calculate_validation_loss(model, criterion, dataloader):
    """Return the sample-weighted mean loss of ``model`` over ``dataloader``.

    Args:
        model: module called as ``model(inputs1, inputs2)`` and returning two outputs.
        criterion: callable ``criterion(outputs1, outputs2, labels)`` returning a
            scalar tensor (a per-batch mean).
        dataloader: iterable of ``(inputs1, inputs2, labels)`` batches.

    Returns:
        float: sum of ``batch_loss * batch_size`` divided by the number of samples.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    # Evaluate on the device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0.0
    total_samples = 0

    # Remember the current mode: leaving the model in eval mode would make the
    # caller keep training with dropout off and frozen batch-norm statistics.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs1, inputs2, labels in dataloader:
                inputs1 = inputs1.to(device)
                inputs2 = inputs2.to(device)
                labels = labels.to(device)

                outputs1, outputs2 = model(inputs1, inputs2)
                loss = criterion(outputs1, outputs2, labels)

                # Weight by batch size so a smaller final batch is not over-counted.
                batch_size = len(inputs1)
                total_loss += loss.item() * batch_size
                total_samples += batch_size
    finally:
        model.train(was_training)

    if total_samples == 0:
        raise ValueError("Validation dataloader yielded no samples")

    return total_loss / total_samples


In [51]:
# Release cached memory before (re)starting training.
gc.collect()
torch.cuda.empty_cache()

# Early stopping / checkpoint settings.
patience = 3  # consecutive epochs without validation improvement tolerated
early_stopping_counter = 0
best_loss = float('inf')
best_model_state = None
best_model = None  # same object as best_model_state; kept because the evaluation cell reads it
checkpoint_path = '../model/model2_checkpoint.pt'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)  # torch.save does not create folders

# Loss bookkeeping.
train_losses = []     # mean training loss of each epoch
val_losses = []       # validation loss of each epoch
val_iterations = []   # iteration count at which each validation loss was measured
counter = []          # iteration numbers of the logged training losses
loss_history = []     # training loss logged every 50 batches
iteration_number = 0  # number of optimisation steps taken so far

net = SiameseNetwork().cuda()
criterion = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
# NOTE(review): the scheduler patience (5) is larger than the early-stopping
# patience (3), so training stops before the learning rate would be reduced.
scheduler = ReduceLROnPlateau(optimizer, patience=5, factor=0.1)

for epoch in range(Config.train_number_epochs):
    # Re-enable dropout / batch-norm updates; validation may have left eval mode on.
    net.train()
    epoch_loss_sum = 0.0
    epoch_batches = 0

    for i, data in enumerate(train_dataloader, 0):
        img0, img1, label = data
        img0, img1, label = img0.cuda(), img1.cuda(), label.cuda()
        optimizer.zero_grad()
        output1, output2 = net(img0, img1)
        loss_contrastive = criterion(output1, output2, label)
        loss_contrastive.backward()
        optimizer.step()

        batch_loss = loss_contrastive.item()
        iteration_number += 1
        epoch_loss_sum += batch_loss
        epoch_batches += 1

        if i % 50 == 0:
            print("Epoch number {}\n Current loss {:.4f}\n".format(epoch, batch_loss))
            counter.append(iteration_number)
            loss_history.append(batch_loss)

    train_losses.append(epoch_loss_sum / max(epoch_batches, 1))

    # Validation loss for this epoch.
    validation_loss = calculate_validation_loss(net, criterion, validation_dataloader)
    print(" Validation loss: {:.4f}\n".format(validation_loss))
    val_losses.append(validation_loss)
    val_iterations.append(iteration_number)

    # ReduceLROnPlateau only acts when it is stepped with the monitored metric.
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(validation_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr != previous_lr:
        print(" Learning rate reduced from {:.6f} to {:.6f}\n".format(previous_lr, current_lr))

    if validation_loss < best_loss:
        # New best model: remember it, checkpoint it and reset the patience counter.
        best_loss = validation_loss
        best_model_state = copy.deepcopy(net.state_dict())
        best_model = best_model_state
        torch.save(best_model_state, checkpoint_path)
        print(' Saved model\n')
        early_stopping_counter = 0
    else:
        # Early stopping is driven by the validation loss, not the training loss.
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(" Early stopping at epoch {}\n".format(epoch))
            break

# Plot both curves against the same iteration axis.
plt.plot(counter, loss_history, label='Training Loss')
plt.plot(val_iterations, val_losses, marker='o', label='Validation Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.legend()
plt.show()

Epoch number 0
 Current loss 5.9597

Epoch number 0
 Current loss 3.2367

Epoch number 0
 Current loss 2.0242

Epoch number 0
 Current loss 1.1990

Epoch number 0
 Current loss 0.8700

Epoch number 0
 Current loss 0.8602

Epoch number 0
 Current loss 0.9318

Epoch number 0
 Current loss 0.5294

 Validation loss: 0.5279

 Saved model

Epoch number 1
 Current loss 0.5221

Epoch number 1
 Current loss 10.3720

Epoch number 1
 Current loss 23.2422



In [None]:
# Computes the accuracy of a set of predictions.
def calculate_accuracy(predictions, labels):
    """Return the fraction of entries of ``predictions`` that equal ``labels``."""
    matches = predictions == labels
    n_correct = matches.sum().item()
    return n_correct / len(labels)

In [None]:
# Evaluation on the held-out test set.
folder_dataset_test = dset.ImageFolder(root=Config.testing_dir)
# A dedicated name keeps the training dataset bound to `siamese_dataset` intact.
siamese_dataset_test = SiameseNetworkDataset(
    imageFolderDataset=folder_dataset_test,
    transform=transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.ToTensor(),
    ]),
    should_invert=False,
)

# Load the best weights from the checkpoint written during training.
checkpoint = torch.load('../model/model2_checkpoint.pt')
net.load_state_dict(checkpoint)
net.eval()

# A pair is predicted "different" (label 1) when the embedding distance exceeds
# this threshold. 1.0 is half of the default ContrastiveLoss margin (2.0).
# NOTE(review): tune this value on the validation set.
DISTANCE_THRESHOLD = 1.0
NUM_EXAMPLES_TO_SHOW = 10  # cap on the number of figures rendered below

# Accuracy over randomly drawn test pairs.
eval_dataloader = DataLoader(siamese_dataset_test, num_workers=0,
                             batch_size=Config.train_batch_size)
all_predictions = []
all_labels = []
with torch.no_grad():
    for img0, img1, label in eval_dataloader:
        output1, output2 = net(img0.cuda(), img1.cuda())
        # keepdim=True keeps the distances in the same (batch, 1) layout as the labels.
        distance = F.pairwise_distance(output1, output2, keepdim=True)
        all_predictions.append((distance > DISTANCE_THRESHOLD).float().cpu())
        all_labels.append(label)

test_accuracy = calculate_accuracy(torch.cat(all_predictions), torch.cat(all_labels))
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

# Visual check: compare one reference image with a handful of other test images.
test_dataloader = DataLoader(siamese_dataset_test, num_workers=0, batch_size=1, shuffle=True)
dataiter = iter(test_dataloader)
x0, _, _ = next(dataiter)

with torch.no_grad():
    for _ in range(min(NUM_EXAMPLES_TO_SHOW, len(test_dataloader) - 1)):
        _, x1, _ = next(dataiter)
        concatenated = torch.cat((x0, x1), 0)

        output1, output2 = net(x0.cuda(), x1.cuda())
        euclidean_distance = F.pairwise_distance(output1, output2)
        imshow(torchvision.utils.make_grid(concatenated),
               'Dissimilarity: {:.2f}'.format(euclidean_distance.item()))