In [None]:
# Mount Google Drive at /content/drive (Colab only); the paths configured below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Dataset locations: `src` (source domain) and `tar` (target domain) are folders relative to `root_path`.
root_path = '/content/drive/MyDrive/HUST/'
src = 'Data/E07-1/'
tar = 'Data/E21-3/'

# Names of the split sub-folders inside the target folder.
tar_train = 'Train'
tar_val = 'Validation '  # NOTE(review): the trailing space is part of the folder name used on disk — confirm it is intentional
tar_test = "Test"

nclass = 4 # number of distinct classes
batch_size = 64
nepoch = 50
lr = [0.0001, 0.001, 0.01]  # one entry per optimizer parameter group, in order: feature layers, bottleneck, classifier
early_stop = 5  # number of non-improving epochs tolerated before training stops
seed = 2021
weight = 0.5  # multiplier applied to the LMMD term of the training loss
momentum = 0.9
decay = 5e-4  # passed to SGD as weight_decay
bottleneck = True

# Run tag used as the output sub-folder name under Save/: five characters taken from each dataset folder name plus the loss weight.
version = src[-6:-1] + ' - ' + tar[-6:-1] + ' weight = ' +str(weight)
print(version)

MODEL

In [1]:
import torch
import torch.nn as nn
import math
import torchvision

ModuleNotFoundError: No module named 'torchvision'

In [None]:
class VGG16(nn.Module):
    """VGG-16 image classifier: 13 3x3 convolutions in five max-pooled stages plus three linear layers.

    The sub-module names (``features``, ``avgpool``, ``classifier``) and the order of the layers
    inside each ``nn.Sequential`` determine the ``state_dict`` keys, so they must stay as they are
    for externally produced weights to load.

    Args:
        num_classes: size of the final linear layer's output.
    """

    # Output channels of each convolution, in order; 'M' marks a 2x2 / stride-2 max-pooling step.
    _LAYOUT = (64, 64, 'M',
               128, 128, 'M',
               256, 256, 256, 'M',
               512, 512, 512, 'M',
               512, 512, 512, 'M')

    def __init__(self, num_classes=1000):
        super().__init__()

        stages = []
        channels = 3  # RGB input
        for spec in self._LAYOUT:
            if spec == 'M':
                stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stages.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
            stages.append(nn.ReLU(inplace=True))
            channels = spec
        self.features = nn.Sequential(*stages)

        # Pool to a fixed 7x7 grid so the classifier input size does not depend on the image size.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        hidden_units = 4096
        self.classifier = nn.Sequential(
            nn.Linear(channels * 7 * 7, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch of images."""
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))

In [None]:
def model(pretrained=False, **kwargs):
    """Build a ``VGG16``, optionally initialised from torchvision's ImageNet VGG-16 weights.

    Args:
        pretrained: when True, copy torchvision's pretrained VGG-16 tensors into the new network.
            Tensors whose shape differs from the local network (the last classifier layer when
            ``num_classes`` is not 1000) are skipped and keep their random initialisation, instead
            of aborting the whole load with a size-mismatch error.
        **kwargs: forwarded to ``VGG16`` (e.g. ``num_classes``).

    Returns:
        The constructed ``VGG16`` instance.

    NOTE(review): a later cell rebinds the name ``model`` to the DSAN instance, after which this
    factory is no longer reachable under that name.
    """
    net = VGG16(**kwargs)
    if pretrained:
        # Newer torchvision selects weights through an enum; `pretrained=True` is the legacy spelling.
        weights_enum = getattr(torchvision.models, 'VGG16_Weights', None)
        if weights_enum is not None:
            reference = torchvision.models.vgg16(weights=weights_enum.IMAGENET1K_V1)
        else:
            reference = torchvision.models.vgg16(pretrained=True)

        own_state = net.state_dict()
        compatible = {
            key: tensor
            for key, tensor in reference.state_dict().items()
            if key in own_state and tensor.shape == own_state[key].shape
        }
        net.load_state_dict(compatible, strict=False)
    return net

Local Maximum Mean Discrepancy (LMMD)

In [2]:
import numpy as np

In [None]:
class LMMD_loss(nn.Module):
    """Local Maximum Mean Discrepancy (LMMD) between a source and a target feature batch.

    The loss is a class-weighted sum over a multi-bandwidth Gaussian kernel matrix: source samples
    are weighted by their ground-truth class, target samples by their predicted class probabilities.

    Args:
        class_num: number of classes. ``None`` (default) reads the notebook-level ``nclass`` when
            the object is created, so the default follows the current configuration.
        kernel_type: stored on the instance; not read by any method of this class.
        kernel_mul: ratio between consecutive kernel bandwidths.
        kernel_num: number of Gaussian kernels that are summed.
        fix_sigma: optional fixed base bandwidth; when falsy the bandwidth is estimated from the data.
    """

    def __init__(self, class_num=None, kernel_type='rbf', kernel_mul=2.0, kernel_num=5, fix_sigma=None):
        super(LMMD_loss, self).__init__()
        self.class_num = nclass if class_num is None else class_num
        self.kernel_num = kernel_num
        self.kernel_mul = kernel_mul
        self.fix_sigma = fix_sigma
        self.kernel_type = kernel_type

    def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
        """Return the summed Gaussian kernel matrix over the concatenation ``[source; target]``.

        ``source`` and ``target`` are 2-D tensors (samples x features). The result is a square
        matrix with ``len(source) + len(target)`` rows, ordered source first.
        """
        n_samples = int(source.size(0)) + int(target.size(0))
        total = torch.cat([source, target], dim=0)
        # Pairwise squared Euclidean distances via broadcasting: entry (i, j) = ||total[j] - total[i]||^2.
        L2_distance = ((total.unsqueeze(0) - total.unsqueeze(1)) ** 2).sum(2)
        if fix_sigma:
            bandwidth = fix_sigma
        else:
            # Mean off-diagonal distance, detached so the bandwidth is treated as a constant.
            bandwidth = torch.sum(L2_distance.detach()) / (n_samples ** 2 - n_samples)
        # Out-of-place division: `/=` would modify a tensor-valued fix_sigma in place on every call.
        bandwidth = bandwidth / (kernel_mul ** (kernel_num // 2))
        bandwidth_list = [bandwidth * (kernel_mul ** i) for i in range(kernel_num)]
        kernel_val = [torch.exp(-L2_distance / bandwidth_temp)
                      for bandwidth_temp in bandwidth_list]
        return sum(kernel_val)

    def get_loss(self, source, target, s_label, t_label):
        """Compute the LMMD loss.

        Args:
            source: source features, 2-D tensor (samples x features).
            target: target features, 2-D tensor with the same feature size.
            s_label: integer class index per source sample.
            t_label: class probabilities per target sample, shape (target samples, class_num).

        Returns:
            A one-element tensor on ``source``'s device. It is zero (and carries no gradient) when
            the kernel matrix contains NaN, e.g. when all samples coincide and the estimated
            bandwidth is zero.
        """
        batch_size = source.size(0)
        device = source.device  # follow the inputs instead of assuming a CUDA device
        weight_ss, weight_tt, weight_st = self.cal_weight(s_label,
                                                          t_label,
                                                          batch_size=batch_size,
                                                          class_num=self.class_num)
        weight_ss = torch.from_numpy(weight_ss).to(device)
        weight_tt = torch.from_numpy(weight_tt).to(device)
        weight_st = torch.from_numpy(weight_st).to(device)

        kernels = self.guassian_kernel(source,
                                       target,
                                       kernel_mul=self.kernel_mul,
                                       kernel_num=self.kernel_num,
                                       fix_sigma=self.fix_sigma)
        loss = torch.zeros(1, device=device)
        if torch.isnan(kernels).any():
            return loss

        # Blocks of the joint kernel matrix: source-source, target-target, source-target.
        SS = kernels[:batch_size, :batch_size]
        TT = kernels[batch_size:, batch_size:]
        ST = kernels[:batch_size, batch_size:]

        # Summing each block separately keeps the expression valid when the two batches differ in size.
        loss += torch.sum(weight_ss * SS) + torch.sum(weight_tt * TT) - 2 * torch.sum(weight_st * ST)
        return loss

    def convert_to_onehot(self, sca_label, class_num=None):
        """Return the one-hot rows (NumPy array) for an array of integer labels.

        ``class_num`` defaults to the instance's ``class_num``.
        """
        if class_num is None:
            class_num = self.class_num
        return np.eye(class_num)[sca_label]

    def cal_weight(self, s_label, t_label, batch_size=32, class_num=None):
        """Build the class-wise sample-pair weights used by ``get_loss``.

        Args:
            s_label: integer class index per source sample (tensor).
            t_label: class probabilities per target sample (tensor, samples x class_num).
            batch_size: accepted for backward compatibility; the sizes are taken from the labels.
            class_num: number of classes; defaults to the instance's ``class_num``.

        Returns:
            ``(weight_ss, weight_tt, weight_st)`` as float32 NumPy arrays. When no class is shared
            between the source labels and the target arg-max predictions, each is ``[0]``.
        """
        if class_num is None:
            class_num = self.class_num

        s_sca_label = s_label.detach().cpu().numpy()
        s_vec_label = self.convert_to_onehot(s_sca_label, class_num=class_num)
        s_sum = np.sum(s_vec_label, axis=0).reshape(1, class_num)
        # A zero column sum would give 0/0; any non-zero stand-in works because the column itself is all zeros.
        s_sum[s_sum == 0] = 100
        s_vec_label = s_vec_label / s_sum

        t_label = t_label.detach().cpu()
        t_sca_label = t_label.max(1)[1].numpy()
        t_vec_label = t_label.numpy()
        t_sum = np.sum(t_vec_label, axis=0).reshape(1, class_num)
        t_sum[t_sum == 0] = 100
        t_vec_label = t_vec_label / t_sum

        # Keep only classes present in both batches; the (1, class_num) mask broadcasts over any batch size.
        index = list(set(s_sca_label) & set(t_sca_label))
        mask_arr = np.zeros((1, class_num))
        mask_arr[:, index] = 1
        t_vec_label = t_vec_label * mask_arr
        s_vec_label = s_vec_label * mask_arr

        weight_ss = np.matmul(s_vec_label, s_vec_label.T)
        weight_tt = np.matmul(t_vec_label, t_vec_label.T)
        weight_st = np.matmul(s_vec_label, t_vec_label.T)

        length = len(index)
        if length != 0:
            weight_ss = weight_ss / length
            weight_tt = weight_tt / length
            weight_st = weight_st / length
        else:
            weight_ss = np.array([0])
            weight_tt = np.array([0])
            weight_st = np.array([0])
        return weight_ss.astype('float32'), weight_tt.astype('float32'), weight_st.astype('float32')

In [None]:
class DSAN(nn.Module):
    """Classifier on VGG-16 convolutional features, trained with an added LMMD alignment loss.

    Args:
        num_classes: number of output classes.
        bottle_neck: when True, a 256-unit linear layer is inserted between the flattened
            512*7*7 features and the classifier.
    """

    def __init__(self, num_classes=nclass, bottle_neck=True):
        super(DSAN, self).__init__()
        self.feature_layers = self._pretrained_vgg16_features()
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.flatten = nn.Flatten()
        self.lmmd_loss = LMMD_loss(class_num=num_classes)
        self.bottle_neck = bottle_neck
        if bottle_neck:
            self.bottle = nn.Linear(512 * 7 * 7, 256)
            self.cls_fc = nn.Linear(256, num_classes)
        else:
            self.cls_fc = nn.Linear(512 * 7 * 7, num_classes)

    @staticmethod
    def _pretrained_vgg16_features():
        """Return the convolutional part of a ``VGG16`` loaded with torchvision's ImageNet weights.

        The backbone is built from ``VGG16`` directly rather than through the notebook-level
        ``model()`` factory, because the name ``model`` is later rebound to the DSAN instance and
        a second ``DSAN(...)`` construction would then fail.
        """
        backbone = VGG16()
        # Newer torchvision selects weights through an enum; `pretrained=True` is the legacy spelling.
        weights_enum = getattr(torchvision.models, 'VGG16_Weights', None)
        if weights_enum is not None:
            reference = torchvision.models.vgg16(weights=weights_enum.IMAGENET1K_V1)
        else:
            reference = torchvision.models.vgg16(pretrained=True)
        backbone.load_state_dict(reference.state_dict())
        return backbone.features

    def _embed(self, x):
        """Map an image batch to the features fed to the classifier (after the bottleneck, if any)."""
        x = self.feature_layers(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        if self.bottle_neck:
            x = self.bottle(x)
        return x

    def forward(self, source, target, s_label):
        """Run one training step's forward pass.

        Args:
            source: labelled source image batch.
            target: unlabelled target image batch.
            s_label: integer class index per source image.

        Returns:
            ``(s_pred, loss_lmmd)``: the source class scores and the LMMD loss between the source
            and target features, where the target is weighted by its softmax predictions.
        """
        source = self._embed(source)
        s_pred = self.cls_fc(source)

        target = self._embed(target)
        t_label = self.cls_fc(target)

        loss_lmmd = self.lmmd_loss.get_loss(source, target, s_label, torch.nn.functional.softmax(t_label, dim=1))

        return s_pred, loss_lmmd

    def predict(self, x):
        """Return class scores for an image batch (no LMMD computation)."""
        return self.cls_fc(self._embed(x))

In [None]:
# Build the network on the GPU. `bottle_neck` follows the `bottleneck` config flag so the
# architecture agrees with the optimizer cell, which picks its parameter groups from the same flag.
# NOTE: this rebinds the name `model`, replacing the `model()` factory function defined earlier.
model = DSAN(num_classes=nclass, bottle_neck=bottleneck).cuda()
print(model)

Load data

In [4]:
from torchvision import datasets, transforms
import os

In [3]:
import random
import shutil

In [None]:
def check_train_val_test_folders_exist(src_folder):
    """Return True when the train, validation and test sub-folders all exist in ``src_folder``.

    The sub-folder names are read from the module-level ``tar_train``, ``tar_val`` and
    ``tar_test`` variables at call time. A confirmation message is printed when all three exist.
    """
    required = (
        os.path.join(src_folder, split_name)
        for split_name in (tar_train, tar_val, tar_test)
    )
    # all() stops at the first missing folder, like the chained `and` it replaces.
    if not all(os.path.exists(path) for path in required):
        return False

    print('Đã có cả ba thư mục '+ tar_train +', '+ tar_val +', và '+ tar_test +' trong thư mục nguồn.')
    return True

def split_data(src_folder, train_folder, val_folder, test_folder, train_ratio=0.64, val_ratio=0.16,
               class_names=('B', 'I', 'N', 'O'), seed=None):
    """Copy the files of each class folder into train / validation / test folders.

    For every name in ``class_names`` the files directly inside ``src_folder/<name>`` are shuffled
    and copied (the originals are kept) to ``<split>/<name>``. The test split receives whatever is
    left after the train and validation shares.

    Args:
        src_folder: folder containing one sub-folder per class.
        train_folder, val_folder, test_folder: destination folders; created if missing.
        train_ratio: fraction of each class copied to the train split (rounded down).
        val_ratio: fraction of each class copied to the validation split (rounded down).
        class_names: class sub-folder names to process.
        seed: when given, the shuffle uses a private generator seeded with this value, so the same
            files land in the same split on every run. ``None`` uses the global ``random`` state.

    Raises:
        ValueError: if a ratio is negative or the two ratios add up to more than 1.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError('train_ratio and val_ratio must be non-negative and sum to at most 1')

    rng = random if seed is None else random.Random(seed)
    destinations = (train_folder, val_folder, test_folder)
    for destination in destinations:
        os.makedirs(destination, exist_ok=True)

    for folder_name in class_names:
        class_dir = os.path.join(src_folder, folder_name)
        for destination in destinations:
            os.makedirs(os.path.join(destination, folder_name), exist_ok=True)

        # Regular files only (shutil.copy cannot copy a directory), sorted because os.listdir order
        # is arbitrary and a seeded shuffle is only repeatable from a fixed starting order.
        file_list = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        rng.shuffle(file_list)

        num_train = int(len(file_list) * train_ratio)
        num_val = int(len(file_list) * val_ratio)
        partitions = (
            (train_folder, file_list[:num_train]),
            (val_folder, file_list[num_train:num_train + num_val]),
            (test_folder, file_list[num_train + num_val:]),
        )
        for destination, names in partitions:
            for name in names:
                shutil.copy(os.path.join(class_dir, name),
                            os.path.join(destination, folder_name, name))

src_folder = root_path + tar

# This cell ends by prefixing the split names with `tar`. Strip that prefix first so that
# re-running the cell starts again from the bare folder names instead of prefixing them twice
# (which would make the existence check fail and trigger a second split).
tar_train, tar_val, tar_test = [
    name[len(tar):] if name.startswith(tar) else name
    for name in (tar_train, tar_val, tar_test)
]

if not check_train_val_test_folders_exist(src_folder):
    train_folder = os.path.join(src_folder, tar_train)
    val_folder = os.path.join(src_folder, tar_val)
    test_folder = os.path.join(src_folder, tar_test)
    # NOTE(review): val_ratio=0 leaves the validation folder without images, yet a later cell
    # builds its data loaders from `tar_val` — confirm the intended ratios.
    split_data(src_folder, train_folder, val_folder, test_folder, train_ratio=0.8, val_ratio=0)


# From here on the split names are paths relative to `root_path`.
tar_train = tar + tar_train
tar_val = tar + tar_val
tar_test = tar + tar_test

Đã có cả ba thư mục Train, Validation , và Test trong thư mục nguồn.


In [None]:
def rename_folders(folder_path):
    """Rename the class sub-folders of ``folder_path`` from letters to digits.

    The mapping is B -> 2, I -> 3, N -> 1, O -> 4. A missing letter folder is reported and
    skipped. The resulting directory listing is printed at the end.
    """
    renames = {'B': '2', 'I': '3', 'N': '1', 'O': '4'}

    for old_name, new_name in renames.items():
        current_path = os.path.join(folder_path, old_name)
        if not os.path.exists(current_path):
            print(f'Folder "{old_name}" not found in "{folder_path}".')
            continue
        os.rename(current_path, os.path.join(folder_path, new_name))
        print(f'Renamed "{old_name}" to "{new_name}"')

    print(f'Updated folder list: {os.listdir(folder_path)}')

In [None]:
# NOTE: these two lists repeat the ones defined inside rename_folders(); nothing in this cell reads them.
folders = ['B', 'I', 'N', 'O']
new_names = ['2', '3', '1', '4']

# Rename the class folders of the target Train and Validation splits (the Test split is not renamed here).
rename_folders(root_path + tar_train)
rename_folders(root_path + tar_val)

In [None]:
def load_training(root_path, dir, batch_size, kwargs):
    """Build a shuffled training ``DataLoader`` over the image folder ``root_path + dir``.

    Images are resized to 256x256 and converted to tensors; incomplete final batches are dropped.

    Args:
        root_path: path prefix.
        dir: folder (appended to ``root_path``) holding one sub-folder per class.
        batch_size: number of images per batch.
        kwargs: dict of extra ``DataLoader`` keyword arguments.
    """
    # Augmentation (RandomCrop(224), RandomHorizontalFlip) is disabled: only resize + ToTensor run.
    preprocessing = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=root_path + dir, transform=preprocessing)
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        **kwargs,
    )

In [None]:
def load_testing(root_path, dir, batch_size, kwargs):
    """Build an evaluation ``DataLoader`` over the image folder ``root_path + dir``.

    Images are resized to 256x256 and converted to tensors. Batches are shuffled and the final
    partial batch is kept.

    Args:
        root_path: path prefix.
        dir: folder (appended to ``root_path``) holding one sub-folder per class.
        batch_size: number of images per batch.
        kwargs: dict of extra ``DataLoader`` keyword arguments.
    """
    preprocessing = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=root_path + dir, transform=preprocessing)
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        **kwargs,
    )

In [None]:
def load_data(root_path, src, tar_train, tar_test, batch_size):
    """Create the three data loaders used for training.

    Returns:
        ``(source_loader, target_train_loader, target_test_loader)``. The first two come from
        ``load_training``, the last from ``load_testing``; all use one worker and pinned memory.

    NOTE: a later cell of this notebook defines another function with the same name.
    """
    loader_options = {'num_workers': 1, 'pin_memory': True}
    return (
        load_training(root_path, src, batch_size, loader_options),
        load_training(root_path, tar_train, batch_size, loader_options),
        load_testing(root_path, tar_test, batch_size, loader_options),
    )

In [None]:
# NOTE(review): the validation split is passed as both the target-training and the target-test
# folder (load_data expects tar_train, tar_test in these positions) — confirm this is intended.
dataloaders = load_data(root_path, src, tar_val , tar_val, batch_size)

In [None]:
# Training log, later written to CSV: row 0 is the column header, row 1 records the source and
# target folders, and train_epoch() appends one row per training iteration after that.
training_history =[
    ['epoch','Loss','lossCLS','lossLMMD','Accuracy'],
    ['Source', src ,'---','Target',tar]
]
print(training_history)

Train model

In [None]:
import torch.nn.functional as F

In [None]:
def train_epoch(epoch, model, dataloaders, optimizer):
    """Train ``model`` for one pass over the source loader.

    Each source batch is paired with the next target batch; the target loader is restarted
    whenever it runs out. The loss is the source classification loss plus the LMMD loss scaled by
    the global ``weight`` and a schedule factor that grows from 0 towards 1 as ``epoch``
    approaches the global ``nepoch``.

    One row ``[progress, loss, cls_loss, lmmd_loss, '----']`` is appended to the global
    ``training_history`` per iteration. ``progress`` is ``epoch + i / iterations_per_epoch``,
    which is unique and increasing (the earlier "epoch.iteration" float made 1.1 and 1.10 equal).

    Args:
        epoch: 1-based epoch number.
        model: network whose ``forward(source, target, source_labels)`` returns
            ``(source_scores, lmmd_loss)``.
        dataloaders: ``(source_loader, target_train_loader, _)``.
        optimizer: optimizer stepping ``model``'s parameters.

    Raises:
        ValueError: if the target loader yields no batches.
    """
    model.train()
    source_loader, target_train_loader, _ = dataloaders
    if len(target_train_loader) == 0:
        raise ValueError('target_train_loader yields no batches')

    device = next(model.parameters()).device  # train wherever the model lives
    num_iter = len(source_loader)
    iter_target = iter(target_train_loader)
    # The schedule factor depends only on the epoch, so compute it once.
    lambd = 2 / (1 + math.exp(-10 * (epoch) / nepoch)) - 1

    for i, (data_source, label_source) in enumerate(source_loader):
        try:
            data_target, _ = next(iter_target)
        except StopIteration:
            # Target loader exhausted before the source loader: start over.
            iter_target = iter(target_train_loader)
            data_target, _ = next(iter_target)
        data_source, label_source = data_source.to(device), label_source.to(device)
        data_target = data_target.to(device)

        optimizer.zero_grad()
        label_source_pred, loss_lmmd = model(data_source, data_target, label_source)
        loss_cls = F.nll_loss(F.log_softmax(label_source_pred, dim=1), label_source)
        loss = loss_cls + weight * lambd * loss_lmmd

        loss.backward()
        optimizer.step()
        print(f'Epoch: [{epoch:d}], Loss: {loss.item():.4f}, cls_Loss: {loss_cls.item():.4f}, loss_lmmd: {loss_lmmd.item():.4f}')
        training_history.append([epoch + i / num_iter, loss.item(), loss_cls.item(), loss_lmmd.item(), '----'])

In [None]:
def test(model, dataloader):
    """Evaluate ``model.predict`` on ``dataloader`` and report loss and accuracy.

    Prints the per-sample average loss and the accuracy. When ``training_history`` holds at least
    one training row, the accuracy (a plain float, in percent) is written into the last column of
    its most recent row; the two header rows are never overwritten.

    Args:
        model: network exposing ``predict(batch) -> class scores``.
        dataloader: loader yielding ``(images, labels)`` batches.

    Returns:
        The number of correctly classified samples as a Python ``int``.
    """
    model.eval()
    device = next(model.parameters()).device  # evaluate wherever the model lives
    total_loss = 0.0
    correct = 0
    num_samples = 0

    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            scores = model.predict(data)
            # Sum (not mean) per batch so a smaller final batch is weighted correctly.
            total_loss += F.nll_loss(F.log_softmax(scores, dim=1), target, reduction='sum').item()
            correct += (scores.argmax(dim=1) == target).sum().item()
            num_samples += target.size(0)

    test_loss = total_loss / num_samples if num_samples else float('nan')
    accuracy = 100. * correct / num_samples if num_samples else 0.0
    print(f'Average loss: {test_loss:.4f}, Accuracy: {correct}/{num_samples} ({accuracy:.2f}%)')
    if len(training_history) > 2:
        training_history[-1][-1] = accuracy
    return correct

In [None]:
if __name__ == '__main__':
    # Seed every random number generator in use.
    # NOTE: the model and the data loaders are created in earlier cells, i.e. before this seeding.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    stop = 0  # consecutive epochs without improvement
    best_loss = float('inf')

    # Pick the parameter groups from the model that was actually built; fall back to the config flag.
    # Group order must match `lr`: lr[index] is re-applied to group `index` every epoch.
    if getattr(model, 'bottle_neck', bottleneck):
        optimizer = torch.optim.SGD([
            {'params': model.feature_layers.parameters()},
            {'params': model.bottle.parameters(), 'lr': lr[1]},
            {'params': model.cls_fc.parameters(), 'lr': lr[2]},
        ], lr=lr[0], momentum=momentum, weight_decay=decay)
    else:
        optimizer = torch.optim.SGD([
            {'params': model.feature_layers.parameters()},
            {'params': model.cls_fc.parameters(), 'lr': lr[1]},
        ], lr=lr[0], momentum=momentum, weight_decay=decay)

    # Epochs are 1-based and run through `nepoch` inclusive.
    for epoch in range(1, nepoch + 1):
        # Decay each group's learning rate from its base value as training progresses.
        for index, param_group in enumerate(optimizer.param_groups):
            param_group['lr'] = lr[index] / math.pow((1 + 10 * (epoch - 1) / nepoch), 0.75)

        first_new_row = len(training_history)
        train_epoch(epoch, model, dataloaders, optimizer)

        # Early stopping on the mean total loss of the rows this epoch appended to the log.
        epoch_losses = [row[1] for row in training_history[first_new_row:]]
        if not epoch_losses:
            print('No training iterations were logged for this epoch. Stopping training.')
            break
        current_loss = sum(epoch_losses) / len(epoch_losses)

        if current_loss < best_loss:
            best_loss = current_loss
            stop = 0
        else:
            stop += 1

        if stop >= early_stop:
            print(f'Loss has not improved for {early_stop} consecutive epochs. Stopping training.')
            break

        # To stop on target accuracy instead, evaluate with test(model, dataloaders[-1]) here and
        # reset `stop` whenever the returned number of correct predictions improves.

Save the model and the training log

In [None]:
# Create this run's output folder and pickle the whole model object (not only its state_dict) into it.
os.makedirs(root_path + 'Save/'+version, exist_ok=True)
torch.save(model, root_path + 'Save/'+version+'/model_VGG.pkl')

In [None]:
import csv
# Write every row of training_history (the two header rows included) to a CSV file in the run's output folder.
csv_file = root_path + 'Save/'+version+'/training_history.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(training_history)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Per-iteration rows only: skip the two header rows and any row whose loss column is not numeric.
# (Slicing with [2:-1] silently dropped the last logged iteration.)
history_rows = [row for row in training_history[2:] if isinstance(row[1], (int, float))]

# x positions are consecutive iteration indices across all epochs.
epochs = list(range(len(history_rows)))

loss = [row[1] for row in history_rows]
lossCLS = [row[2] for row in history_rows]
lossLMMD = [row[3] for row in history_rows]

plt.figure(figsize=(10, 6))
plt.plot(epochs, loss, label='Loss', marker='')
plt.plot(epochs, lossCLS, label='lossCLS', marker='')
plt.plot(epochs, lossLMMD, label='lossLMMD', marker='')
# Each point is one training iteration, not one epoch.
plt.xlabel('Iteration')
plt.xticks([])
plt.ylabel('Loss')
plt.title('Training History')
plt.legend()
plt.grid(True)

plt.savefig(root_path + 'Save/'+version+'/Training History.png', bbox_inches='tight')
plt.show()
plt.clf()

Testing after training

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import datasets, transforms
import os

In [None]:
# Most recently loaded checkpoint, keyed by (path, file modification time, device), so that
# predict_image() does not re-read the checkpoint from disk for every single image.
_PREDICT_MODEL_CACHE = {}


def predict_image(image_path, model_path):
    """Return the saved model's class scores for one image.

    Args:
        image_path: path of the image file; it is converted to RGB and resized to 256x256.
        model_path: path of a checkpoint written with ``torch.save(model, ...)``.

    Returns:
        A CPU tensor of shape (1, number of classes) with the raw class scores.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    cache_key = (model_path, os.path.getmtime(model_path), str(device))
    model = _PREDICT_MODEL_CACHE.get(cache_key)
    if model is None:
        # SECURITY: torch.load unpickles arbitrary Python objects — only load checkpoints you created.
        try:
            # The checkpoint is a whole pickled module, which needs weights_only=False on
            # PyTorch versions where weights-only loading is the default.
            model = torch.load(model_path, map_location=device, weights_only=False)
        except TypeError:
            # Older PyTorch versions do not accept the weights_only argument.
            model = torch.load(model_path, map_location=device)
        model = model.to(device)
        model.eval()
        _PREDICT_MODEL_CACHE.clear()  # keep a single model in memory
        _PREDICT_MODEL_CACHE[cache_key] = model

    transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.ToTensor()
    ])
    with Image.open(image_path) as image:
        input_tensor = transform(image.convert('RGB'))
    input_batch = input_tensor.unsqueeze(0).to(device)

    with torch.no_grad():
        if hasattr(model, 'predict'):
            # Same class scores as the training forward pass, without computing the LMMD loss.
            output = model.predict(input_batch)
        else:
            output, _ = model(input_batch, input_batch, torch.tensor([0]).to(device))
    return output.cpu()


# Folders to evaluate. `tar_test` was prefixed with `tar` in the data-split cell, so
# `target_path` resolves to the target Test split.
source_path = root_path + src
target_path = root_path + tar_test

# Checkpoint written by the "save model" cell of this run.
model_path = root_path + 'Save/'+version+'/model_VGG.pkl'
def load_data(image_folder):
    """Classify every image under ``image_folder`` with the saved model.

    Only sub-folders whose name starts with B, I, N or O are processed; that first letter gives
    the true label (B=0, I=1, N=2, O=3). Files ending in .jpg, .jpeg or .png (any letter case)
    are classified with ``predict_image`` using the module-level ``model_path``. Folders and files
    are visited in sorted order so the results are reproducible.

    NOTE: this definition replaces the data-loader factory of the same name defined earlier.
    NOTE(review): the B/I/N/O -> 0..3 mapping must match the class indices the model was trained
    with (ImageFolder numbers class folders in sorted-name order) — confirm against the source folder.

    Returns:
        ``(embeddings, labels, pred, true)``: the per-image score vectors (NumPy arrays), the
        predicted labels (``labels`` and ``pred`` hold the same values), and the true labels.
    """
    label_index = {'B': 0, 'I': 1, 'N': 2, 'O': 3}
    image_suffixes = ('.jpg', '.jpeg', '.png')
    true = []
    pred = []
    embeddings = []
    labels = []

    for folder in sorted(os.listdir(image_folder)):
        folder_path = os.path.join(image_folder, folder)
        if not os.path.isdir(folder_path) or folder[0] not in label_index:
            continue
        print(f'Processing images in folder: {folder}')
        # Resolve the integer label once per folder rather than converting it inside the file loop.
        true_label = label_index[folder[0]]

        for file in sorted(os.listdir(folder_path)):
            if not file.lower().endswith(image_suffixes):
                continue
            output = predict_image(os.path.join(folder_path, file), model_path)

            _, predicted_idx = torch.max(output, 1)
            predicted_label = predicted_idx.item()

            pred.append(predicted_label)
            true.append(true_label)

            embeddings.append(output.squeeze().cpu().numpy())
            labels.append(predicted_label)

    return embeddings, labels, pred, true

In [5]:
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# Run the saved model over every image of the source folder and of the target test folder.
print('Load source')
source_embeddings, source_labels, x_pred, x_true = load_data(source_path)
print('Load target')
target_embeddings, target_labels, y_pred, y_true = load_data(target_path)

In [None]:
# Target-domain metrics; F1, precision and recall are class-weighted averages.
accuracy = accuracy_score(y_true , y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# (label, value) pairs in reporting order; values are shown as percentages.
metric_report = [
    ("Accuracy:", accuracy),
    ("F1 Score:", f1),
    ("Precision:", precision),
    ("Recall:", recall),
]

for metric_label, metric_value in metric_report:
    print(metric_label, metric_value*100)

# Persist the same numbers, four decimals each, next to the other outputs of this run.
file_path = root_path + 'Save/'+ version +'/test.txt'
with open(file_path, "w") as file:
    for metric_label, metric_value in metric_report:
        file.write("{} {:.4f}\n".format(metric_label, metric_value*100))

In [None]:
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Fix the label set so the matrix is always 4x4. Without `labels`, a class that appears in
# neither y_true nor y_pred is dropped and the tick labels below no longer line up with the cells.
class_names = ['B', 'I', 'N', 'O']
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Target Confusion Matrix')

plt.savefig(root_path + 'Save/'+version+'/Target Confusion Matrix.png')
plt.show()
plt.clf()

In [6]:
from sklearn.manifold import TSNE
import plotly.graph_objects as go

ModuleNotFoundError: No module named 'plotly'

In [None]:
# Convert the per-image score vectors and predicted labels to NumPy arrays for t-SNE and plotting.
source_embeddings = np.array(source_embeddings)
source_labels = np.array(source_labels)

target_embeddings = np.array(target_embeddings)
target_labels = np.array(target_labels)

In [None]:
# Project both domains with ONE t-SNE fit. Two separate fits produce unrelated coordinate
# systems, so drawing them on shared axes would say nothing about source/target alignment.
combined_embeddings = np.concatenate([source_embeddings, target_embeddings], axis=0)

# t-SNE needs perplexity < number of samples; 30 is scikit-learn's default for larger sets.
tsne = TSNE(n_components=2, random_state=42,
            perplexity=min(30.0, len(combined_embeddings) - 1))
combined_tsne_representation = tsne.fit_transform(combined_embeddings)

# Split the joint projection back into its source and target parts (source rows come first).
source_tsne_representation = combined_tsne_representation[:len(source_embeddings)]
target_tsne_representation = combined_tsne_representation[len(source_embeddings):]

In [None]:
import plotly.offline as pyo

def _add_domain_traces(figure, points, point_labels, legend_names, colors, symbol, size):
    """Add one scatter trace per label value found in ``point_labels`` to ``figure``.

    ``points`` holds 2-D coordinates (one row per sample); ``legend_names`` and ``colors`` map a
    label value to its legend entry and marker colour.
    """
    for label in np.unique(point_labels):
        label_indices = np.where(point_labels == label)[0]
        figure.add_trace(go.Scatter(
            x=points[label_indices, 0],
            y=points[label_indices, 1],
            mode='markers',
            marker=dict(symbol=symbol, size=size, color=colors[label]),
            showlegend=True,
            name=legend_names[label]
        ))


fig = go.Figure()

# One colour per predicted class, shared by both domains.
label_colors = {
    0: 'blue',
    1: 'green',
    2: 'red',
    3: 'yellow',
}

source_label_map = {
    0: 'Source B',
    1: 'Source I',
    2: 'Source N',
    3: 'Source O',
}
target_label_map = {
    0: 'Target B',
    1: 'Target I',
    2: 'Target N',
    3: 'Target O',
}

# Source samples are drawn as crosses, target samples as open circles.
_add_domain_traces(fig, source_tsne_representation, source_labels, source_label_map,
                   label_colors, symbol='x', size=10)
_add_domain_traces(fig, target_tsne_representation, target_labels, target_label_map,
                   label_colors, symbol='circle-open', size=8)

fig.update_layout(
    title='t-SNE Visualization of Source and Target Domains',
    width=1000,
    height=800,
    plot_bgcolor='rgba(200, 200, 200, 0.8)'
)

# Save an interactive HTML copy in the run's output folder, then display the figure inline.
plot_filename = root_path + 'Save/'+ version + '/tsne_visualization.html'
pyo.plot(fig, filename=plot_filename, auto_open=False)

fig.show()