In [4]:
from google.colab import drive
drive.mount('/content/drive')

%cd drive/MyDrive/ST/stargan

Mounted at /content/drive
/content/drive/MyDrive/ST/stargan


In [5]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import LambdaLR
import torch.optim as optim

In [6]:
def get_data(dataset_name, class_idx, num_train_domains, domain):

    # Load the dataset
    with open(f'data/{dataset_name}.pkl', 'rb') as f:
        x, y, k = pickle.load(f)

    with open(f'data/{dataset_name}_fs.pkl', 'rb') as f:
        fs = pickle.load(f)

    # Filter out the samples that are used for finetuning
    x = x[fs == 0]
    y = y[fs == 0]
    k = k[fs == 0]

    x_ = x[(y == class_idx) & (k == domain)]
    y_ = y[(y == class_idx) & (k == domain)]
    k_ = k[(y == class_idx) & (k == domain)] - num_train_domains

    return x_, y_, k_


class TSTRClassifier(nn.Module):
    def __init__(self, num_timesteps=128, num_channels=3, num_classes=5):
        super(TSTRClassifier, self).__init__()

        self.conv1 = nn.Conv1d(num_channels, 16, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm1d(16)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm1d(32)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm1d(64)
        self.conv4 = nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2)
        self.bn4 = nn.BatchNorm1d(128)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.25)

        self.fc_shared = nn.Linear(num_timesteps * 8, 100)

        self.fc_class = nn.Linear(100, num_classes)

    def forward(self, x):
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        x = self.pool(self.relu(self.bn3(self.conv3(x))))
        x = self.pool(self.relu(self.bn4(self.conv4(x))))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(x)
        x = self.relu(self.fc_shared(x))

        # Final output for class prediction
        class_outputs = self.fc_class(x)
        return class_outputs


def remap_labels(y):
    label_map = {clss: i for i, clss in enumerate(np.unique(y))}
    return np.array([label_map[clss] for clss in y])


def setup_training(x_tr, y_tr, x_val, y_val, x_test, y_test, batch_size=64):
    # Convert numpy arrays to torch tensors
    x_train_tensor = torch.tensor(x_tr, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_tr, dtype=torch.long)
    x_val_tensor = torch.tensor(x_val, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val, dtype=torch.long)
    x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    # Create datasets and loaders
    train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataset = TensorDataset(x_val_tensor, y_val_tensor)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader


def train_model(model, train_loader, val_loader, loss_fn, optimizer, epochs=300):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    loss_train = []
    loss_val = []
    accuracy_val = []
    best_loss = np.inf
    best_accuracy = 0

    # Set up linear learning rate decay
    lambda_lr = lambda epoch: 1 - epoch / epochs
    scheduler = LambdaLR(optimizer, lr_lambda=lambda_lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = loss_fn(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        total_loss /= len(train_loader)
        loss_train.append(total_loss)

        # Update learning rate
        scheduler.step()

        val_accuracy, val_loss = evaluate_model(model, val_loader, loss_fn)
        if val_accuracy > best_accuracy:
            best_epoch = epoch
            best_accuracy = val_accuracy
            best_loss = val_loss
            best_model_state = model.state_dict().copy()
        loss_val.append(val_loss)
        accuracy_val.append(val_accuracy)

        current_lr = scheduler.get_last_lr()[0]
        print(f"Epoch {epoch + 1}/{epochs} - Train loss: {total_loss:.4f} - Val loss: {val_loss:.4f} - Val accuracy: {val_accuracy:.4f} - LR: {current_lr:.6f}")

    print(f"\nBest epoch: {best_epoch + 1} - Best val accuracy: {best_accuracy:.4f} - Best val loss: {best_loss:.4f}\n")

    return best_model_state


def evaluate_model(model, test_loader, loss_fn):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = loss_fn(outputs, y_batch)
            total_loss += loss.item()

            _, predicted_labels = torch.max(outputs, 1)
            correct_predictions += (predicted_labels == y_batch).sum().item()
            total_predictions += len(y_batch)

    total_loss /= len(test_loader)
    accuracy = correct_predictions / total_predictions

    return accuracy, total_loss


def calculate_tstr_score(x_train, y_train, x_test, y_test):
    # Remap labels
    y_train = remap_labels(y_train)
    y_test = remap_labels(y_test)

    x_tr, x_val, y_tr, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=2710, stratify=y_train, shuffle=True)
    tr_loader, val_loader, test_loader = setup_training(x_tr, y_tr, x_val, y_val, x_test, y_test, batch_size=64)

    model = TSTRClassifier(num_timesteps=x_train.shape[2], num_channels=x_train.shape[1], num_classes=len(np.unique(y_train)))
    loss_fn = nn.CrossEntropyLoss()
    initial_lr = 0.0001
    optimizer = optim.Adam(model.parameters(), lr=initial_lr)

    best_model_state = train_model(model, tr_loader, val_loader, loss_fn, optimizer, epochs=50)
    best_model = TSTRClassifier(num_timesteps=x_train.shape[2], num_channels=x_train.shape[1], num_classes=len(np.unique(y_train)))
    best_model.load_state_dict(best_model_state)
    test_accuracy, test_loss = evaluate_model(best_model, test_loader, loss_fn)

    return test_accuracy, test_loss

In [7]:
dataset = 'realworld'

if dataset == 'realworld':
    dataset_name = 'realworld_128_3ch_4cl'
    num_df_domains = 10
    num_dp_domains = 5
    num_classes = 4
    class_names = ['WAL', 'RUN', 'CLD', 'CLU']

elif dataset == 'cwru':
    dataset_name = 'cwru_256_3ch_5cl'
    num_df_domains = 4
    num_dp_domains = 4
    num_classes = 5
    class_names = ['IR', 'Ball', 'OR_centred', 'OR_orthogonal', 'OR_opposite']

classes_dict = {clss: i for i, clss in enumerate(class_names)}

accs = {}

for domain in range(num_dp_domains):
    for src_class in class_names:
        trg_classes = [clss for clss in class_names if clss != src_class]

        x_trg = []
        y_trg = []
        k_trg = []

        for trg_class in trg_classes:
            class_idx = classes_dict[trg_class]
            x, y, k = get_data(dataset_name, class_idx, num_df_domains, domain)
            x_trg.append(x)
            y_trg.append(y)
            k_trg.append(k)

        x_trg = np.concatenate(x_trg)
        y_trg = np.concatenate(y_trg)
        k_trg = np.concatenate(k_trg)

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2710)

        acc_fold_list = []

        for train_index, test_index in skf.split(x_trg, y_trg):
            x_train, x_test = x_trg[train_index], x_trg[test_index]
            y_train, y_test = y_trg[train_index], y_trg[test_index]

            acc_fold, _ = calculate_tstr_score(x_train, y_train, x_test, y_test)
            acc_fold_list.append(acc_fold)

        acc = np.mean(acc_fold_list)
        acc_std = np.std(acc_fold_list)

        accs[(src_class, domain)] = acc

        print(f'{src_class}, {domain}: {acc:.4f} ± {acc_std:.4f}\n')

print('\nMean accuracies:')
for src_class in class_names:
    print(f'{src_class}: {np.mean([accs[(src_class, domain)] for domain in range(num_dp_domains)]):.4f}')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 27/50 - Train loss: 0.1124 - Val loss: 0.2093 - Val accuracy: 0.9223 - LR: 0.000046
Epoch 28/50 - Train loss: 0.1082 - Val loss: 0.2060 - Val accuracy: 0.9223 - LR: 0.000044
Epoch 29/50 - Train loss: 0.1254 - Val loss: 0.2032 - Val accuracy: 0.9223 - LR: 0.000042
Epoch 30/50 - Train loss: 0.1099 - Val loss: 0.2031 - Val accuracy: 0.9223 - LR: 0.000040
Epoch 31/50 - Train loss: 0.1140 - Val loss: 0.1983 - Val accuracy: 0.9223 - LR: 0.000038
Epoch 32/50 - Train loss: 0.1021 - Val loss: 0.1977 - Val accuracy: 0.9126 - LR: 0.000036
Epoch 33/50 - Train loss: 0.0925 - Val loss: 0.1976 - Val accuracy: 0.9126 - LR: 0.000034
Epoch 34/50 - Train loss: 0.1008 - Val loss: 0.1936 - Val accuracy: 0.9223 - LR: 0.000032
Epoch 35/50 - Train loss: 0.0843 - Val loss: 0.1934 - Val accuracy: 0.9126 - LR: 0.000030
Epoch 36/50 - Train loss: 0.0830 - Val loss: 0.1958 - Val accuracy: 0.9126 - LR: 0.000028
Epoch 37/50 - Train loss: 0.1049 - 

In [9]:
dataset = 'cwru'

if dataset == 'realworld':
    dataset_name = 'realworld_128_3ch_4cl'
    num_df_domains = 10
    num_dp_domains = 5
    num_classes = 4
    class_names = ['WAL', 'RUN', 'CLD', 'CLU']

elif dataset == 'cwru':
    dataset_name = 'cwru_256_3ch_5cl'
    num_df_domains = 4
    num_dp_domains = 4
    num_classes = 5
    class_names = ['IR', 'Ball', 'OR_centred', 'OR_orthogonal', 'OR_opposite']

classes_dict = {clss: i for i, clss in enumerate(class_names)}

accs = {}

for domain in range(num_dp_domains):
    for src_class in class_names:
        trg_classes = [clss for clss in class_names if clss != src_class]

        x_trg = []
        y_trg = []
        k_trg = []

        for trg_class in trg_classes:
            class_idx = classes_dict[trg_class]
            x, y, k = get_data(dataset_name, class_idx, num_df_domains, domain)
            x_trg.append(x)
            y_trg.append(y)
            k_trg.append(k)

        x_trg = np.concatenate(x_trg)
        y_trg = np.concatenate(y_trg)
        k_trg = np.concatenate(k_trg)

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2710)

        acc_fold_list = []

        for train_index, test_index in skf.split(x_trg, y_trg):
            x_train, x_test = x_trg[train_index], x_trg[test_index]
            y_train, y_test = y_trg[train_index], y_trg[test_index]

            acc_fold, _ = calculate_tstr_score(x_train, y_train, x_test, y_test)
            acc_fold_list.append(acc_fold)

        acc = np.mean(acc_fold_list)
        acc_std = np.std(acc_fold_list)

        accs[(src_class, domain)] = acc

        print(f'{src_class}, {domain}: {acc:.4f} ± {acc_std:.4f}\n')

print('\nMean accuracies:')
for src_class in class_names:
    print(f'{src_class}: {np.mean([accs[(src_class, domain)] for domain in range(num_dp_domains)]):.4f}')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 28/50 - Train loss: 0.0015 - Val loss: 0.0013 - Val accuracy: 1.0000 - LR: 0.000044
Epoch 29/50 - Train loss: 0.0015 - Val loss: 0.0013 - Val accuracy: 1.0000 - LR: 0.000042
Epoch 30/50 - Train loss: 0.0016 - Val loss: 0.0012 - Val accuracy: 1.0000 - LR: 0.000040
Epoch 31/50 - Train loss: 0.0014 - Val loss: 0.0012 - Val accuracy: 1.0000 - LR: 0.000038
Epoch 32/50 - Train loss: 0.0015 - Val loss: 0.0012 - Val accuracy: 1.0000 - LR: 0.000036
Epoch 33/50 - Train loss: 0.0014 - Val loss: 0.0011 - Val accuracy: 1.0000 - LR: 0.000034
Epoch 34/50 - Train loss: 0.0013 - Val loss: 0.0011 - Val accuracy: 1.0000 - LR: 0.000032
Epoch 35/50 - Train loss: 0.0011 - Val loss: 0.0010 - Val accuracy: 1.0000 - LR: 0.000030
Epoch 36/50 - Train loss: 0.0013 - Val loss: 0.0010 - Val accuracy: 1.0000 - LR: 0.000028
Epoch 37/50 - Train loss: 0.0014 - Val loss: 0.0010 - Val accuracy: 1.0000 - LR: 0.000026
Epoch 38/50 - Train loss: 0.0011 - 