In [1]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import LambdaLR
import torch.optim as optim
import os
import csv

# Single seed shared by every random number generator used in this notebook.
seed = 2710

# Seed NumPy's global generator, then PyTorch's CPU and CUDA generators.
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [2]:
class TSTRClassifier(nn.Module):
    """1-D convolutional classifier for multi-channel sequences.

    Four Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages (16, 32, 64 and 128
    filters) are followed by dropout, a 100-unit hidden layer and a linear
    layer that produces one logit per class.

    Args:
        num_timesteps: Length of every input sequence (last input dimension).
        num_channels: Number of input channels (second input dimension).
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``num_timesteps`` is too short to survive the four
            pooling stages (i.e. smaller than 16).
    """

    # Number of conv/pool stages applied in ``forward``; each halves the length.
    _NUM_POOL_STAGES = 4

    def __init__(self, num_timesteps=128, num_channels=3, num_classes=5):
        super(TSTRClassifier, self).__init__()

        self.conv1 = nn.Conv1d(num_channels, 16, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm1d(16)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm1d(32)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm1d(64)
        self.conv4 = nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2)
        self.bn4 = nn.BatchNorm1d(128)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.25)

        # The convolutions keep the length (kernel 5, padding 2); every pooling
        # stage floor-halves it. Deriving the flattened size this way equals
        # ``num_timesteps * 8`` for multiples of 16 and stays correct for any
        # other length, where the hard-coded product would not match.
        pooled_length = num_timesteps
        for _ in range(self._NUM_POOL_STAGES):
            pooled_length //= 2
        if pooled_length < 1:
            raise ValueError(
                f"num_timesteps={num_timesteps} is too short for "
                f"{self._NUM_POOL_STAGES} pooling stages"
            )

        self.fc_shared = nn.Linear(128 * pooled_length, 100)

        self.fc_class = nn.Linear(100, num_classes)

    def forward(self, x):
        """Return class logits for a batch shaped (batch, num_channels, num_timesteps)."""
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        x = self.pool(self.relu(self.bn3(self.conv3(x))))
        x = self.pool(self.relu(self.bn4(self.conv4(x))))
        x = x.view(x.size(0), -1)  # Flatten to (batch, 128 * pooled_length)
        x = self.dropout(x)
        x = self.relu(self.fc_shared(x))

        # Final output for class prediction (raw logits, no softmax)
        class_outputs = self.fc_class(x)
        return class_outputs


def _select_by_fs_flag(dataset_name, fs_flag, class_idx, domain):
    """Load a dataset and keep the rows matching an fs flag, a class and a domain.

    Reads ``data/{dataset_name}.pkl`` (a pickled ``(x, y, k)`` triple) and
    ``data/{dataset_name}_fs.pkl`` (a pickled per-sample flag array), then
    returns the rows where the flag equals ``fs_flag``, ``y`` equals
    ``class_idx`` and ``k`` equals ``domain``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use these
    # loaders on data files from a trusted source.
    with open(f'data/{dataset_name}.pkl', 'rb') as f:
        x, y, k = pickle.load(f)

    with open(f'data/{dataset_name}_fs.pkl', 'rb') as f:
        fs = pickle.load(f)

    # One combined boolean mask selects the same rows, in the same order, as
    # filtering by flag first and by class/domain afterwards.
    selected = (fs == fs_flag) & (y == class_idx) & (k == domain)

    return x[selected], y[selected], k[selected]


def get_fs_data(dataset_name, class_idx, num_train_domains, domain):
    """Return the samples flagged ``fs == 1`` for one class in one domain.

    Args:
        dataset_name: Stem of the pickle files under ``data/``.
        class_idx: Label value to keep.
        num_train_domains: Unused; kept so existing callers keep working.
        domain: Domain value to keep.

    Returns:
        Tuple ``(x, y, k)`` restricted to the selected rows.
    """
    return _select_by_fs_flag(dataset_name, 1, class_idx, domain)


def get_non_fs_data(dataset_name, class_idx, num_train_domains, domain):
    """Return the samples flagged ``fs == 0`` for one class in one domain.

    Args:
        dataset_name: Stem of the pickle files under ``data/``.
        class_idx: Label value to keep.
        num_train_domains: Unused; kept so existing callers keep working.
        domain: Domain value to keep.

    Returns:
        Tuple ``(x, y, k)`` restricted to the selected rows.
    """
    return _select_by_fs_flag(dataset_name, 0, class_idx, domain)


def setup_training(x_tr, y_tr, x_val, y_val, batch_size=64):
    """Wrap the train and validation arrays in PyTorch data loaders.

    Features are converted to float32 tensors and labels to int64 tensors.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_val: Validation features.
        y_val: Validation labels.
        batch_size: Batch size used by both loaders.

    Returns:
        Tuple ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    def as_dataset(features, labels):
        # Pair float32 features with int64 labels in a single dataset.
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )

    train_loader = DataLoader(as_dataset(x_tr, y_tr), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(as_dataset(x_val, y_val), batch_size=batch_size, shuffle=False)

    return train_loader, val_loader


def train_model(model, train_loader, val_loader, loss_fn, optimizer, epochs=300):
    """Train ``model`` and return the weights of its best validation epoch.

    The learning rate decays linearly from its initial value towards zero over
    ``epochs``. After every epoch the model is scored on ``val_loader`` and the
    weights of the first epoch reaching the highest validation accuracy are
    snapshotted.

    Args:
        model: Network to train; it is moved to CUDA when available.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        loss_fn: Callable mapping ``(outputs, targets)`` to a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_loader``; must be at least 1.

    Returns:
        A state dict holding an independent copy of the best epoch's weights.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_epoch = 0
    best_loss = np.inf
    # Start below any reachable accuracy so the first epoch always registers,
    # even when the validation accuracy is exactly 0.
    best_accuracy = -np.inf
    best_model_state = None

    # Set up linear learning rate decay
    lambda_lr = lambda epoch: 1 - epoch / epochs
    scheduler = LambdaLR(optimizer, lr_lambda=lambda_lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = loss_fn(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        total_loss /= len(train_loader)

        # Update learning rate
        scheduler.step()

        val_accuracy, val_loss = evaluate_model(model, val_loader, loss_fn)
        if val_accuracy > best_accuracy:
            best_epoch = epoch
            best_accuracy = val_accuracy
            best_loss = val_loss
            # state_dict() tensors share storage with the live parameters, so a
            # shallow dict copy would keep changing as training continues.
            # Clone every tensor to freeze this epoch's weights.
            best_model_state = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }

        # The scheduler has already stepped, so this is the rate the next
        # epoch will use.
        current_lr = scheduler.get_last_lr()[0]
        print(f"Epoch {epoch + 1}/{epochs} - Train loss: {total_loss:.4f} - Val loss: {val_loss:.4f} - Val accuracy: {val_accuracy:.4f} - LR: {current_lr:.6f}")

    print(f"Best epoch: {best_epoch + 1} - Best val accuracy: {best_accuracy:.4f} - Best val loss: {best_loss:.4f}")

    return best_model_state


def evaluate_model(model, test_loader, loss_fn):
    """Score ``model`` on every batch of ``test_loader`` without gradients.

    The model is moved to CUDA when available and switched to eval mode.

    Args:
        model: Network to evaluate.
        test_loader: Iterable of ``(inputs, targets)`` batches.
        loss_fn: Callable mapping ``(outputs, targets)`` to a scalar loss.

    Returns:
        Tuple ``(accuracy, loss)``: the fraction of correctly classified
        samples and the loss averaged over batches (not over samples).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    batch_losses = []
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            batch_losses.append(loss_fn(logits, targets).item())

            # The predicted class is the index of the largest logit per row.
            predicted = torch.max(logits, 1).indices
            n_correct += (predicted == targets).sum().item()
            n_seen += len(targets)

    mean_loss = sum(batch_losses) / len(test_loader)

    return n_correct / n_seen, mean_loss


def remap_labels(y):
    """Replace each label by its position among the sorted unique labels of ``y``.

    Returns:
        A NumPy array as long as ``y`` with values in ``0 .. n_unique - 1``.
    """
    classes = np.unique(y)
    index_of = dict(zip(classes, range(len(classes))))
    return np.array(list(map(index_of.__getitem__, y)))


def calculate_tstr_score(x_train, y_train, x_test, y_test):
    """Train a classifier on one set and report its accuracy and loss on another.

    The training data is split 80/20 (stratified, fixed seed) into train and
    validation parts. A ``TSTRClassifier`` is trained for 50 epochs with Adam
    and the weights of the best validation epoch are evaluated on the test set.

    Args:
        x_train: Training features; dimension 1 is read as the channel count
            and dimension 2 as the number of timesteps.
        y_train: Training labels (arbitrary values).
        x_test: Test features with the same layout as ``x_train``.
        y_test: Test labels; every value must also occur in ``y_train``.

    Returns:
        Tuple ``(test_accuracy, test_loss)``.

    Raises:
        ValueError: If ``y_test`` contains a label absent from ``y_train``.
    """
    # Remap labels with ONE mapping derived from the training labels. Remapping
    # each set on its own would assign different indices to the same class
    # whenever the two sets do not contain exactly the same labels.
    classes = np.unique(y_train)
    unseen = np.setdiff1d(np.unique(y_test), classes)
    if unseen.size:
        raise ValueError(
            f"Test labels {unseen.tolist()} do not occur in the training labels"
        )
    y_train = np.searchsorted(classes, y_train)
    y_test = np.searchsorted(classes, y_test)

    x_tr, x_val, y_tr, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=2710, stratify=y_train, shuffle=True)
    tr_loader, val_loader = setup_training(x_tr, y_tr, x_val, y_val, batch_size=64)

    # Shared constructor arguments for the trained model and the clean copy
    # that later receives the best weights.
    model_kwargs = dict(
        num_timesteps=x_train.shape[2],
        num_channels=x_train.shape[1],
        num_classes=len(classes),
    )

    model = TSTRClassifier(**model_kwargs)
    loss_fn = nn.CrossEntropyLoss()
    initial_lr = 0.0001
    optimizer = optim.Adam(model.parameters(), lr=initial_lr)

    best_model_state = train_model(model, tr_loader, val_loader, loss_fn, optimizer, epochs=50)
    best_model = TSTRClassifier(**model_kwargs)
    best_model.load_state_dict(best_model_state)

    x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)
    test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    test_accuracy, test_loss = evaluate_model(best_model, test_loader, loss_fn)

    return test_accuracy, test_loss


def save_score(accuracy, loss, source, domain, name, dataset):
    """Append one result row to ``bounds_fs/{name}_{dataset}.csv``.

    The directory is created when missing. A header row is written first
    whenever the CSV file does not exist yet.

    Args:
        accuracy: Value for the ``accuracy`` column.
        loss: Value for the ``loss`` column.
        source: Value for the ``source`` column.
        domain: Value for the ``domain`` column.
        name: First part of the CSV file name.
        dataset: Second part of the CSV file name.
    """
    eval_dir = 'bounds_fs'
    os.makedirs(eval_dir, exist_ok=True)
    file_path = os.path.join(eval_dir, f'{name}_{dataset}.csv')

    # A brand-new file is opened for writing and starts with the header row;
    # an existing file is only appended to.
    is_new_file = not os.path.exists(file_path)
    rows = [[source, domain, accuracy, loss]]
    if is_new_file:
        rows.insert(0, ['source', 'domain', 'accuracy', 'loss'])

    with open(file_path, mode='w' if is_new_file else 'a', newline='') as file:
        csv.writer(file).writerows(rows)

    

In [3]:
# Dataset evaluated by this cell.
dataset = 'realworld'

# Per-dataset settings: pickle file stem, domain counts, class count and names.
if dataset == 'realworld':
    dataset_name = 'realworld_128_3ch_4cl'
    num_df_domains, num_dp_domains, num_classes = 10, 5, 4
    class_names = ['WAL', 'RUN', 'CLD', 'CLU']

elif dataset == 'cwru':
    dataset_name = 'cwru_256_3ch_5cl'
    num_df_domains, num_dp_domains, num_classes = 4, 4, 5
    class_names = ['IR', 'Ball', 'OR_centred', 'OR_orthogonal', 'OR_opposite']

# Class name -> integer label used in the pickled label array.
classes_dict = dict(zip(class_names, range(len(class_names))))

# Test accuracy for every (held-out class, domain) pair.
accs = {}

for domain in range(num_dp_domains):
    for src_class in class_names:
        # Every class except the held-out one takes part in this run.
        trg_classes = [clss for clss in class_names if clss != src_class]

        # Gather the fs == 1 and fs == 0 samples of each remaining class.
        fs_parts, nfs_parts = [], []
        for trg_class in trg_classes:
            class_idx = classes_dict[trg_class]
            fs_parts.append(get_fs_data(dataset_name, class_idx, num_df_domains, domain))
            nfs_parts.append(get_non_fs_data(dataset_name, class_idx, num_df_domains, domain))

        x_fs, y_fs, k_fs = (np.concatenate(column) for column in zip(*fs_parts))
        x_nfs, y_nfs, k_nfs = (np.concatenate(column) for column in zip(*nfs_parts))

        print(f'{src_class} - Domain {domain} - FS: {x_fs.shape[0]} - NFS: {x_nfs.shape[0]}')

        # Train on the fs samples, test on the non-fs samples.
        acc, loss = calculate_tstr_score(x_fs, y_fs, x_nfs, y_nfs)
        accs[(src_class, domain)] = acc

        print(f'{src_class} - Domain {domain} - Accuracy: {acc:.4f}\n')
        # The stored domain id is offset by num_df_domains.
        save_score(acc, loss, src_class, domain+num_df_domains, 'LB_real_fs', dataset)


# Mean accuracy over domains for each held-out class.
for src_class in class_names:
    per_domain = [accs[(src_class, domain)] for domain in range(num_dp_domains)]
    mean_acc = np.mean(per_domain)
    print(f'{src_class}: {mean_acc:.4f}')

WAL - Domain 0 - FS: 15 - NFS: 623
Epoch 1/50 - Train loss: 1.1501 - Val loss: 1.0999 - Val accuracy: 0.3333 - LR: 0.000098
Epoch 2/50 - Train loss: 1.0975 - Val loss: 1.1008 - Val accuracy: 0.3333 - LR: 0.000096
Epoch 3/50 - Train loss: 1.0428 - Val loss: 1.1020 - Val accuracy: 0.3333 - LR: 0.000094
Epoch 4/50 - Train loss: 0.9270 - Val loss: 1.1034 - Val accuracy: 0.3333 - LR: 0.000092
Epoch 5/50 - Train loss: 0.8581 - Val loss: 1.1048 - Val accuracy: 0.3333 - LR: 0.000090
Epoch 6/50 - Train loss: 0.8281 - Val loss: 1.1063 - Val accuracy: 0.3333 - LR: 0.000088
Epoch 7/50 - Train loss: 0.8195 - Val loss: 1.1077 - Val accuracy: 0.3333 - LR: 0.000086
Epoch 8/50 - Train loss: 0.7612 - Val loss: 1.1097 - Val accuracy: 0.3333 - LR: 0.000084
Epoch 9/50 - Train loss: 0.6853 - Val loss: 1.1124 - Val accuracy: 0.3333 - LR: 0.000082
Epoch 10/50 - Train loss: 0.6679 - Val loss: 1.1157 - Val accuracy: 0.3333 - LR: 0.000080
Epoch 11/50 - Train loss: 0.6389 - Val loss: 1.1195 - Val accuracy: 0.3333

In [4]:
# Dataset evaluated by this cell.
dataset = 'cwru'

# Per-dataset settings: pickle file stem, domain counts, class count and names.
if dataset == 'realworld':
    dataset_name = 'realworld_128_3ch_4cl'
    num_df_domains, num_dp_domains, num_classes = 10, 5, 4
    class_names = ['WAL', 'RUN', 'CLD', 'CLU']

elif dataset == 'cwru':
    dataset_name = 'cwru_256_3ch_5cl'
    num_df_domains, num_dp_domains, num_classes = 4, 4, 5
    class_names = ['IR', 'Ball', 'OR_centred', 'OR_orthogonal', 'OR_opposite']

# Class name -> integer label used in the pickled label array.
classes_dict = dict(zip(class_names, range(len(class_names))))

# Test accuracy for every (held-out class, domain) pair.
accs = {}

for domain in range(num_dp_domains):
    for src_class in class_names:
        # Every class except the held-out one takes part in this run.
        trg_classes = [clss for clss in class_names if clss != src_class]

        # Gather the fs == 1 and fs == 0 samples of each remaining class.
        fs_parts, nfs_parts = [], []
        for trg_class in trg_classes:
            class_idx = classes_dict[trg_class]
            fs_parts.append(get_fs_data(dataset_name, class_idx, num_df_domains, domain))
            nfs_parts.append(get_non_fs_data(dataset_name, class_idx, num_df_domains, domain))

        x_fs, y_fs, k_fs = (np.concatenate(column) for column in zip(*fs_parts))
        x_nfs, y_nfs, k_nfs = (np.concatenate(column) for column in zip(*nfs_parts))

        print(f'{src_class} - Domain {domain} - FS: {x_fs.shape[0]} - NFS: {x_nfs.shape[0]}')

        # Train on the fs samples, test on the non-fs samples.
        acc, loss = calculate_tstr_score(x_fs, y_fs, x_nfs, y_nfs)
        accs[(src_class, domain)] = acc

        print(f'{src_class} - Domain {domain} - Accuracy: {acc:.4f}\n')
        # The stored domain id is offset by num_df_domains.
        save_score(acc, loss, src_class, domain+num_df_domains, 'TSTRscoreFS', dataset)


# Mean accuracy over domains for each held-out class.
for src_class in class_names:
    per_domain = [accs[(src_class, domain)] for domain in range(num_dp_domains)]
    mean_acc = np.mean(per_domain)
    print(f'{src_class}: {mean_acc:.4f}')

IR - Domain 0 - FS: 20 - NFS: 1888
Epoch 1/50 - Train loss: 1.4403 - Val loss: 1.3872 - Val accuracy: 0.2500 - LR: 0.000098
Epoch 2/50 - Train loss: 1.3841 - Val loss: 1.3871 - Val accuracy: 0.2500 - LR: 0.000096
Epoch 3/50 - Train loss: 1.2779 - Val loss: 1.3869 - Val accuracy: 0.2500 - LR: 0.000094
Epoch 4/50 - Train loss: 1.2481 - Val loss: 1.3869 - Val accuracy: 0.2500 - LR: 0.000092
Epoch 5/50 - Train loss: 1.1893 - Val loss: 1.3868 - Val accuracy: 0.2500 - LR: 0.000090
Epoch 6/50 - Train loss: 1.0741 - Val loss: 1.3867 - Val accuracy: 0.2500 - LR: 0.000088
Epoch 7/50 - Train loss: 1.0604 - Val loss: 1.3867 - Val accuracy: 0.2500 - LR: 0.000086
Epoch 8/50 - Train loss: 0.9978 - Val loss: 1.3866 - Val accuracy: 0.2500 - LR: 0.000084
Epoch 9/50 - Train loss: 0.9660 - Val loss: 1.3867 - Val accuracy: 0.2500 - LR: 0.000082
Epoch 10/50 - Train loss: 0.9402 - Val loss: 1.3870 - Val accuracy: 0.2500 - LR: 0.000080
Epoch 11/50 - Train loss: 0.8580 - Val loss: 1.3873 - Val accuracy: 0.2500