In [1]:
import pickle
import numpy as np
from statsmodels.regression.linear_model import burg
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold

# Fixed seed applied once to NumPy's global random generator so that any
# NumPy-based randomness in this notebook is repeatable between runs.
seed = 2710
np.random.seed(seed)

In [2]:
def _load_flagged_subset(dataset_name, class_idx, num_train_domains, domain, fs_flag):
    """Load a dataset and keep the samples matching an fs flag, a class and a domain.

    Reads ``data/{dataset_name}.pkl`` (an ``(x, y, k)`` triple) and
    ``data/{dataset_name}_fs.pkl`` (a per-sample flag array), then selects the
    samples whose flag equals ``fs_flag``, whose label equals ``class_idx`` and
    whose domain id equals ``domain``.

    Returns:
        ``(x_, y_, k_)`` where ``k_`` is the selected domain ids minus
        ``num_train_domains``.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at dataset files you trust.
    with open(f'data/{dataset_name}.pkl', 'rb') as f:
        x, y, k = pickle.load(f)

    with open(f'data/{dataset_name}_fs.pkl', 'rb') as f:
        fs = pickle.load(f)

    # One combined mask instead of filtering by flag first and then rebuilding
    # the class/domain mask separately for each of x, y and k.
    keep = (fs == fs_flag) & (y == class_idx) & (k == domain)

    # NOTE(review): the evaluation cells in this notebook pass `domain` values
    # starting at 0, so with num_train_domains > 0 the shifted ids below are
    # negative -- confirm the intended offset convention.
    return x[keep], y[keep], k[keep] - num_train_domains


def get_fs_data(dataset_name, class_idx, num_train_domains, domain):
    """Return ``(x, y, k)`` for samples flagged ``fs == 1`` of one class and domain.

    ``k`` is returned shifted by ``-num_train_domains``.
    """
    return _load_flagged_subset(dataset_name, class_idx, num_train_domains, domain, fs_flag=1)


def get_non_fs_data(dataset_name, class_idx, num_train_domains, domain):
    """Return ``(x, y, k)`` for samples flagged ``fs == 0`` of one class and domain.

    ``k`` is returned shifted by ``-num_train_domains``.
    """
    return _load_flagged_subset(dataset_name, class_idx, num_train_domains, domain, fs_flag=0)



def safe_burg(x, order=4):
    """Burg AR estimate of ``x`` that falls back to zeros for flat signals.

    Args:
        x: 1-D signal handed to ``burg``.
        order: AR order; also the length of the zero fallback vector.

    Returns:
        The first element of ``burg(x, order)`` when the standard deviation of
        ``x`` exceeds 1e-6, otherwise ``np.zeros(order)``.
    """
    has_variation = np.std(x) > 1e-6
    if not has_variation:
        # Constant (or near-constant) input: skip the estimator entirely.
        return np.zeros(order)
    return burg(x, order)[0]
    

def entropy_safe(x):
    """Return ``-sum(v * log(v))`` over axis 2, with values floored at 1e-6.

    The floor keeps ``log`` away from zero and negative inputs; there is no
    upper bound applied.
    """
    floored = np.clip(x, 1e-6, None)
    weighted_log = floored * np.log(floored)
    return -weighted_log.sum(axis=2)


def extract_features_all(x):
    """Compute per-channel summary statistics plus order-4 Burg AR coefficients.

    Args:
        x: 3-D array reduced along axis 2; channels 0, 1 and 2 of axis 1 are
            used for the AR features (assumed layout: samples, channels,
            time steps -- TODO confirm against the dataset files).

    Returns:
        2-D array with one row per sample: 17 statistics for every channel,
        followed by 4 AR coefficients for each of channels 0, 1 and 2.
    """
    mean = np.mean(x, axis=2)
    std = np.std(x, axis=2)
    var = np.var(x, axis=2)
    # Named minimum/maximum so the builtins min()/max() are not shadowed.
    minimum = np.min(x, axis=2)
    maximum = np.max(x, axis=2)

    # Centre once and reuse for both higher moments.
    centered = x - np.mean(x, axis=2, keepdims=True)
    thirdmoment = np.mean(centered**3, axis=2)
    fourthmoment = np.mean(centered**4, axis=2)
    # The small epsilon keeps the ratios finite for constant signals.
    skewness = thirdmoment / ((std+1e-6)**3)
    kurtosis = fourthmoment / ((std+1e-6)**4)

    mad = np.median(np.abs(x - np.median(x, axis=2, keepdims=True)), axis=2)
    sma = np.sum(np.abs(x), axis=2)
    energy = np.sum(x**2, axis=2)

    # One percentile call yields all three quartiles (stacked on axis 0).
    firstquartile, secondquartile, thirdquartile = np.percentile(x, [25, 50, 75], axis=2)
    iqr = thirdquartile - firstquartile

    entropy = entropy_safe(x)

    # Every channel goes through safe_burg so a constant channel yields zeros
    # instead of reaching burg unguarded (previously only channel 0 was guarded).
    autocorr_x, autocorr_y, autocorr_z = (
        np.array([safe_burg(x[i, channel, :], order=4) for i in range(x.shape[0])])
        for channel in range(3)
    )

    return np.concatenate([mean, std, var, minimum, maximum, thirdmoment, fourthmoment,
                           skewness, kurtosis, mad, sma, energy, iqr, firstquartile,
                           secondquartile, thirdquartile, entropy,
                           autocorr_x, autocorr_y, autocorr_z], axis=1)

def extract_temporal_features(x):
    """Compute the shared feature set on the signal after clamping it to [0, 1]."""
    # NOTE(review): values outside [0, 1] are saturated here -- presumably the
    # inputs are already scaled to that range; confirm against the data.
    clamped = np.clip(x, 0, 1)
    return extract_features_all(clamped)


def extract_spectral_features(x):
    """Compute the shared feature set on the magnitude of the real FFT along axis 2."""
    magnitude = np.abs(np.fft.rfft(x, axis=2))
    return extract_features_all(magnitude)


def extract_features(x):
    """Return temporal features followed by spectral features, joined column-wise."""
    feature_groups = (
        extract_temporal_features(x),
        extract_spectral_features(x),
    )
    return np.concatenate(feature_groups, axis=1)


def remap_labels(y):
    """Replace each label by its rank among the sorted unique labels of ``y``.

    The smallest label becomes 0, the next 1, and so on.
    """
    index_of = {}
    for position, label in enumerate(np.unique(y)):
        index_of[label] = position

    remapped = [index_of[label] for label in y]
    return np.array(remapped)


def calculate_tstr_score(x_fs, y_fs, x_nfs, y_nfs):
    """Train LightGBM on the ``fs`` split and return its accuracy on the ``nfs`` split.

    Args:
        x_fs, y_fs: training signals and their labels.
        x_nfs, y_nfs: evaluation signals and their labels.

    Returns:
        Fraction of evaluation samples whose predicted class equals the label.
    """
    # Extract features
    x_fs = extract_features(x_fs)
    x_nfs = extract_features(x_nfs)

    # Encode both label sets with ONE mapping derived from the training labels.
    # Remapping each set independently assigns different indices to the same
    # class whenever the two sets do not contain exactly the same classes.
    y_fs = np.asarray(y_fs)
    y_nfs = np.asarray(y_nfs)
    classes = np.unique(y_fs)
    num_classes = len(classes)

    y_fs = np.searchsorted(classes, y_fs)
    # Evaluation labels never seen in training get -1, so they always count as
    # errors instead of aliasing onto some other class index.
    slot = np.clip(np.searchsorted(classes, y_nfs), 0, num_classes - 1)
    y_nfs = np.where(classes[slot] == y_nfs, slot, -1)

    train_data = lgb.Dataset(x_fs, label=y_fs)

    params = {
        'objective': 'multiclass' if num_classes > 2 else 'binary',
        'num_class': num_classes if num_classes > 2 else 1,
        'metric': 'multi_logloss' if num_classes > 2 else 'binary_logloss',
        'seed': 2710,
        'verbosity': -1
    }

    model = lgb.train(params, train_data)
    y_pred = model.predict(x_nfs)

    if num_classes > 2:
        # Multiclass: one probability column per class.
        y_hat = np.argmax(y_pred, axis=1)
    else:
        # Binary: predict() yields a 1-D vector of positive-class probabilities,
        # so argmax over axis=1 would raise; threshold instead.
        y_hat = (y_pred > 0.5).astype(int)

    acc = np.mean(y_hat == y_nfs)

    return acc

In [4]:
# For every domain and every held-out source class: train on the fs-flagged
# samples of the remaining classes, score on their non-fs samples, then print
# each source class's accuracy averaged over domains.
dataset = 'realworld'

if dataset == 'realworld':
    dataset_name = 'realworld_128_3ch_4cl'
    num_df_domains = 10
    num_dp_domains = 5
    num_classes = 4
    class_names = ['WAL', 'RUN', 'CLD', 'CLU']

elif dataset == 'cwru':
    dataset_name = 'cwru_256_3ch_5cl'
    num_df_domains = 4
    num_dp_domains = 4
    num_classes = 5
    class_names = ['IR', 'Ball', 'OR_centred', 'OR_orthogonal', 'OR_opposite']

else:
    # Fail loudly rather than silently reusing settings left over from an
    # earlier cell (or hitting a NameError on a fresh kernel).
    raise ValueError(f"Unknown dataset {dataset!r}; expected 'realworld' or 'cwru'")

classes_dict = {clss: i for i, clss in enumerate(class_names)}

accs = {}

for domain in range(num_dp_domains):
    for src_class in class_names:
        # Leave the source class out; only the remaining classes are used.
        trg_classes = [clss for clss in class_names if clss != src_class]

        fs_parts = [
            get_fs_data(dataset_name, classes_dict[trg_class], num_df_domains, domain)
            for trg_class in trg_classes
        ]
        nfs_parts = [
            get_non_fs_data(dataset_name, classes_dict[trg_class], num_df_domains, domain)
            for trg_class in trg_classes
        ]

        # Each part is an (x, y, k) triple: regroup by field, then stack over classes.
        x_fs, y_fs, k_fs = (np.concatenate(field) for field in zip(*fs_parts))
        x_nfs, y_nfs, k_nfs = (np.concatenate(field) for field in zip(*nfs_parts))

        acc = calculate_tstr_score(x_fs, y_fs, x_nfs, y_nfs)
        accs[(src_class, domain)] = acc


for src_class in class_names:
    mean_acc = np.mean([accs[(src_class, domain)] for domain in range(num_dp_domains)])
    print(f'{src_class}: {mean_acc:.4f}')

WAL: 0.3863
RUN: 0.4357
CLD: 0.4003
CLU: 0.4106


In [5]:
# For every domain and every held-out source class: train on the fs-flagged
# samples of the remaining classes, score on their non-fs samples, then print
# each source class's accuracy averaged over domains.
# NOTE(review): the output recorded for this cell is ~0.25 for every class,
# which is chance level with four target classes -- verify the extracted
# features and labels for this dataset before relying on these numbers.
dataset = 'cwru'

if dataset == 'realworld':
    dataset_name = 'realworld_128_3ch_4cl'
    num_df_domains = 10
    num_dp_domains = 5
    num_classes = 4
    class_names = ['WAL', 'RUN', 'CLD', 'CLU']

elif dataset == 'cwru':
    dataset_name = 'cwru_256_3ch_5cl'
    num_df_domains = 4
    num_dp_domains = 4
    num_classes = 5
    class_names = ['IR', 'Ball', 'OR_centred', 'OR_orthogonal', 'OR_opposite']

else:
    # Fail loudly rather than silently reusing settings left over from an
    # earlier cell (or hitting a NameError on a fresh kernel).
    raise ValueError(f"Unknown dataset {dataset!r}; expected 'realworld' or 'cwru'")

classes_dict = {clss: i for i, clss in enumerate(class_names)}

accs = {}

for domain in range(num_dp_domains):
    for src_class in class_names:
        # Leave the source class out; only the remaining classes are used.
        trg_classes = [clss for clss in class_names if clss != src_class]

        fs_parts = [
            get_fs_data(dataset_name, classes_dict[trg_class], num_df_domains, domain)
            for trg_class in trg_classes
        ]
        nfs_parts = [
            get_non_fs_data(dataset_name, classes_dict[trg_class], num_df_domains, domain)
            for trg_class in trg_classes
        ]

        # Each part is an (x, y, k) triple: regroup by field, then stack over classes.
        x_fs, y_fs, k_fs = (np.concatenate(field) for field in zip(*fs_parts))
        x_nfs, y_nfs, k_nfs = (np.concatenate(field) for field in zip(*nfs_parts))

        acc = calculate_tstr_score(x_fs, y_fs, x_nfs, y_nfs)
        accs[(src_class, domain)] = acc


for src_class in class_names:
    mean_acc = np.mean([accs[(src_class, domain)] for domain in range(num_dp_domains)])
    print(f'{src_class}: {mean_acc:.4f}')

IR: 0.2495
Ball: 0.2501
OR_centred: 0.2503
OR_orthogonal: 0.2502
OR_opposite: 0.2503
