### Wczytanie datasetu

In [None]:
import numpy as np

DATASET_NAME = "FordA"

CLASS_NAMES = ["Abnormal", "Normal"]

LENGTH = 500  # Signal length: number of leading samples kept from every recording


def load_split(path: str, length: int = LENGTH) -> tuple:
    """Read one dataset split from a text file.

    Every non-empty line holds fields separated by two spaces: the first
    field is the class label, the remaining fields are the signal samples.

    Args:
        path: Path of the text file to read.
        length: Number of leading samples kept from each signal.

    Returns:
        ``(X, y)`` where ``X`` stacks the truncated signals (float64) and
        ``y`` holds the integer class ids.
    """
    signals = []
    labels = []
    with open(path, "r") as f:
        # Iterate lazily instead of materialising the whole file with readlines().
        for raw_line in f:
            stripped = raw_line.strip()
            if not stripped:
                # A blank (e.g. trailing) line carries no record; skip it
                # instead of crashing on int(float('')).
                continue
            fields = stripped.split("  ")
            # Labels are parsed via float first; negative labels are clamped to 0
            # so that class ids can index CLASS_NAMES.
            class_id = max(int(float(fields[0])), 0)
            signals.append(np.array(fields[1:], dtype=np.float64)[:length])
            labels.append(class_id)
    return np.array(signals), np.array(labels)


X_train, y_train = load_split(f"{DATASET_NAME}_TRAIN.txt")
X_test, y_test = load_split(f"{DATASET_NAME}_TEST.txt")

### Wizualizacja przebiegów

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# One subplot per class, each overlaying the first three test signals of that class.
fig, axes = plt.subplots(nrows=len(CLASS_NAMES), figsize=(10, 5))
for class_id, ax in enumerate(axes):
    # Zero reference line
    ax.axhline(0, color='black', linestyle='-', linewidth=1)

    class_indices = np.flatnonzero(y_test == class_id)
    for k in range(3):
        signal = np.array(X_test[class_indices[k]], dtype=float)
        ax.plot(signal, linewidth=1, alpha=0.5)

    ax.set_title(f"{CLASS_NAMES[class_id]}")
    ax.set_xlabel("Samples [-]")
    ax.set_ylabel("Sensor Value [-]")
    ax.set_xlim(0, LENGTH)
    ax.set_ylim(-4, 4)

plt.tight_layout()
plt.show()


### Analiza danych

In [None]:
# class_values[c]: every sample value of every training signal of class c (flattened).
# class_stats[c][key]: (mean, std) of a per-signal statistic taken over the signals of class c.
class_values = {}
class_stats = {}
for class_id in range(len(CLASS_NAMES)):
    class_signals = X_train[y_train == class_id]
    class_signals_abs = np.abs(class_signals)

    # Flattened view of all samples; replaces the slow per-row Python list concatenation.
    class_values[class_id] = class_signals.ravel()

    # Per-signal statistics, computed for all signals of the class at once (axis=1).
    per_signal = {
        'mean': class_signals.mean(axis=1),
        'mean-abs': class_signals_abs.mean(axis=1),
        'std': class_signals.std(axis=1),
        'std-abs': class_signals_abs.std(axis=1),
        'max': class_signals.max(axis=1),
        'min': class_signals.min(axis=1),
    }
    class_stats[class_id] = {
        key: (np.mean(values), np.std(values)) for key, values in per_signal.items()
    }

# Histogram of raw sample values, one subplot per class.
fig, axes = plt.subplots(nrows=len(CLASS_NAMES), figsize=(6, 7))

for i, ax in enumerate(axes):
    ax.hist(class_values[i], bins=50, alpha=0.75, color='royalblue', edgecolor='black')
    ax.set_title(f"{CLASS_NAMES[i]}")
    ax.set_xlabel("Sensor Value [-]")
    ax.set_ylabel("Count")

plt.tight_layout()
plt.show()

# Text summary: "<statistic>: <mean over signals> (<std over signals>)".
stat_rows = (
    ("Mean", 'mean'),
    ("Std", 'std'),
    ("Mean Abs", 'mean-abs'),
    ("Std Abs", 'std-abs'),
    ("Max", 'max'),
    ("Min", 'min'),
)
for class_id in range(len(CLASS_NAMES)):
    print(f"Class: {CLASS_NAMES[class_id]}")
    for stat_label, key in stat_rows:
        stat_mean, stat_std = class_stats[class_id][key]
        print(f"\t{stat_label}: {stat_mean:.3f} ({stat_std:.3f})")


### Analiza w domenie częstotliwościowej

In [None]:
def get_fft_data(data: np.ndarray) -> tuple:
    """Compute the non-negative-frequency half of the FFT of a signal.

    The transform is taken along the last axis, so ``data`` may be a single
    1-D signal or a batch of signals stacked along the leading axes.

    Args:
        data: Array-like of real samples; converted to float.

    Returns:
        A tuple ``(freq, amplitude, phase)``:
        ``freq`` - 1-D array of the non-negative bins of ``np.fft.fftfreq``
        (unit sample spacing), ``amplitude`` - magnitude of the spectrum at
        those bins, ``phase`` - angle of the spectrum in radians. The last
        axis of ``amplitude`` and ``phase`` matches ``freq``.
    """
    spectrum = np.fft.fft(np.asarray(data, dtype=float), axis=-1)
    freq = np.fft.fftfreq(spectrum.shape[-1])

    # Keep only bins with non-negative frequency. The mask is applied to the
    # last axis so that batched (N, L) input works as well as a 1-D signal.
    keep = freq >= 0
    spectrum = spectrum[..., keep]
    freq = freq[keep]

    amplitude = np.abs(spectrum)
    phase = np.angle(spectrum)
    return freq, amplitude, phase

def transform_to_freq_domain(data: np.ndarray) -> np.ndarray:
    """Map every signal in ``data`` to its FFT amplitude spectrum.

    Iterates over the first axis of ``data``, takes the amplitude component
    returned by ``get_fft_data`` for each item and stacks the results into a
    float64 array.
    """
    amplitudes = []
    for signal in data:
        _, amplitude, _ = get_fft_data(signal)
        amplitudes.append(amplitude)
    return np.array(amplitudes, dtype=np.float64)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# One row per class: amplitude spectra on the left, phase spectra on the right,
# each overlaying the first five training signals of that class.
fig, axes = plt.subplots(nrows=len(CLASS_NAMES), ncols=2, figsize=(10, 5))
for class_id, (ax_amplitude, ax_phase) in enumerate(axes):
    class_indices = np.where(y_train == class_id)[0]
    for k in range(5):
        freq, amplitude, phase = get_fft_data(X_train[class_indices[k]])
        ax_amplitude.plot(freq, amplitude, linewidth=1, alpha=0.5)
        ax_phase.plot(freq, phase, linewidth=1, alpha=0.5)

    for ax in (ax_amplitude, ax_phase):
        ax.set_title(f"{CLASS_NAMES[class_id]}")
        ax.set_xlabel("Frequency [-]")
        # The x-range comes from the frequency axis of the last plotted signal.
        ax.set_xlim(0, freq.max())

    ax_amplitude.set_ylabel("Amplitude [-]")
    ax_phase.set_ylabel("Phase [-]")

plt.tight_layout()
plt.show()

W toku zajęć wykorzystamy dwa zestawy danych wejściowych:
- dane w domenie czasowej (przebieg czasowy)
- dane w domenie częstotliwościowej (amplitudy)

### Analiza cech wejściowych

In [None]:
import seaborn as sns

def plot_correlation_matrix(features: np.ndarray, title: str, axis_label: str) -> None:
    """Draw the feature-by-feature correlation matrix of ``features`` as a heatmap.

    Args:
        features: 2-D array with one row per signal and one column per feature.
        title: Figure title.
        axis_label: Label used for both axes (both index the same features).
    """
    # corrcoef treats rows as variables, hence the transpose.
    corr = np.corrcoef(features.T)
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(corr, ax=ax, cmap='coolwarm', center=0)
    ax.set_title(title)
    ax.set_xlabel(axis_label, fontdict={'fontsize': 9})
    ax.set_ylabel(axis_label, fontdict={'fontsize': 9})
    plt.show()


# Correlation matrix in the time domain
plot_correlation_matrix(X_train, "Correlation Matrix (Time Domain)", "Samples [-]")

# Correlation matrix in the frequency domain (FFT amplitudes)
plot_correlation_matrix(
    transform_to_freq_domain(X_train),
    "Correlation Matrix (Frequency Domain)",
    "Frequency bins [-]",
)

In [None]:
# Variability (standard deviation) and mean of the input features, per class


def plot_feature_statistics(features: np.ndarray, labels: np.ndarray, domain: str) -> None:
    """Plot the per-feature standard deviation and mean for every class.

    Args:
        features: 2-D array with one row per signal and one column per feature.
        labels: 1-D array of class ids aligned with the rows of ``features``.
        domain: Text appended to the subplot titles, e.g. "Time Domain".
    """
    fig, axes = plt.subplots(figsize=(12, 4), ncols=2)
    for class_id, class_name in enumerate(CLASS_NAMES):
        class_features = features[labels == class_id]
        axes[0].plot(np.std(class_features, axis=0), linewidth=1, alpha=0.75, label=class_name)
        axes[1].plot(np.mean(class_features, axis=0), linewidth=1, alpha=0.75, label=class_name)
    # The left panel shows np.std, so it is titled as standard deviation.
    axes[0].set_title(f"Input Features Standard Deviation ({domain})")
    axes[1].set_title(f"Input Features Mean Value ({domain})")
    for ax in axes:
        ax.set_xlabel("Feature index [-]")
        ax.legend()
    plt.show()


plot_feature_statistics(X_train, y_train, "Time Domain")
plot_feature_statistics(transform_to_freq_domain(X_train), y_train, "Frequency Domain")


### Klasyfikacja

In [None]:
from sklearn.preprocessing import FunctionTransformer

# Pipeline step that replaces each signal with its FFT amplitude spectrum.
fft_features = FunctionTransformer(func=transform_to_freq_domain)

In [None]:
# Stworzenie pipeline'u
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# SVM
from sklearn.svm import SVC
# Random Forest
from sklearn.ensemble import RandomForestClassifier
# KNN
from sklearn.neighbors import KNeighborsClassifier

USE_FFT = True  # True: classify FFT amplitudes; False: classify raw time-domain samples


def build_pipeline(model, use_fft: bool) -> Pipeline:
    """Assemble ``[FFT features ->] StandardScaler -> model``.

    Args:
        model: Estimator placed in the final ``'model'`` step.
        use_fft: When true, the ``fft_features`` transformer is inserted as
            the first step (``'feature_extractor'``).

    Returns:
        The unfitted pipeline.
    """
    steps = []
    if use_fft:
        steps.append(('feature_extractor', fft_features))
    steps.append(('scaler', StandardScaler()))
    steps.append(('model', model))
    return Pipeline(steps)


pipeline_SVM = build_pipeline(SVC(kernel='rbf'), USE_FFT)
# Fixed seed so that the forest (and its reported scores) is reproducible between runs.
pipeline_RF = build_pipeline(RandomForestClassifier(random_state=0), USE_FFT)
pipeline_KNN = build_pipeline(KNeighborsClassifier(), USE_FFT)

# Fit

pipeline_SVM.fit(X_train, y_train)
pipeline_RF.fit(X_train, y_train)
pipeline_KNN.fit(X_train, y_train)

# Evaluate
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from seaborn import heatmap

# Evaluate every fitted pipeline on the test split: text report + row-normalised confusion matrix.
for pipeline, name in zip((pipeline_SVM, pipeline_RF, pipeline_KNN), ("SVM", "RandomForest", "KNN")):

    preds = pipeline.predict(X_test)

    # Explicit label list keeps the report/matrix aligned with CLASS_NAMES even
    # when a class is absent from both the ground truth and the predictions.
    class_ids = list(range(len(CLASS_NAMES)))

    print(f"=== {name} ===")
    report = classification_report(y_test, preds, labels=class_ids, target_names=CLASS_NAMES, zero_division=0, digits=3)
    print(report)

    # normalize='true' divides each row by the number of ground-truth samples of that class.
    confusion_matrix_ = confusion_matrix(y_test, preds, labels=class_ids, normalize='true')
    fig, ax = plt.subplots()
    heatmap(confusion_matrix_, annot=True, xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES, cmap='Blues', fmt=".3f", ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Ground truth")
    ax.set_title(f"{name}")
    plt.show()

### Wykorzystanie CNN oraz MLP

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

### Stworzenie datasetu i dataloadera

In [None]:
# Training hyper-parameters
BATCH_SIZE = 2 ** 5       # samples per mini-batch
LEARNING_RATE = 0.0001    # optimizer step size

# Dataset
class CustomDataset(Dataset):
    """In-memory dataset of standardised signals and their integer labels.

    Args:
        X: Array of signals, one row per signal.
        y: Array of integer class ids aligned with the rows of ``X``.
        use_fft: When true, signals are replaced by their FFT amplitudes
            (via ``transform_to_freq_domain``) and kept 2-D; otherwise a
            channel axis is inserted so that items are ``C x L`` with ``C=1``.
        mean: Optional mean used for standardisation. Defaults to the global
            mean of this dataset's (transformed) data.
        std: Optional standard deviation used for standardisation. Defaults
            to the global std of this dataset's (transformed) data.

    Attributes:
        mean, std: The statistics that were actually applied. Pass the
            training dataset's values when building a test dataset to
            standardise both splits identically.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray, use_fft: bool, mean=None, std=None):
        if use_fft:
            # Transform to frequency domain
            X = transform_to_freq_domain(X)
        else:
            # Add channel dimension
            X = np.expand_dims(X, axis=1) # N x C x L, with C=1

        # Standardise with the supplied statistics, or with this dataset's own.
        self.mean = float(np.mean(X)) if mean is None else float(mean)
        self.std = float(np.std(X)) if std is None else float(std)
        if not np.isfinite(self.std) or self.std == 0.0:
            # Constant (or empty) data: avoid dividing by zero / NaN.
            self.std = 1.0
        X = (X - self.mean) / self.std

        self.X = torch.tensor(X.astype(np.float32))
        self.y = torch.tensor(y.astype(np.int64))

    def __len__(self):
        """Number of signals in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(signal, label)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

    def get_class_weights(self):
        """Inverse-frequency class weights as a float32 tensor.

        One weight per class id present in the labels, in ascending id order:
        ``total_samples / samples_of_that_class``.
        """
        class_sample_count = np.unique(self.y.numpy(), return_counts=True)[1]
        weight = (np.sum(class_sample_count) / class_sample_count).astype(np.float32)
        return torch.from_numpy(weight)


# Dataloaders
# Time-domain datasets (signals with a channel axis)
dataset_train = CustomDataset(X_train, y_train, use_fft=False)
dataset_test = CustomDataset(X_test, y_test, use_fft=False)

# Frequency-domain datasets (FFT amplitudes)
dataset_train_fft = CustomDataset(X_train, y_train, use_fft=True)
dataset_test_fft = CustomDataset(X_test, y_test, use_fft=True)

# Training loaders shuffle every epoch; test loaders keep the sample order.
dataloader_train = DataLoader(dataset=dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_test = DataLoader(dataset=dataset_test, batch_size=BATCH_SIZE, shuffle=False)
dataloader_train_fft = DataLoader(dataset=dataset_train_fft, batch_size=BATCH_SIZE, shuffle=True)
dataloader_test_fft = DataLoader(dataset=dataset_test_fft, batch_size=BATCH_SIZE, shuffle=False)

print(dataset_train.get_class_weights())

### Definicja własnej sieci neuronowej

https://journals.sagepub.com/doi/full/10.3233/ICA-200617

In [None]:
class CNN(nn.Module):
    """1-D convolutional classifier.

    Three Conv1d/ReLU/MaxPool stages, global max pooling over the length
    axis, then three fully connected layers with dropout in between.

    ``forward`` returns raw class scores (logits), which is what
    ``nn.CrossEntropyLoss`` expects; use ``predict_proba`` for probabilities.

    Args:
        n_class: Number of output classes.
        n_channels_in: Number of channels of the input signal.
    """

    def __init__(self, n_class: int, n_channels_in: int) -> None:
        super().__init__()

        CHANNELS_0 = 32
        CHANNELS_1 = 64
        CHANNELS_2 = 128
        NEURONS_0 = 512
        NEURONS_1 = 128

        # Remembered so that print_data_shapes can build a matching dummy input.
        self.n_channels_in = n_channels_in

        self.conv0 = nn.Sequential(
            nn.Conv1d(n_channels_in, out_channels=CHANNELS_0, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

        self.conv1 = nn.Sequential(
            nn.Conv1d(CHANNELS_0, out_channels=CHANNELS_1, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(CHANNELS_1, out_channels=CHANNELS_2, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

        self.fc0 = nn.Sequential(
            nn.Linear(CHANNELS_2, NEURONS_0),
            nn.ReLU()
        )

        self.fc1 = nn.Sequential(
            nn.Linear(NEURONS_0, NEURONS_1),
            nn.ReLU()
        )

        self.dropout = nn.Dropout(0.5)

        # No Softmax here: the training loss (CrossEntropyLoss) applies
        # log-softmax itself, so a Softmax layer would be applied twice.
        # Kept as a Sequential so parameter names stay "fc2.0.*".
        self.fc2 = nn.Sequential(
            nn.Linear(NEURONS_1, n_class),
        )

    def get_conv_embedding(self, x: torch.Tensor) -> torch.Tensor:
        """Return the convolutional features after global max pooling (N x 128)."""
        out = self.conv0(x)
        out = self.conv1(out)
        out = self.conv2(out)

        # Global max pooling
        return torch.max(out, dim=2).values

    def get_fc_embedding(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the second fully connected layer (N x 128).

        Dropout between fc0 and fc1 is active only in training mode.
        """
        out = self.get_conv_embedding(x)
        out = self.fc0(out)
        out = self.dropout(out)
        return self.fc1(out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits (N x n_class) for a batch ``x`` (N x C x L)."""
        out = self.get_fc_embedding(x)
        out = self.dropout(out)
        return self.fc2(out)

    def print_data_shapes(self, input_length: int) -> None:
        """Print the per-sample tensor shape after every stage for a dummy input."""
        device = next(self.parameters()).device
        # Shape inspection only: no gradients are needed.
        with torch.no_grad():
            x = torch.randn(1, self.n_channels_in, input_length).to(device)
            print(f"Input shape: {x.shape[1:]}")

            out = self.conv0(x)
            print(f"Conv0 shape: {out.shape[1:]}")

            out = self.conv1(out)
            print(f"Conv1 shape: {out.shape[1:]}")

            out = self.conv2(out)
            print(f"Conv2 shape: {out.shape[1:]}")

            out = torch.max(out, dim=2).values
            print(f"Global max pooling shape: {out.shape}")

            out = self.fc0(out)
            print(f"FC0 shape: {out.shape[1:]}")

            out = self.fc1(out)
            print(f"FC1 shape: {out.shape[1:]}")

            out = self.fc2(out)
            print(f"FC2 shape: {out.shape[1:]}")

    def predict(self, X):
        """Return the predicted class id per sample; switches the model to eval mode."""
        self.eval()
        with torch.no_grad():
            pred = self(X).argmax(dim=1)
        return pred

    def predict_proba(self, X):
        """Return softmax class probabilities per sample; switches the model to eval mode."""
        self.eval()
        with torch.no_grad():
            return torch.softmax(self(X), dim=1)

In [None]:
class MLP(nn.Module):
    """Fully connected classifier: n_features_in -> 4*n_features_in -> n_features_in -> n_class.

    ``forward`` returns raw class scores (logits), which is what
    ``nn.CrossEntropyLoss`` expects; use ``predict_proba`` for probabilities.

    Args:
        n_class: Number of output classes.
        n_features_in: Length of each input feature vector.
    """

    def __init__(self, n_class: int, n_features_in: int) -> None:
        super().__init__()

        NEURONS_0 = n_features_in
        NEURONS_1 = NEURONS_0*4
        NEURONS_2 = NEURONS_0

        self.fc0 = nn.Sequential(
            nn.Linear(NEURONS_0, NEURONS_1),
            nn.ReLU()
        )

        self.fc1 = nn.Sequential(
            nn.Linear(NEURONS_1, NEURONS_2),
            nn.ReLU()
        )

        self.dropout = nn.Dropout(0.5)

        # No Softmax here: the training loss (CrossEntropyLoss) applies
        # log-softmax itself, so a Softmax layer would be applied twice.
        # Kept as a Sequential so parameter names stay "fc2.0.*".
        self.fc2 = nn.Sequential(
            nn.Linear(NEURONS_2, n_class),
        )

    def get_fc_embedding(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the second fully connected layer.

        Dropout between fc0 and fc1 is active only in training mode.
        """
        out = self.fc0(x)
        out = self.dropout(out)
        return self.fc1(out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits (N x n_class) for a batch ``x`` (N x n_features_in)."""
        out = self.get_fc_embedding(x)
        out = self.dropout(out)
        return self.fc2(out)

    def predict(self, X):
        """Return the predicted class id per sample; switches the model to eval mode."""
        self.eval()
        with torch.no_grad():
            pred = self(X).argmax(dim=1)
        return pred

    def predict_proba(self, X):
        """Return softmax class probabilities per sample; switches the model to eval mode."""
        self.eval()
        with torch.no_grad():
            return torch.softmax(self(X), dim=1)

In [None]:
from torch.cuda import is_available

# Flag telling the rest of the notebook whether a CUDA device can be used.
iscuda = bool(is_available())
if iscuda:
    print(torch.cuda.get_device_name(0))

In [None]:
# CNN consumes single-channel time-domain signals; MLP consumes LENGTH//2 FFT amplitudes.
model_cnn = CNN(n_class=len(CLASS_NAMES), n_channels_in=1)
model_mlp = MLP(n_class=len(CLASS_NAMES), n_features_in=LENGTH // 2)
if iscuda:
    model_cnn, model_mlp = model_cnn.cuda(), model_mlp.cuda()


In [None]:
# Show how the tensor shape evolves through the CNN for signals of the training length.
model_cnn.print_data_shapes(input_length=X_train.shape[1])

### Training DL model

In [None]:
def plot_loss(train_loss, val_loss, val_accuracy, save_name):
    """Plot the loss curves and the validation accuracy, then save and show the figure.

    Left panel: validation and training loss per epoch.
    Right panel: validation accuracy per epoch.
    The figure is written to ``save_name`` at 300 dpi before being shown.
    """
    curve_style = dict(marker='.', linewidth=.5)

    fig, (ax_loss, ax_accuracy) = plt.subplots(ncols=2, figsize=(8, 4))

    ax_loss.plot(val_loss, label='Validation', **curve_style)
    ax_loss.plot(train_loss, label='Train', **curve_style)
    ax_accuracy.plot(val_accuracy, label='Validation', **curve_style)

    for ax, y_label in ((ax_loss, "Loss"), (ax_accuracy, "Accuracy")):
        ax.legend()
        ax.set_xlabel("Epoch")
        ax.set_ylabel(y_label)

    fig.tight_layout()
    fig.savefig(save_name, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
from torch import optim
from tqdm import tqdm

def train(dataloader_train, dataloader_test, model, save_name, epochs=5):
    """Train ``model`` and evaluate it on ``dataloader_test`` after every epoch.

    Args:
        dataloader_train: Loader of training batches; its ``dataset`` must
            provide ``get_class_weights()``.
        dataloader_test: Loader of validation batches.
        model: Network to optimise. It should return raw class logits, because
            ``nn.CrossEntropyLoss`` applies log-softmax internally.
        save_name: Path where the final ``state_dict`` is saved.
        epochs: Number of passes over the training data.

    Returns:
        ``(train_loss, val_loss, val_accuracy)`` - three lists with one entry
        per epoch. Losses are means over batches; accuracy is the fraction of
        correctly classified validation samples.
    """
    # Work on whatever device the model already lives on.
    device = next(model.parameters()).device

    # Weights come from the dataset actually being trained on, not from a global.
    class_weights = dataloader_train.dataset.get_class_weights().to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights) # Important - class imbalance!
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    train_loss = []
    val_loss = []
    val_accuracy = []

    for epoch in range(epochs):

        model.train()
        losses = []

        # Iterating the loader directly lets tqdm show the total number of batches.
        for inputs, labels in tqdm(dataloader_train, desc=f"[{epoch + 1}/{epochs}] Training"):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            losses.append(loss.item())

        train_loss.append(np.mean(losses))

        # validation loss
        model.eval()
        losses = []
        n_correct = 0
        n_seen = 0

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for inputs, labels in tqdm(dataloader_test, desc=f"[{epoch + 1}/{epochs}] Validation"):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                losses.append(loss.item())
                # Count samples rather than averaging per-batch accuracies, so a
                # shorter last batch does not get a disproportionate weight.
                n_correct += (outputs.argmax(dim=1) == labels).sum().item()
                n_seen += labels.size(0)

        val_accuracy.append(n_correct / max(n_seen, 1))
        val_loss.append(np.mean(losses))

        # print loss
        print(f'\ttrain loss: {train_loss[-1]:.4f} | val loss: {val_loss[-1]:.4f}| val acc.: {val_accuracy[-1]*100.0:.2f}\n')

    torch.save(model.state_dict(), save_name)

    return train_loss, val_loss, val_accuracy

In [None]:
# Train the CNN on the time-domain loaders; the weights end up in 'model_cnn.pth'.
train_loss, val_loss, val_accuracy = train(
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    model=model_cnn,
    save_name='model_cnn.pth',
    epochs=25,
)

In [None]:
# Learning curves of the CNN run
plot_loss(train_loss=train_loss, val_loss=val_loss, val_accuracy=val_accuracy, save_name='model_cnn.png')

In [None]:
# Train the MLP on the frequency-domain loaders; the weights end up in 'model_mlp.pth'.
train_loss, val_loss, val_accuracy = train(
    dataloader_train=dataloader_train_fft,
    dataloader_test=dataloader_test_fft,
    model=model_mlp,
    save_name='model_mlp.pth',
    epochs=25,
)

In [None]:
# Learning curves of the MLP run
plot_loss(train_loss=train_loss, val_loss=val_loss, val_accuracy=val_accuracy, save_name='model_mlp.png')

In [None]:
def predict_test(model, dataloader_test):
    """Run ``model.predict`` over every batch of ``dataloader_test``.

    Args:
        model: Network exposing ``predict(batch)`` that returns class ids.
        dataloader_test: Loader yielding ``(inputs, labels)`` batches.

    Returns:
        ``(predictions, labels)`` - two flat Python lists of class ids in
        loader order.
    """
    # Inputs are moved to the device the model lives on.
    device = next(model.parameters()).device

    predictions, labels = [], []
    # Iterating the loader directly lets tqdm show the total number of batches.
    for batch_inputs, batch_labels in tqdm(dataloader_test, desc=f"| Validation"):
        batch_inputs = batch_inputs.to(device)

        predictions += model.predict(batch_inputs).detach().cpu().numpy().tolist()
        # Labels are only collected, so they never need to leave the CPU.
        labels += batch_labels.detach().cpu().numpy().tolist()
    return predictions, labels

In [None]:
# Evaluate the CNN on the time-domain test set.
preds_cnn, labels_cnn = predict_test(model_cnn, dataloader_test)

# Explicit label list keeps the report/matrix aligned with CLASS_NAMES even
# when a class is absent from both the ground truth and the predictions.
report_cnn = classification_report(labels_cnn, preds_cnn, labels=list(range(len(CLASS_NAMES))), target_names=CLASS_NAMES, zero_division=0, digits=3)
print(report_cnn)

# normalize='true' divides each row by the number of ground-truth samples of that class.
confusion_matrix_cnn = confusion_matrix(labels_cnn, preds_cnn, labels=list(range(len(CLASS_NAMES))), normalize='true')
fig, ax = plt.subplots()
heatmap(confusion_matrix_cnn, annot=True, xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES, cmap='Blues', fmt=".3f", ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Ground truth")
ax.set_title("CNN")
plt.show()

In [None]:
# Evaluate the MLP on the frequency-domain test set.
preds_mlp, labels_mlp = predict_test(model_mlp, dataloader_test_fft)

# Explicit label list keeps the report/matrix aligned with CLASS_NAMES even
# when a class is absent from both the ground truth and the predictions.
report_mlp = classification_report(labels_mlp, preds_mlp, labels=list(range(len(CLASS_NAMES))), target_names=CLASS_NAMES, zero_division=0, digits=3)
print(report_mlp)

# normalize='true' divides each row by the number of ground-truth samples of that class.
confusion_matrix_mlp = confusion_matrix(labels_mlp, preds_mlp, labels=list(range(len(CLASS_NAMES))), normalize='true')
fig, ax = plt.subplots()
heatmap(confusion_matrix_mlp, annot=True, xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES, cmap='Blues', fmt=".3f", ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Ground truth")
ax.set_title("MLP")
plt.show()

### Analiza cech głębokich - CNN

In [None]:
def extract_features_conv(model, dataloader_test):
    """Collect ``model.get_conv_embedding`` outputs for every sample of the loader.

    The model is switched to evaluation mode and gradients are disabled, so
    the embeddings are deterministic and no autograd graph is built.

    Args:
        model: Network exposing ``get_conv_embedding(batch)``.
        dataloader_test: Loader yielding ``(inputs, labels)`` batches.

    Returns:
        ``(features, labels)`` as NumPy arrays in loader order.
    """
    device = next(model.parameters()).device
    model.eval()

    features, labels = [], []
    with torch.no_grad():
        # Iterating the loader directly lets tqdm show the total number of batches.
        for batch_inputs, batch_labels in tqdm(dataloader_test, desc=f"| Extracting features"):
            batch_inputs = batch_inputs.to(device)

            features += model.get_conv_embedding(batch_inputs).detach().cpu().numpy().tolist()
            labels += batch_labels.detach().cpu().numpy().tolist()
    return np.array(features), np.array(labels)

def extract_features_linear(model, dataloader_test):
    """Collect ``model.get_fc_embedding`` outputs for every sample of the loader.

    The model is switched to evaluation mode first: the embedding path
    contains a dropout layer, which would otherwise randomly zero features
    whenever the model was left in training mode. Gradients are disabled.

    Args:
        model: Network exposing ``get_fc_embedding(batch)``.
        dataloader_test: Loader yielding ``(inputs, labels)`` batches.

    Returns:
        ``(features, labels)`` as NumPy arrays in loader order.
    """
    device = next(model.parameters()).device
    model.eval()

    features, labels = [], []
    with torch.no_grad():
        # Iterating the loader directly lets tqdm show the total number of batches.
        for batch_inputs, batch_labels in tqdm(dataloader_test, desc=f"| Extracting features"):
            batch_inputs = batch_inputs.to(device)

            features += model.get_fc_embedding(batch_inputs).detach().cpu().numpy().tolist()
            labels += batch_labels.detach().cpu().numpy().tolist()
    return np.array(features), np.array(labels)

In [None]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

def plot_features(features, labels, save_name):
    """Project ``features`` to 2-D with t-SNE and PCA and scatter-plot them by class.

    Args:
        features: 2-D array with one embedding per row.
        labels: Class id of every row of ``features``.
        save_name: Path the figure is saved to (300 dpi) before it is shown.
    """
    labels = np.asarray(labels)

    tsne = TSNE(n_components=2, random_state=0, perplexity=40)
    pca = PCA(n_components=2)

    X_tsne = tsne.fit_transform(features)
    X_pca = pca.fit_transform(features)

    # (projected points, x-axis label, y-axis label) for the left and right panels.
    explained = pca.explained_variance_ratio_
    panels = (
        (X_tsne, "t-SNE 1", "t-SNE 2"),
        (X_pca, f"PC1 {explained[0]*100:.2f}%", f"PC2 {explained[1]*100:.2f}%"),
    )

    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))

    for ax, (points, x_label, y_label) in zip(axes, panels):
        for class_id, class_name in enumerate(CLASS_NAMES):
            index = np.where(labels == class_id)[0]
            ax.scatter(points[index, 0], points[index, 1], label=class_name, marker='.', color='crimson' if class_id == 0 else 'royalblue')
        ax.legend()
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)

    fig.tight_layout()

    fig.savefig(save_name, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
# CNN: embeddings taken right after the convolutional stack (global max pooling).
features, labels = extract_features_conv(model=model_cnn, dataloader_test=dataloader_test)
plot_features(features=features, labels=labels, save_name='features_cnn_1.png')

In [None]:
# CNN: embeddings taken after the second fully connected layer.
features, labels = extract_features_linear(model=model_cnn, dataloader_test=dataloader_test)
plot_features(features=features, labels=labels, save_name='features_cnn_2.png')

In [None]:
# MLP: embeddings taken after the second fully connected layer.
features, labels = extract_features_linear(model_mlp, dataloader_test_fft)
# Saved under its own name so it no longer overwrites the CNN figure 'features_cnn_2.png'.
plot_features(features, labels, 'features_mlp.png')