In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

In [2]:
## plotting utilities

# One-letter amino-acid symbols (plus the special tokens 'X', 'NOSEQ' and '-')
# mapped to consecutive integer indices, in the order listed here.
aa1_to_index = {
    symbol: position
    for position, symbol in enumerate((
        'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
        'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y',
        'X', 'NOSEQ', '-',
    ))
}
# Reverse lookup: integer index -> symbol.
index_to_aa1 = dict(zip(aa1_to_index.values(), aa1_to_index.keys()))

# Plot palette: 0-255 RGB triplets rescaled to the 0-1 floats matplotlib expects.
# Row i is used for in_article[i].
colors = np.array((
    (106, 61, 154),
    (254, 192, 113),
    (226, 28, 29),
    (250, 155, 153),
    (252, 129, 2),
    (168, 206, 227),
    (51, 162, 45),
    (179, 225, 139),
    (28, 121, 179),
    (193, 193, 193),
)) / 255.0

# Groups that get plotted, one palette row each.
# NOTE(review): a tenth 'Other' group was disabled here; the last palette row
# is presumably reserved for it -- confirm before re-enabling.
in_article = [
    'Acidobacteria',
    'Actinobacteria',
    'Bacteroidetes',
    'Chloroflexi',
    'Cyanobacteria',
    'Deinococcus-Thermus',
    'Firmicutes',
    'Fusobacteria',
    'Proteobacteria',
]

# Group labels used by `_plot_loop`, which compares them element-wise against
# the names in `in_article` to mask the rows of an embedding -- presumably one
# label per embedded sample (confirm against how the .npy file was written).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code; only load this file from a trusted source.
labels = np.load('embeddings/labels.npy', allow_pickle=True)

def _plot_loop(data, ax=None):
    """Scatter one 2-D embedding, drawing each group in `in_article` in its own color.

    Reads the module-level `labels`, `in_article` and `colors`.

    Parameters
    ----------
    data : array indexed as ``data[mask, 0]`` / ``data[mask, 1]``
        Point coordinates; column 0 is drawn on x and column 1 on y. The
        boolean mask is built from `labels`, so rows must line up with it.
    ax : matplotlib Axes, optional
        Where to draw. When omitted, the pyplot module itself is used, i.e.
        the points go onto the current axes.
    """
    target = plt if ax is None else ax
    # zip stops after the last entry of in_article, so surplus palette rows
    # are simply left unused.
    for group, rgb in zip(in_article, colors):
        members = labels == group
        target.plot(
            data[members, 0],
            data[members, 1],
            '.',
            color=rgb,
            label=group,
            markersize=1,
        )
    # Hide ticks and frame.
    target.axis('off')

def plotter(data, name=None):
    """Draw one embedding in its own figure and optionally save it as an SVG.

    Parameters
    ----------
    data : 2-D embedding accepted by `_plot_loop`.
    name : str, optional
        When given, it is used as the plot title and the figure is written to
        ``'<name>.svg'`` in the current working directory. When None, the
        figure is only drawn, not saved.
    """
    # Explicit figure/axes handles instead of the pyplot "current figure"
    # state, so the title and the saved file belong to this figure.
    fig, ax = plt.subplots()
    _plot_loop(data, ax=ax)
    ax.axis('off')
    if name is not None:
        ax.set_title(name)
        # The original literal had no placeholder, so every call overwrote
        # the single file 'name.svg'; interpolate the actual name.
        fig.savefig(f'{name}.svg', bbox_inches='tight')
    
def plot_grid(data, names, label=None):
    """Show several embeddings side by side in a two-column grid of subplots.

    Parameters
    ----------
    data : sequence of 2-D embeddings, each accepted by `_plot_loop`.
    names : sequence of str
        One subplot title per entry of `data`, in the same order.
    label : optional
        Accepted for backward compatibility with the original signature;
        it is not used by the plotting code.

    Raises
    ------
    ValueError
        If `data` and `names` have different lengths.
    """
    data = list(data)
    names = list(names)
    if len(data) != len(names):
        raise ValueError(
            f'plot_grid got {len(data)} embeddings but {len(names)} names'
        )
    if not data:
        return

    n_cols = 2
    n_rows = -(-len(data) // n_cols)  # ceiling division: enough rows for every panel
    # squeeze=False always returns a 2-D array of Axes, even for a single row.
    fig, axes = plt.subplots(n_rows, n_cols, squeeze=False,
                             figsize=(4 * n_cols, 3 * n_rows))
    panels = axes.ravel()

    for panel, embedding, title in zip(panels, data, names):
        _plot_loop(embedding, ax=panel)
        panel.set_title(title)
    # Blank any leftover cell when the number of embeddings is odd.
    for panel in panels[len(data):]:
        panel.axis('off')

    plt.show()


In [None]:
# If True, load embeddings that were already reduced to 2-D; otherwise load the
# full embeddings and redo the dimensionality reduction (slow).
load_preprocessed_embeddings = True
# Fixed seed so the t-SNE layouts are identical on every Restart & Run All.
TSNE_SEED = 0

# Each .npy file unpacks into a pair: the `*_all` and the `*_beta` embedding.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code; only point these paths at trusted files.
if load_preprocessed_embeddings:
    lstm_all, lstm_beta = np.load('embeddings/pre_embedding_lstm.npy', allow_pickle=True)
    resnet_all, resnet_beta = np.load('embeddings/pre_embedding_resnet.npy', allow_pickle=True)
    transformer_all, transformer_beta = np.load('embeddings/pre_embedding_transformer.npy', allow_pickle=True)
    bottleneck_all, bottleneck_beta = np.load('embeddings/pre_embedding_bottleneck.npy', allow_pickle=True)
else:
    lstm_all, lstm_beta = np.load('embeddings/embedding_lstm.npy', allow_pickle=True)
    resnet_all, resnet_beta = np.load('embeddings/embedding_resnet.npy', allow_pickle=True)
    transformer_all, transformer_beta = np.load('embeddings/embedding_transformer.npy', allow_pickle=True)
    bottleneck_all, bottleneck_beta = np.load('embeddings/embedding_bottleneck.npy', allow_pickle=True)

    # t-SNE dimensionality reduction to 2 components. t-SNE is stochastic, so
    # the estimator is seeded; every fit_transform call refits from scratch.
    tsne = TSNE(n_components=2, random_state=TSNE_SEED)

    lstm_all = tsne.fit_transform(lstm_all)
    lstm_beta = tsne.fit_transform(lstm_beta)
    resnet_all = tsne.fit_transform(resnet_all)
    resnet_beta = tsne.fit_transform(resnet_beta)
    transformer_all = tsne.fit_transform(transformer_all)
    transformer_beta = tsne.fit_transform(transformer_beta)
    bottleneck_all = tsne.fit_transform(bottleneck_all)
    bottleneck_beta = tsne.fit_transform(bottleneck_beta)

In [None]:

# If True, show all embeddings together in one grid; otherwise draw each one in
# its own figure and save it to '<name>.svg'.
show_grid = True

# Defined once so both branches plot the same embeddings under the same names.
embeddings = [
    lstm_all, resnet_all, transformer_all, bottleneck_all,
    lstm_beta, resnet_beta, transformer_beta, bottleneck_beta,
]
embedding_names = [
    'lstm_all', 'resnet_all', 'transformer_all', 'bottleneck_all',
    'lstm_beta', 'resnet_beta', 'transformer_beta', 'bottleneck_beta',
]

if show_grid:
    # The original call had no comma between the two lists, so the first list
    # was subscripted by the second (TypeError). `label` is not used by
    # plot_grid's plotting code, so None is passed for it.
    plot_grid(embeddings, embedding_names, label=None)
else:
    for data, name in zip(embeddings, embedding_names):
        plotter(data, name)