In [1]:
#@title Imports
from sklearn.decomposition import PCA
import plotly.graph_objects as go
from sklearn.manifold import TSNE
import plotly.express as px
from tqdm import tqdm
import pandas as pd
import numpy as np
import random

In [2]:
#@title Load sequences from csv
# Every CSV is read with pandas and its 'sequences' and 'labels' columns are
# converted to plain Python lists. `df` is rebound for each file, so after this
# cell it holds the last file read (LazDEF_sample.csv).

# Full LazBF set.
df = pd.read_csv('./drive/MyDrive/LazBF_sequences.csv')
LazBF_sequences, LazBF_labels = df['sequences'].tolist(), df['labels'].tolist()

# LazBF sample.
df = pd.read_csv('./drive/MyDrive/LazBF_sample.csv')
LazBF_sample, LazBF_sample_labels = df['sequences'].tolist(), df['labels'].tolist()

# Full LazDEF set.
df = pd.read_csv('./drive/MyDrive/LazDEF_sequences.csv')
LazDEF_sequences, LazDEF_labels = df['sequences'].tolist(), df['labels'].tolist()

# LazDEF sample.
df = pd.read_csv('./drive/MyDrive/LazDEF_sample.csv')
LazDEF_sample, LazDEF_sample_labels = df['sequences'].tolist(), df['labels'].tolist()

In [3]:
#@title Load Embs
# Load the pre-computed embedding arrays, one LazBF/LazDEF pair per file suffix.
# NOTE(review): the suffix (none / LazBF / LazDEF) presumably names the data the
# masked-language model was further trained on — confirm against the code that
# produced these files.

# Suffix "none".
lazbf_mlm_none, lazdef_mlm_none = (
    np.load("./drive/MyDrive/Embeddings/LazBF_mlm_none.npy"),
    np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_none.npy"),
)

# Suffix "LazBF".
lazbf_mlm_lazbf, lazdef_mlm_lazbf = (
    np.load("./drive/MyDrive/Embeddings/LazBF_mlm_LazBF.npy"),
    np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_LazBF.npy"),
)

# Suffix "LazDEF".
lazbf_mlm_lazdef, lazdef_mlm_lazdef = (
    np.load("./drive/MyDrive/Embeddings/LazBF_mlm_LazDEF.npy"),
    np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_LazDEF.npy"),
)

In [4]:
# Upper bound on the number of rows kept from every embedding array and from
# the two sample-label lists; the plotting cells below also read `cutoff`.
cutoff = 5000

# Embedding arrays: keep only the leading `cutoff` rows of each.
lazbf_mlm_none, lazdef_mlm_none = lazbf_mlm_none[:cutoff], lazdef_mlm_none[:cutoff]
lazbf_mlm_lazbf, lazdef_mlm_lazbf = lazbf_mlm_lazbf[:cutoff], lazdef_mlm_lazbf[:cutoff]
lazbf_mlm_lazdef, lazdef_mlm_lazdef = lazbf_mlm_lazdef[:cutoff], lazdef_mlm_lazdef[:cutoff]

# Sample labels: truncated the same way so they stay the same length as the
# embeddings they are plotted against.
LazBF_sample_labels, LazDEF_sample_labels = (
    LazBF_sample_labels[:cutoff],
    LazDEF_sample_labels[:cutoff],
)

In [None]:
#@title t-sne for lazbf_mlm_none
# Panel "d": PCA (100 components) followed by 2-D t-SNE of `lazbf_mlm_none`,
# drawn as a scatter plot coloured by `LazBF_sample_labels`.

# Derive every per-point quantity from the data actually loaded instead of from
# `cutoff`, so the cell still runs when fewer than `cutoff` rows are available.
# With exactly `cutoff` rows the values are identical to cutoff/100 and cutoff/12.
n_points = len(lazbf_mlm_none)

pca = PCA(n_components=100, random_state=1)
X_pca = pca.fit_transform(lazbf_mlm_none)

tsne = TSNE(
    n_components=2,
    perplexity=n_points / 100,
    learning_rate=n_points / 12,
    random_state=2,
)
X_embedded = tsne.fit_transform(X_pca)

df = pd.DataFrame({
    't-SNE 1': X_embedded[:, 0],
    't-SNE 2': X_embedded[:, 1],
    'labels': LazBF_sample_labels,
    'alpha': 0.75,  # scalar is broadcast to every row; not read by the plot below
})
fig = px.scatter(
    df,
    x='t-SNE 1',
    y='t-SNE 2',
    color='labels',
    color_continuous_scale=['royalblue', 'crimson'],
    opacity=0.85,  # px documents `opacity` as a single float applied to all markers
)

fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    # NOTE(review): each axis is scale-anchored to itself. To lock a 1:1 aspect
    # ratio the y axis would normally use scaleanchor="x". Left unchanged so the
    # rendered figure stays the same — confirm the intent.
    yaxis=dict(scaleanchor="y", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    xaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    showlegend=False,
    coloraxis_showscale=False,  # hide the colour bar
    font=dict(family="Arial"),
)
fig.update_traces(marker=dict(size=5))

# Panel letter, positioned in paper coordinates above and to the left of the plot area.
fig.add_annotation(
    text="d",
    xref="paper",
    yref="paper",
    x=-0.2,
    y=1.13,
    showarrow=False,
    font=dict(size=32, color='black'),
)

# Same font size for axis titles and tick labels on both axes.
axis_fonts = dict(title_font=dict(size=24), tickfont=dict(size=24))
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)

# Black frame drawn 10 units outside the extent of the points.
x_range = [df['t-SNE 1'].min() - 10, df['t-SNE 1'].max() + 10]
y_range = [df['t-SNE 2'].min() - 10, df['t-SNE 2'].max() + 10]

# Corners of the frame as a closed path (the first corner is repeated at the end).
border_x = [x_range[0], x_range[1], x_range[1], x_range[0], x_range[0]]
border_y = [y_range[0], y_range[0], y_range[1], y_range[1], y_range[0]]

# One line shape per edge, joining each corner to the next one on the path.
for x0, y0, x1, y1 in zip(border_x, border_y, border_x[1:], border_y[1:]):
    fig.add_shape(
        type="line",
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color="black", width=1),
    )

fig.show()

In [None]:
#@title t-sne for lazbf_mlm_lazdef
# Panel "e": PCA (100 components) followed by 2-D t-SNE of `lazbf_mlm_lazdef`,
# drawn as a scatter plot coloured by `LazBF_sample_labels`.

# Derive every per-point quantity from the data actually loaded instead of from
# `cutoff`, so the cell still runs when fewer than `cutoff` rows are available.
# With exactly `cutoff` rows the values are identical to cutoff/100 and cutoff/12.
n_points = len(lazbf_mlm_lazdef)

pca = PCA(n_components=100, random_state=1)
X_pca = pca.fit_transform(lazbf_mlm_lazdef)

tsne = TSNE(
    n_components=2,
    perplexity=n_points / 100,
    learning_rate=n_points / 12,
    random_state=2,
)
X_embedded = tsne.fit_transform(X_pca)

df = pd.DataFrame({
    't-SNE 1': X_embedded[:, 0],
    't-SNE 2': X_embedded[:, 1],
    'labels': LazBF_sample_labels,
    'alpha': 0.75,  # scalar is broadcast to every row; not read by the plot below
})
fig = px.scatter(
    df,
    x='t-SNE 1',
    y='t-SNE 2',
    color='labels',
    color_continuous_scale=['royalblue', 'crimson'],
    opacity=0.85,  # px documents `opacity` as a single float applied to all markers
)

fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    # NOTE(review): each axis is scale-anchored to itself. To lock a 1:1 aspect
    # ratio the y axis would normally use scaleanchor="x". Left unchanged so the
    # rendered figure stays the same — confirm the intent.
    yaxis=dict(scaleanchor="y", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    xaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    showlegend=False,
    coloraxis_showscale=False,  # hide the colour bar
    font=dict(family="Arial"),
)
fig.update_traces(marker=dict(size=5))

# Panel letter, positioned in paper coordinates above and to the left of the plot area.
fig.add_annotation(
    text="e",
    xref="paper",
    yref="paper",
    x=-0.2,
    y=1.13,
    showarrow=False,
    font=dict(size=32, color='black'),
)

# Same font size for axis titles and tick labels on both axes.
axis_fonts = dict(title_font=dict(size=24), tickfont=dict(size=24))
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)

# Black frame drawn 10 units outside the extent of the points.
x_range = [df['t-SNE 1'].min() - 10, df['t-SNE 1'].max() + 10]
y_range = [df['t-SNE 2'].min() - 10, df['t-SNE 2'].max() + 10]

# Corners of the frame as a closed path (the first corner is repeated at the end).
border_x = [x_range[0], x_range[1], x_range[1], x_range[0], x_range[0]]
border_y = [y_range[0], y_range[0], y_range[1], y_range[1], y_range[0]]

# One line shape per edge, joining each corner to the next one on the path.
for x0, y0, x1, y1 in zip(border_x, border_y, border_x[1:], border_y[1:]):
    fig.add_shape(
        type="line",
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color="black", width=1),
    )

fig.show()

In [None]:
#@title t-sne for lazbf_mlm_lazbf
# Panel "f": PCA (100 components) followed by 2-D t-SNE of `lazbf_mlm_lazbf`,
# drawn as a scatter plot coloured by `LazBF_sample_labels`.

# Derive every per-point quantity from the data actually loaded instead of from
# `cutoff`, so the cell still runs when fewer than `cutoff` rows are available.
# With exactly `cutoff` rows the values are identical to cutoff/100 and cutoff/12.
n_points = len(lazbf_mlm_lazbf)

pca = PCA(n_components=100, random_state=1)
X_pca = pca.fit_transform(lazbf_mlm_lazbf)

tsne = TSNE(
    n_components=2,
    perplexity=n_points / 100,
    learning_rate=n_points / 12,
    random_state=2,
)
X_embedded = tsne.fit_transform(X_pca)

df = pd.DataFrame({
    't-SNE 1': X_embedded[:, 0],
    't-SNE 2': X_embedded[:, 1],
    'labels': LazBF_sample_labels,
    'alpha': 0.75,  # scalar is broadcast to every row; not read by the plot below
})
fig = px.scatter(
    df,
    x='t-SNE 1',
    y='t-SNE 2',
    color='labels',
    color_continuous_scale=['royalblue', 'crimson'],
    opacity=0.85,  # px documents `opacity` as a single float applied to all markers
)

# NOTE(review): this panel is 533 px wide and leaves `showlegend` at its default,
# unlike the 500 px panels that set showlegend=False — presumably to leave room
# for a legend; confirm.
fig.update_layout(
    autosize=False,
    width=533,
    height=500,
    # NOTE(review): each axis is scale-anchored to itself. To lock a 1:1 aspect
    # ratio the y axis would normally use scaleanchor="x". Left unchanged so the
    # rendered figure stays the same — confirm the intent.
    # dtick=20 puts a y-axis tick every 20 units.
    yaxis=dict(scaleanchor="y", scaleratio=1, showgrid=False, zeroline=False, color='black', dtick=20),
    xaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    coloraxis_showscale=False,  # hide the colour bar
    font=dict(family="Arial"),
)
fig.update_traces(marker=dict(size=5))

# Panel letter, positioned in paper coordinates above and to the left of the plot area.
fig.add_annotation(
    text="f",
    xref="paper",
    yref="paper",
    x=-0.2,
    y=1.13,
    showarrow=False,
    font=dict(size=32, color='black'),
)

# Same font size for axis titles and tick labels on both axes.
axis_fonts = dict(title_font=dict(size=24), tickfont=dict(size=24))
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)

# Black frame drawn 10 units outside the extent of the points.
x_range = [df['t-SNE 1'].min() - 10, df['t-SNE 1'].max() + 10]
y_range = [df['t-SNE 2'].min() - 10, df['t-SNE 2'].max() + 10]

# Corners of the frame as a closed path (the first corner is repeated at the end).
border_x = [x_range[0], x_range[1], x_range[1], x_range[0], x_range[0]]
border_y = [y_range[0], y_range[0], y_range[1], y_range[1], y_range[0]]

# One line shape per edge, joining each corner to the next one on the path.
for x0, y0, x1, y1 in zip(border_x, border_y, border_x[1:], border_y[1:]):
    fig.add_shape(
        type="line",
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color="black", width=1),
    )

fig.show()

In [None]:
#@title t-sne for lazdef_mlm_none
# Panel "a": PCA (100 components) followed by 2-D t-SNE of `lazdef_mlm_none`,
# drawn as a scatter plot coloured by `LazDEF_sample_labels`.

# Derive every per-point quantity from the data actually loaded instead of from
# `cutoff`, so the cell still runs when fewer than `cutoff` rows are available.
# With exactly `cutoff` rows the values are identical to cutoff/100 and cutoff/12.
n_points = len(lazdef_mlm_none)

pca = PCA(n_components=100, random_state=1)
X_pca = pca.fit_transform(lazdef_mlm_none)

tsne = TSNE(
    n_components=2,
    perplexity=n_points / 100,
    learning_rate=n_points / 12,
    random_state=2,
)
X_embedded = tsne.fit_transform(X_pca)

df = pd.DataFrame({
    't-SNE 1': X_embedded[:, 0],
    't-SNE 2': X_embedded[:, 1],
    # The points come from a LazDEF embedding array, so they are coloured with
    # the LazDEF sample labels rather than the LazBF ones.
    # NOTE(review): assumes the embedding rows are in the same order as
    # LazDEF_sample — confirm against the code that produced the .npy file.
    'labels': LazDEF_sample_labels,
    'alpha': 0.75,  # scalar is broadcast to every row; not read by the plot below
})
fig = px.scatter(
    df,
    x='t-SNE 1',
    y='t-SNE 2',
    color='labels',
    color_continuous_scale=['royalblue', 'crimson'],
    opacity=0.85,  # px documents `opacity` as a single float applied to all markers
)

fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    # NOTE(review): each axis is scale-anchored to itself. To lock a 1:1 aspect
    # ratio the y axis would normally use scaleanchor="x". Left unchanged so the
    # rendered figure stays the same — confirm the intent.
    yaxis=dict(scaleanchor="y", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    xaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    showlegend=False,
    coloraxis_showscale=False,  # hide the colour bar
    font=dict(family="Arial"),
)
fig.update_traces(marker=dict(size=5))

# Panel letter, positioned in paper coordinates above and to the left of the plot area.
fig.add_annotation(
    text="a",
    xref="paper",
    yref="paper",
    x=-0.2,
    y=1.13,
    showarrow=False,
    font=dict(size=32, color='black'),
)

# Same font size for axis titles and tick labels on both axes.
axis_fonts = dict(title_font=dict(size=24), tickfont=dict(size=24))
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)

# Black frame drawn 10 units outside the extent of the points.
x_range = [df['t-SNE 1'].min() - 10, df['t-SNE 1'].max() + 10]
y_range = [df['t-SNE 2'].min() - 10, df['t-SNE 2'].max() + 10]

# Corners of the frame as a closed path (the first corner is repeated at the end).
border_x = [x_range[0], x_range[1], x_range[1], x_range[0], x_range[0]]
border_y = [y_range[0], y_range[0], y_range[1], y_range[1], y_range[0]]

# One line shape per edge, joining each corner to the next one on the path.
for x0, y0, x1, y1 in zip(border_x, border_y, border_x[1:], border_y[1:]):
    fig.add_shape(
        type="line",
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color="black", width=1),
    )

fig.show()

In [None]:
#@title t-sne for lazdef_mlm_lazbf
# Panel "b": PCA (100 components) followed by 2-D t-SNE of `lazdef_mlm_lazbf`,
# drawn as a scatter plot coloured by `LazDEF_sample_labels`.

# Derive every per-point quantity from the data actually loaded instead of from
# `cutoff`, so the cell still runs when fewer than `cutoff` rows are available.
# With exactly `cutoff` rows the values are identical to cutoff/100 and cutoff/12.
n_points = len(lazdef_mlm_lazbf)

# NOTE(review): this PCA uses random_state=2 while the other t-SNE cells in this
# notebook use random_state=1. Kept as-is so the figure stays the same — confirm
# whether the different seed is intentional.
pca = PCA(n_components=100, random_state=2)
X_pca = pca.fit_transform(lazdef_mlm_lazbf)

tsne = TSNE(
    n_components=2,
    perplexity=n_points / 100,
    learning_rate=n_points / 12,
    random_state=2,
)
X_embedded = tsne.fit_transform(X_pca)

df = pd.DataFrame({
    't-SNE 1': X_embedded[:, 0],
    't-SNE 2': X_embedded[:, 1],
    # The points come from a LazDEF embedding array, so they are coloured with
    # the LazDEF sample labels rather than the LazBF ones.
    # NOTE(review): assumes the embedding rows are in the same order as
    # LazDEF_sample — confirm against the code that produced the .npy file.
    'labels': LazDEF_sample_labels,
    'alpha': 0.75,  # scalar is broadcast to every row; not read by the plot below
})
fig = px.scatter(
    df,
    x='t-SNE 1',
    y='t-SNE 2',
    color='labels',
    color_continuous_scale=['royalblue', 'crimson'],
    opacity=0.85,  # px documents `opacity` as a single float applied to all markers
)

fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    # NOTE(review): each axis is scale-anchored to itself. To lock a 1:1 aspect
    # ratio the y axis would normally use scaleanchor="x". Left unchanged so the
    # rendered figure stays the same — confirm the intent.
    yaxis=dict(scaleanchor="y", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    xaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    showlegend=False,
    coloraxis_showscale=False,  # hide the colour bar
    font=dict(family="Arial"),
)
fig.update_traces(marker=dict(size=5))

# Panel letter, positioned in paper coordinates above and to the left of the plot area.
fig.add_annotation(
    text="b",
    xref="paper",
    yref="paper",
    x=-0.2,
    y=1.13,
    showarrow=False,
    font=dict(size=32, color='black'),
)

# Same font size for axis titles and tick labels on both axes.
axis_fonts = dict(title_font=dict(size=24), tickfont=dict(size=24))
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)

# Black frame drawn 10 units outside the extent of the points.
x_range = [df['t-SNE 1'].min() - 10, df['t-SNE 1'].max() + 10]
y_range = [df['t-SNE 2'].min() - 10, df['t-SNE 2'].max() + 10]

# Corners of the frame as a closed path (the first corner is repeated at the end).
border_x = [x_range[0], x_range[1], x_range[1], x_range[0], x_range[0]]
border_y = [y_range[0], y_range[0], y_range[1], y_range[1], y_range[0]]

# One line shape per edge, joining each corner to the next one on the path.
for x0, y0, x1, y1 in zip(border_x, border_y, border_x[1:], border_y[1:]):
    fig.add_shape(
        type="line",
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color="black", width=1),
    )

fig.show()

In [None]:
#@title t-sne for lazdef_mlm_lazdef
# Panel "c": PCA (100 components) followed by 2-D t-SNE of `lazdef_mlm_lazdef`,
# drawn as a scatter plot coloured by `LazDEF_sample_labels`.

# Derive every per-point quantity from the data actually loaded instead of from
# `cutoff`, so the cell still runs when fewer than `cutoff` rows are available.
# With exactly `cutoff` rows the values are identical to cutoff/100 and cutoff/12.
n_points = len(lazdef_mlm_lazdef)

pca = PCA(n_components=100, random_state=1)
X_pca = pca.fit_transform(lazdef_mlm_lazdef)

tsne = TSNE(
    n_components=2,
    perplexity=n_points / 100,
    learning_rate=n_points / 12,
    random_state=2,
)
X_embedded = tsne.fit_transform(X_pca)

df = pd.DataFrame({
    't-SNE 1': X_embedded[:, 0],
    't-SNE 2': X_embedded[:, 1],
    # The points come from a LazDEF embedding array, so they are coloured with
    # the LazDEF sample labels rather than the LazBF ones.
    # NOTE(review): assumes the embedding rows are in the same order as
    # LazDEF_sample — confirm against the code that produced the .npy file.
    'labels': LazDEF_sample_labels,
    'alpha': 0.75,  # scalar is broadcast to every row; not read by the plot below
})
fig = px.scatter(
    df,
    x='t-SNE 1',
    y='t-SNE 2',
    color='labels',
    color_continuous_scale=['royalblue', 'crimson'],
    opacity=0.85,  # px documents `opacity` as a single float applied to all markers
)

# NOTE(review): this panel is 533 px wide and leaves `showlegend` at its default,
# unlike the 500 px panels that set showlegend=False — presumably to leave room
# for a legend; confirm.
fig.update_layout(
    autosize=False,
    width=533,
    height=500,
    # NOTE(review): each axis is scale-anchored to itself. To lock a 1:1 aspect
    # ratio the y axis would normally use scaleanchor="x". Left unchanged so the
    # rendered figure stays the same — confirm the intent.
    # dtick=20 puts a y-axis tick every 20 units.
    yaxis=dict(scaleanchor="y", scaleratio=1, showgrid=False, zeroline=False, color='black', dtick=20),
    xaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False, color='black'),
    plot_bgcolor='#fff',
    paper_bgcolor='#fff',
    coloraxis_showscale=False,  # hide the colour bar
    font=dict(family="Arial"),
)
fig.update_traces(marker=dict(size=5))

# Panel letter, positioned in paper coordinates above and to the left of the plot area.
fig.add_annotation(
    text="c",
    xref="paper",
    yref="paper",
    x=-0.2,
    y=1.13,
    showarrow=False,
    font=dict(size=32, color='black'),
)

# Same font size for axis titles and tick labels on both axes.
axis_fonts = dict(title_font=dict(size=24), tickfont=dict(size=24))
fig.update_xaxes(**axis_fonts)
fig.update_yaxes(**axis_fonts)

# Black frame drawn 10 units outside the extent of the points.
x_range = [df['t-SNE 1'].min() - 10, df['t-SNE 1'].max() + 10]
y_range = [df['t-SNE 2'].min() - 10, df['t-SNE 2'].max() + 10]

# Corners of the frame as a closed path (the first corner is repeated at the end).
border_x = [x_range[0], x_range[1], x_range[1], x_range[0], x_range[0]]
border_y = [y_range[0], y_range[0], y_range[1], y_range[1], y_range[0]]

# One line shape per edge, joining each corner to the next one on the path.
for x0, y0, x1, y1 in zip(border_x, border_y, border_x[1:], border_y[1:]):
    fig.add_shape(
        type="line",
        x0=x0, y0=y0, x1=x1, y1=y1,
        line=dict(color="black", width=1),
    )

fig.show()