In [1]:
import textwrap

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import seaborn as sns

# 1. CNN - Filter Activations and Feature Maps


In [2]:
# Simulate a protein sequence as a vector of integer residue indices
# (one index in [0, num_amino_acids) per position; this is NOT one-hot encoded).
np.random.seed(42)  # fixed seed so Restart & Run All reproduces the same figures
sequence_length = 50  # Length of the protein sequence
num_amino_acids = 20  # Number of possible amino acids (size of the residue alphabet)
protein_sequence = np.random.randint(0, num_amino_acids, sequence_length)

# Create simple simulated filters: one weight per sequence position
filters = np.random.randn(5, sequence_length)  # 5 filters for convolution

# Apply the filters to the protein sequence, keeping the position axis.
# Broadcasting (5, L) * (L,) gives one value per (filter, position), which is
# what the heatmap's "Amino Acid Position" axis needs. Summing each row gives
# np.dot(filters, protein_sequence), i.e. the single score per filter.
activations = filters * protein_sequence
yticklabels = [f'Filter {i+1}' for i in range(activations.shape[0])]
print(yticklabels)

['Filter 1', 'Filter 2', 'Filter 3', 'Filter 4', 'Filter 5']


In [3]:

# Plot heatmap of activations (rows = filters, columns = sequence positions)

# Build the tick labels from the array that is actually plotted. The x labels
# must be a flat 1-D sequence with exactly one entry per column: a hard-coded
# count can disagree with the data, and a 2-D (n, 1) array makes every label
# render as "[1]", "[2]", ... instead of "1", "2", ...
num_filters, num_positions = activations.shape
position_labels = np.arange(1, num_positions + 1)
filter_labels = [f'Filter {i+1}' for i in range(num_filters)]

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(activations, cmap="YlGnBu", cbar=True,
            xticklabels=position_labels, yticklabels=filter_labels, ax=ax)
ax.set_title('CNN Feature Map Activations on Protein Sequence')
ax.set_xlabel('Amino Acid Position')
ax.set_ylabel('Filters')
fig.tight_layout()
fig.savefig("cnn_feature_map_activations.pdf")  # Save as PDF
plt.close(fig)  # release the figure; nothing is shown inline


# 2. ProtBERT - Attention Map


In [4]:
# NOTE: this cell is disabled. Everything below is wrapped in a triple-quoted
# string, so none of it executes; the cell merely evaluates to that string and
# the notebook echoes its repr as the cell output.
# If re-enabled, it would rebind `protein_sequence` (to a list of one-letter
# residue codes) and `num_amino_acids`, draw a random 20x20 matrix as a Plotly
# heatmap, and write it to "protbert_attention_map.pdf".
# NOTE(review): presumably disabled because `fig.write_image` needs a static
# image-export backend installed for Plotly -- confirm before re-enabling.
'''# Simulate a protein sequence with 20 amino acids
protein_sequence = ["A", "C", "G", "T", "D", "V", "L", "M", "E", "Q", "H", "I", "K", "F", "R", "P", "S", "W", "Y", "N"]
num_amino_acids = len(protein_sequence)

# Simulate attention scores between each pair of amino acids
attention_matrix = np.random.rand(num_amino_acids, num_amino_acids)

# Create a heatmap for the attention map
fig = px.imshow(attention_matrix, labels=dict(x="Amino Acid", y="Amino Acid", color="Attention Weight"), 
                x=protein_sequence, y=protein_sequence, color_continuous_scale="Viridis")
fig.update_layout(title="ProtBERT Attention Map for Protein Sequence")
fig.write_image("protbert_attention_map.pdf")  # Save as PDF
'''

'# Simulate a protein sequence with 20 amino acids\nprotein_sequence = ["A", "C", "G", "T", "D", "V", "L", "M", "E", "Q", "H", "I", "K", "F", "R", "P", "S", "W", "Y", "N"]\nnum_amino_acids = len(protein_sequence)\n\n# Simulate attention scores between each pair of amino acids\nattention_matrix = np.random.rand(num_amino_acids, num_amino_acids)\n\n# Create a heatmap for the attention map\nfig = px.imshow(attention_matrix, labels=dict(x="Amino Acid", y="Amino Acid", color="Attention Weight"), \n                x=protein_sequence, y=protein_sequence, color_continuous_scale="Viridis")\nfig.update_layout(title="ProtBERT Attention Map for Protein Sequence")\nfig.write_image("protbert_attention_map.pdf")  # Save as PDF\n'


# 3. LSTM - Hidden State Evolution


In [5]:
# Draw random values standing in for a 3-dimensional LSTM hidden state,
# one row per time step of the protein sequence.
sequence_length = 50
hidden_state = np.random.randn(sequence_length, 3)  # 3 hidden state dimensions

# One line per hidden dimension: (legend label, line colour), in column order.
hidden_traces = (
    ('Hidden State 1', 'r'),
    ('Hidden State 2', 'g'),
    ('Hidden State 3', 'b'),
)

fig, ax = plt.subplots(figsize=(10, 6))
for column, (trace_label, trace_color) in enumerate(hidden_traces):
    ax.plot(hidden_state[:, column], label=trace_label, color=trace_color)
ax.set_title('LSTM Hidden State Evolution')
ax.set_xlabel('Time Step (Amino Acid Position)')
ax.set_ylabel('Hidden State Value')
ax.legend()
fig.tight_layout()
fig.savefig("lstm_hidden_state_evolution.pdf")  # Save as PDF
plt.close(fig)



# 4. LSTM - Cell State Flow


In [6]:
# Draw one random value per time step to stand in for the LSTM cell state.
sequence_length = 50
cell_state = np.random.randn(sequence_length)

# Single-line plot of the simulated cell state across the sequence.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cell_state, label='Cell State', color='orange')
ax.set(
    title='LSTM Cell State Evolution',
    xlabel='Time Step (Amino Acid Position)',
    ylabel='Cell State Value',
)
ax.legend()
fig.tight_layout()
fig.savefig("lstm_cell_state_evolution.pdf")  # Save as PDF
plt.close(fig)



# 5. Comparison of CNN, ProtBERT, and LSTM Architectures


In [7]:
# Create a figure for the model comparison: three labelled boxes laid out
# left to right, with an arrow in each gap between neighbouring boxes.
fig, ax = plt.subplots(figsize=(10, 6))

# (label, fill colour) for each architecture, in left-to-right order
model_specs = [
    ('CNN: Convolutional Layers + Pooling', "lightblue"),
    ('ProtBERT: Transformer with Attention', "lightgreen"),
    ('LSTM: Recurrent Layers with Memory', "lightcoral"),
]

# Layout in data coordinates. FancyBboxPatch draws its `pad` OUTSIDE the
# rectangle it is given, so the visible box is box_width + 2 * box_pad wide.
# The numbers are chosen so three visible boxes, two gaps and two margins fill
# exactly the 0-1 range without overlapping:
#   3 * (0.22 + 2 * 0.02) + 2 * 0.09 + 2 * 0.02 == 1.0
box_pad = 0.02
box_width = 0.22
box_height = 0.3
box_bottom = 0.35
box_gap = 0.09      # clear space between neighbouring visible boxes
side_margin = 0.02  # clear space left of the first / right of the last box
arrow_inset = 0.01  # keeps arrow ends from touching the box outlines
outer_width = box_width + 2 * box_pad
center_y = box_bottom + box_height / 2

boxes = []
for index, (label, facecolor) in enumerate(model_specs):
    outer_left = side_margin + index * (outer_width + box_gap)

    # Drawing the box for this model's architecture
    box = mpatches.FancyBboxPatch((outer_left + box_pad, box_bottom), box_width, box_height,
                                  boxstyle=f"round,pad={box_pad}",
                                  edgecolor="black", facecolor=facecolor)
    ax.add_patch(box)
    boxes.append(box)

    # Label centred in the box both ways, wrapped so it fits the box width
    ax.text(outer_left + outer_width / 2, center_y, textwrap.fill(label, width=22),
            fontsize=12, ha='center', va='center')

    # Arrow across the gap to the next box (none after the last box)
    if index < len(model_specs) - 1:
        ax.arrow(outer_left + outer_width + arrow_inset, center_y,
                 box_gap - 2 * arrow_inset, 0,
                 head_width=0.02, head_length=0.03, length_includes_head=True,
                 fc='black', ec='black')

# Keep the original per-model names available for any later cell that uses them
cnn_box, protbert_box, lstm_box = boxes

# Pin the view to the coordinate range the layout above was computed for
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

# Title and Labels
ax.set_title("Comparison of CNN, ProtBERT, and LSTM Architectures for Protein Sequences")
ax.set_axis_off()

fig.tight_layout()
fig.savefig("model_comparison.pdf")  # Save as PDF
plt.close(fig)



# 6. Performance Comparison (Bar Chart)


In [8]:
# Hard-coded (simulated) scores, one entry per model in `models` order
models = ['CNN', 'ProtBERT', 'LSTM']
accuracy = [0.85, 0.90, 0.88]
precision = [0.80, 0.85, 0.83]
recall = [0.82, 0.87, 0.84]

# Grouped bar chart: one group per model, one bar per metric
bar_width = 0.2
x = np.arange(len(models))
fig, ax = plt.subplots(figsize=(10, 6))

# Each metric is shifted by a whole number of bar widths around the group
# centre: -1 (left), 0 (centre), +1 (right).
metric_series = (
    (-1, 'Accuracy', accuracy),
    (0, 'Precision', precision),
    (1, 'Recall', recall),
)
for slot, metric_label, scores in metric_series:
    ax.bar(x + slot * bar_width, scores, bar_width, label=metric_label)

# Axis labelling, with one tick per model at the group centre
ax.set(
    xlabel='Models',
    ylabel='Score',
    title='Performance Comparison: CNN, ProtBERT, and LSTM',
)
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.legend()

fig.tight_layout()
fig.savefig("performance_comparison.pdf")  # Save as PDF
plt.close(fig)

