In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# Compare the ROC curves of the CNN, LSTM and LSTM-CNN models on one figure.
# Inputs expected from earlier cells: CNN_y_true, CNN_y_pred_prob,
# LSTM_y_true, LSTM_y_pred_prob, y_true, y_pred_prob.

# ROC points (false/true positive rates) for each model; thresholds are discarded
CNN_fpr, CNN_tpr, _ = roc_curve(CNN_y_true, CNN_y_pred_prob)
LSTM_fpr, LSTM_tpr, _ = roc_curve(LSTM_y_true, LSTM_y_pred_prob)
LSTM_CNN_fpr, LSTM_CNN_tpr, _ = roc_curve(y_true, y_pred_prob)

# Area under the ROC curve for each model
CNN_roc_auc = roc_auc_score(CNN_y_true, CNN_y_pred_prob)
LSTM_roc_auc = roc_auc_score(LSTM_y_true, LSTM_y_pred_prob)
LSTM_CNN_roc_auc = roc_auc_score(y_true, y_pred_prob)

# One row per curve, in drawing order: (legend name, line color, fpr, tpr, AUC)
roc_curves = [
    ('CNN', 'orange', CNN_fpr, CNN_tpr, CNN_roc_auc),
    ('LSTM', 'blue', LSTM_fpr, LSTM_tpr, LSTM_roc_auc),
    ('LSTM-CNN', 'green', LSTM_CNN_fpr, LSTM_CNN_tpr, LSTM_CNN_roc_auc),
]

fig, ax = plt.subplots(figsize=(12, 6))

for model_name, line_color, fpr, tpr, auc_value in roc_curves:
    ax.plot(
        fpr,
        tpr,
        color=line_color,
        lw=2,
        label=f'{model_name} ROC curve (AUC = {auc_value:.2f})',
    )

# Dashed diagonal from (0, 0) to (1, 1) as a visual reference; it has no legend entry
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

# The y-axis runs slightly past 1.0 so curves touching the top stay visible
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic - Comparison of Models')
ax.legend(loc='lower right')
plt.show()


In [None]:
import numpy as np
import umap
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import KElbowVisualizer

# Embed the features loaded below in 2-D with UMAP and t-SNE, choose a K-Means
# cluster count for each embedding with the elbow method, report silhouette
# scores, and plot both embeddings colored by class label and by cluster.

# Load features and labels
X = np.load('/content/drive/MyDrive/esm_features_35M.npy')
y = np.load('/content/drive/MyDrive/labels_35M.npy')

# Perform UMAP dimensionality reduction.
# random_state is fixed so the embedding is reproducible across runs, in line
# with the seeded t-SNE and K-Means calls below.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=42)
X_umap = reducer.fit_transform(X)

# Perform t-SNE dimensionality reduction
X_tsne = TSNE(n_components=2, random_state=42).fit_transform(X)

# Plot UMAP with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='viridis', alpha=0.5,s=5)
plt.colorbar(scatter, label='Class Label')
plt.title('UMAP Visualization of ESM2 Features Colored by Class')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot t-SNE with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='viridis', alpha=0.5,s=5)
plt.colorbar(scatter, label='Class Label')
plt.title('t-SNE Visualization of ESM2 Features Colored by Class')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Elbow search on the UMAP embedding.
# Each embedding gets its own visualizer object so that the UMAP elbow value
# is not lost when the t-SNE search runs.
kmeans_umap = KMeans(random_state=42)
elbow_visualizer_umap = KElbowVisualizer(kmeans_umap, k=(2,10))
elbow_visualizer_umap.fit(X_umap)
elbow_visualizer_umap.show()

# Elbow search on the t-SNE embedding
kmeans_tsne = KMeans(random_state=42)
elbow_visualizer_tsne = KElbowVisualizer(kmeans_tsne, k=(2,10))
elbow_visualizer_tsne.fit(X_tsne)
elbow_visualizer_tsne.show()

# Read each embedding's cluster count from its own elbow search
optimal_clusters_umap = elbow_visualizer_umap.elbow_value_
optimal_clusters_tsne = elbow_visualizer_tsne.elbow_value_

# elbow_value_ is None when no elbow could be located; fail here with a clear
# message instead of passing None to KMeans below.
if optimal_clusters_umap is None:
    raise ValueError('No elbow found for the UMAP embedding; set optimal_clusters_umap manually.')
if optimal_clusters_tsne is None:
    raise ValueError('No elbow found for the t-SNE embedding; set optimal_clusters_tsne manually.')

# Apply K-Means with the optimal number of clusters
kmeans_umap_final = KMeans(n_clusters=optimal_clusters_umap, random_state=42).fit(X_umap)
kmeans_tsne_final = KMeans(n_clusters=optimal_clusters_tsne, random_state=42).fit(X_tsne)

# Calculate silhouette scores of the final clusterings in each embedding space
silhouette_umap = silhouette_score(X_umap, kmeans_umap_final.labels_)
silhouette_tsne = silhouette_score(X_tsne, kmeans_tsne_final.labels_)

print(f'Silhouette Score for UMAP: {silhouette_umap}')
print(f'Silhouette Score for t-SNE: {silhouette_tsne}')

# Visualize the clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=kmeans_umap_final.labels_, cmap='viridis', alpha=0.5,s=5)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f'UMAP Clustering (n_clusters={optimal_clusters_umap})')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=kmeans_tsne_final.labels_, cmap='viridis', alpha=0.5,s=5)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f't-SNE Clustering (n_clusters={optimal_clusters_tsne})')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Plot the UMAP embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=5)  # Use 'coolwarm' to differentiate classes
plt.colorbar(scatter, label='Original Class Label')
plt.title('UMAP Clusters Colored by Original Class Labels')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot the t-SNE embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=5)  # Use 'coolwarm' to differentiate classes
plt.colorbar(scatter, label='Original Class Label')
plt.title('t-SNE Clusters Colored by Original Class Labels')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()


In [None]:
import numpy as np
import umap
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import KElbowVisualizer

# Embed the features loaded below in 2-D with UMAP and t-SNE, choose a K-Means
# cluster count for each embedding with the elbow method, report silhouette
# scores, and plot both embeddings colored by class label and by cluster.
# The final t-SNE figure is also saved to tsne_acc.pdf.

# Load features and labels
X = np.load('/content/drive/MyDrive/manu_features_acc.npy')
y = np.load('/content/drive/MyDrive/manu_labels_acc.npy')

# Perform UMAP dimensionality reduction.
# random_state is fixed so the embedding is reproducible across runs, in line
# with the seeded t-SNE and K-Means calls below.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=42)
X_umap = reducer.fit_transform(X)

# Perform t-SNE dimensionality reduction
X_tsne = TSNE(n_components=2, random_state=42).fit_transform(X)

# NOTE(review): the two titles below say "ESM2 Features", but this cell loads
# manu_features_acc.npy — confirm whether the titles should name this feature set.

# Plot UMAP with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Class Label')
plt.title('UMAP Visualization of ESM2 Features Colored by Class')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot t-SNE with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Class Label')
plt.title('t-SNE Visualization of ESM2 Features Colored by Class')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Elbow search on the UMAP embedding.
# Each embedding gets its own visualizer object so that the UMAP elbow value
# is not lost when the t-SNE search runs.
kmeans_umap = KMeans(random_state=42)
elbow_visualizer_umap = KElbowVisualizer(kmeans_umap, k=(2,10))
elbow_visualizer_umap.fit(X_umap)
elbow_visualizer_umap.show()

# Elbow search on the t-SNE embedding
kmeans_tsne = KMeans(random_state=42)
elbow_visualizer_tsne = KElbowVisualizer(kmeans_tsne, k=(2,10))
elbow_visualizer_tsne.fit(X_tsne)
elbow_visualizer_tsne.show()

# Read each embedding's cluster count from its own elbow search
optimal_clusters_umap = elbow_visualizer_umap.elbow_value_
optimal_clusters_tsne = elbow_visualizer_tsne.elbow_value_

# elbow_value_ is None when no elbow could be located; fail here with a clear
# message instead of passing None to KMeans below.
if optimal_clusters_umap is None:
    raise ValueError('No elbow found for the UMAP embedding; set optimal_clusters_umap manually.')
if optimal_clusters_tsne is None:
    raise ValueError('No elbow found for the t-SNE embedding; set optimal_clusters_tsne manually.')

# Apply K-Means with the optimal number of clusters
kmeans_umap_final = KMeans(n_clusters=optimal_clusters_umap, random_state=42).fit(X_umap)
kmeans_tsne_final = KMeans(n_clusters=optimal_clusters_tsne, random_state=42).fit(X_tsne)

# Calculate silhouette scores of the final clusterings in each embedding space
silhouette_umap = silhouette_score(X_umap, kmeans_umap_final.labels_)
silhouette_tsne = silhouette_score(X_tsne, kmeans_tsne_final.labels_)

print(f'Silhouette Score for UMAP: {silhouette_umap}')
print(f'Silhouette Score for t-SNE: {silhouette_tsne}')

# Visualize the clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=kmeans_umap_final.labels_, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f'UMAP Clustering (n_clusters={optimal_clusters_umap})')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=kmeans_tsne_final.labels_, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f't-SNE Clustering (n_clusters={optimal_clusters_tsne})')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Plot the UMAP embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=3)  # Use 'coolwarm' to differentiate classes
plt.colorbar(scatter, label='Original Class Label')
plt.title('UMAP Clusters Colored by Original Class Labels')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot the t-SNE embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=3)  # Use 'coolwarm' to differentiate classes
# Colorbar ticks are restricted to 0 and 1
cbar = plt.colorbar(scatter, ticks=[0, 1])
cbar.set_label('Original Class Label')
plt.grid(True)

plt.title('t-SNE Clusters Colored by Original Class Labels')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
# Save before show() so the written PDF contains the drawn figure
plt.savefig('tsne_acc.pdf', format='pdf')
plt.show()


In [None]:
import numpy as np
import umap
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import KElbowVisualizer

# Embed the features loaded below in 2-D with UMAP and t-SNE, choose a K-Means
# cluster count for each embedding with the elbow method, report silhouette
# scores, and plot both embeddings colored by class label and by cluster.
# The final t-SNE figure is also saved to tsne_cksaap.pdf.

# Load features and labels
X = np.load('/content/drive/MyDrive/manu_features_cksaap.npy')
y = np.load('/content/drive/MyDrive/manu_labels_cksaap.npy')

# Perform UMAP dimensionality reduction.
# random_state is fixed so the embedding is reproducible across runs, in line
# with the seeded t-SNE and K-Means calls below.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=42)
X_umap = reducer.fit_transform(X)

# Perform t-SNE dimensionality reduction
X_tsne = TSNE(n_components=2, random_state=42).fit_transform(X)

# NOTE(review): the two titles below say "ESM2 Features", but this cell loads
# manu_features_cksaap.npy — confirm whether the titles should name this feature set.

# Plot UMAP with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Class Label')
plt.title('UMAP Visualization of ESM2 Features Colored by Class')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot t-SNE with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Class Label')
plt.title('t-SNE Visualization of ESM2 Features Colored by Class')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Elbow search on the UMAP embedding.
# Each embedding gets its own visualizer object so that the UMAP elbow value
# is not lost when the t-SNE search runs.
kmeans_umap = KMeans(random_state=42)
elbow_visualizer_umap = KElbowVisualizer(kmeans_umap, k=(2,10))
elbow_visualizer_umap.fit(X_umap)
elbow_visualizer_umap.show()

# Elbow search on the t-SNE embedding
kmeans_tsne = KMeans(random_state=42)
elbow_visualizer_tsne = KElbowVisualizer(kmeans_tsne, k=(2,10))
elbow_visualizer_tsne.fit(X_tsne)
elbow_visualizer_tsne.show()

# Read each embedding's cluster count from its own elbow search
optimal_clusters_umap = elbow_visualizer_umap.elbow_value_
optimal_clusters_tsne = elbow_visualizer_tsne.elbow_value_

# elbow_value_ is None when no elbow could be located; fail here with a clear
# message instead of passing None to KMeans below.
if optimal_clusters_umap is None:
    raise ValueError('No elbow found for the UMAP embedding; set optimal_clusters_umap manually.')
if optimal_clusters_tsne is None:
    raise ValueError('No elbow found for the t-SNE embedding; set optimal_clusters_tsne manually.')

# Apply K-Means with the optimal number of clusters
kmeans_umap_final = KMeans(n_clusters=optimal_clusters_umap, random_state=42).fit(X_umap)
kmeans_tsne_final = KMeans(n_clusters=optimal_clusters_tsne, random_state=42).fit(X_tsne)

# Calculate silhouette scores of the final clusterings in each embedding space
silhouette_umap = silhouette_score(X_umap, kmeans_umap_final.labels_)
silhouette_tsne = silhouette_score(X_tsne, kmeans_tsne_final.labels_)

print(f'Silhouette Score for UMAP: {silhouette_umap}')
print(f'Silhouette Score for t-SNE: {silhouette_tsne}')

# Visualize the clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=kmeans_umap_final.labels_, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f'UMAP Clustering (n_clusters={optimal_clusters_umap})')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=kmeans_tsne_final.labels_, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f't-SNE Clustering (n_clusters={optimal_clusters_tsne})')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Plot the UMAP embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=3)  # Use 'coolwarm' to differentiate classes
plt.colorbar(scatter, label='Original Class Label')
plt.title('UMAP Clusters Colored by Original Class Labels')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot the t-SNE embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=3)  # Use 'coolwarm' to differentiate classes
# Colorbar ticks are restricted to 0 and 1
cbar = plt.colorbar(scatter, ticks=[0, 1])
cbar.set_label('Original Class Label')
plt.grid(True)

plt.title('t-SNE Clusters Colored by Original Class Labels')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
# Save before show() so the written PDF contains the drawn figure
plt.savefig('tsne_cksaap.pdf', format='pdf')
plt.show()


In [None]:
import numpy as np
import umap
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import KElbowVisualizer

# Embed the features loaded below in 2-D with UMAP and t-SNE, choose a K-Means
# cluster count for each embedding with the elbow method, report silhouette
# scores, and plot both embeddings colored by class label and by cluster.
# The final t-SNE figure is also saved to tsne_dde.pdf.

# Load features and labels
X = np.load('/content/drive/MyDrive/manu_features_dde.npy')
y = np.load('/content/drive/MyDrive/manu_labels_dde.npy')

# Perform UMAP dimensionality reduction.
# random_state is fixed so the embedding is reproducible across runs, in line
# with the seeded t-SNE and K-Means calls below.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=42)
X_umap = reducer.fit_transform(X)

# Perform t-SNE dimensionality reduction
X_tsne = TSNE(n_components=2, random_state=42).fit_transform(X)

# NOTE(review): the two titles below say "ESM2 Features", but this cell loads
# manu_features_dde.npy — confirm whether the titles should name this feature set.

# Plot UMAP with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Class Label')
plt.title('UMAP Visualization of ESM2 Features Colored by Class')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot t-SNE with labels
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Class Label')
plt.title('t-SNE Visualization of ESM2 Features Colored by Class')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Elbow search on the UMAP embedding.
# Each embedding gets its own visualizer object so that the UMAP elbow value
# is not lost when the t-SNE search runs.
kmeans_umap = KMeans(random_state=42)
elbow_visualizer_umap = KElbowVisualizer(kmeans_umap, k=(2,10))
elbow_visualizer_umap.fit(X_umap)
elbow_visualizer_umap.show()

# Elbow search on the t-SNE embedding
kmeans_tsne = KMeans(random_state=42)
elbow_visualizer_tsne = KElbowVisualizer(kmeans_tsne, k=(2,10))
elbow_visualizer_tsne.fit(X_tsne)
elbow_visualizer_tsne.show()

# Read each embedding's cluster count from its own elbow search
optimal_clusters_umap = elbow_visualizer_umap.elbow_value_
optimal_clusters_tsne = elbow_visualizer_tsne.elbow_value_

# elbow_value_ is None when no elbow could be located; fail here with a clear
# message instead of passing None to KMeans below.
if optimal_clusters_umap is None:
    raise ValueError('No elbow found for the UMAP embedding; set optimal_clusters_umap manually.')
if optimal_clusters_tsne is None:
    raise ValueError('No elbow found for the t-SNE embedding; set optimal_clusters_tsne manually.')

# Apply K-Means with the optimal number of clusters
kmeans_umap_final = KMeans(n_clusters=optimal_clusters_umap, random_state=42).fit(X_umap)
kmeans_tsne_final = KMeans(n_clusters=optimal_clusters_tsne, random_state=42).fit(X_tsne)

# Calculate silhouette scores of the final clusterings in each embedding space
silhouette_umap = silhouette_score(X_umap, kmeans_umap_final.labels_)
silhouette_tsne = silhouette_score(X_tsne, kmeans_tsne_final.labels_)

print(f'Silhouette Score for UMAP: {silhouette_umap}')
print(f'Silhouette Score for t-SNE: {silhouette_tsne}')

# Visualize the clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=kmeans_umap_final.labels_, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f'UMAP Clustering (n_clusters={optimal_clusters_umap})')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=kmeans_tsne_final.labels_, cmap='viridis', alpha=0.5,s=3)
plt.colorbar(scatter, label='Cluster Label')
plt.title(f't-SNE Clustering (n_clusters={optimal_clusters_tsne})')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()

# Plot the UMAP embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=3)  # Use 'coolwarm' to differentiate classes
plt.colorbar(scatter, label='Original Class Label')
plt.title('UMAP Clusters Colored by Original Class Labels')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()

# Plot the t-SNE embedding colored by the original class labels (positive or negative)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='coolwarm', alpha=0.5,s=3)  # Use 'coolwarm' to differentiate classes
# Colorbar ticks are restricted to 0 and 1
cbar = plt.colorbar(scatter, ticks=[0, 1])
cbar.set_label('Original Class Label')
plt.grid(True)

plt.title('t-SNE Clusters Colored by Original Class Labels')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
# Save before show() so the written PDF contains the drawn figure
plt.savefig('tsne_dde.pdf', format='pdf')
plt.show()
