In [None]:
import matplotlib.pyplot as plt

# Plot training & validation accuracy for both model outputs on one figure.
# NOTE: only accuracy curves are drawn here; no loss curves are plotted.
plt.figure(figsize=(14, 9))

# History keys, listed in the same order as the legend entries below.
for metric_key in (
    'broad_output_accuracy',
    'val_broad_output_accuracy',
    'detailed_output_accuracy',
    'val_detailed_output_accuracy',
):
    plt.plot(history.history[metric_key])

plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train Broad', 'Validation Broad', 'Train Detailed', 'Validation Detailed'])

plt.show()


import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, roc_auc_score, precision_score, recall_score
import numpy as np
# Predict probabilities for the test set.
# The prediction result is indexed as a two-element sequence: element 0 holds
# the broad-label probabilities and element 1 the detailed-label ones.
# Inference is run once and both outputs are read from the same result, rather
# than calling model.predict(X_test) a second time just to pick the other one.
test_predictions = model.predict(X_test)
y_broad_pred_prob = test_predictions[0]  # Probabilities for broad labels
y_detailed_pred_prob = test_predictions[1]  # Probabilities for detailed labels

# roc_curve and precision_recall_curve take binary targets and one score per sample.
# For the multiclass outputs, each class is therefore handled separately (one-vs-rest)
# by passing a single class column of the targets and of the predicted probabilities.
def plot_roc_curve(y_true, y_pred_prob, label):
    """Draw one ROC curve in its own figure and display it.

    Args:
        y_true: Ground-truth values handed to ``roc_curve``.
        y_pred_prob: Per-sample scores handed to ``roc_curve``.
        label: Name used in the legend entry and in the figure title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_pred_prob)
    area = auc(false_pos_rate, true_pos_rate)

    _fig, roc_ax = plt.subplots(figsize=(10, 7))
    roc_ax.plot(
        false_pos_rate,
        true_pos_rate,
        color='green',
        lw=2,
        label=f'{label} ROC curve (area = {area:.2f})',
    )
    # Dashed diagonal reference line from (0, 0) to (1, 1).
    roc_ax.plot([0, 1], [0, 1], color='coral', lw=2, linestyle='--')
    roc_ax.set_xlim([0.0, 1.0])
    roc_ax.set_ylim([0.0, 1.05])
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title(f'ROC Curve - {label}')
    roc_ax.legend(loc='lower right')
    plt.show()


 # Plot ROC for each class (if multiclass)
# One ROC figure per broad class: column i of the test targets is paired with
# column i of the predicted probabilities.
# NOTE(review): assumes y_broad_test is indicator-encoded with its columns in
# the same order as broad_label_encoder.classes_ — confirm.
for i, class_name in enumerate(broad_label_encoder.classes_):
    plot_roc_curve(
        y_true=y_broad_test[:, i],
        y_pred_prob=y_broad_pred_prob[:, i],
        label=class_name,
    )



def plot_precision_recall_curve(y_true, y_pred_prob, label):
    """Draw one precision-recall curve in its own figure and display it.

    Args:
        y_true: Ground-truth values handed to ``precision_recall_curve``.
        y_pred_prob: Per-sample scores handed to ``precision_recall_curve``.
        label: Name used in the legend entry and in the figure title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    precision, recall, _ = precision_recall_curve(y_true, y_pred_prob)
    # Area under the curve, computed by auc() over (recall, precision).
    pr_auc = auc(recall, precision)

    plt.figure(figsize=(10, 7))
    # Report the area with two decimals, matching plot_roc_curve; with a single
    # decimal, clearly different scores (e.g. 0.86 and 0.94) both read "0.9".
    plt.plot(recall, precision, color='blue', lw=2, label=f'{label} Precision-Recall curve (area = {pr_auc:.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve - {label}')
    plt.legend(loc='lower left')
    plt.show()

# One precision-recall figure per broad class, pairing column i of the test
# targets with column i of the predicted probabilities.
for i, class_name in enumerate(broad_label_encoder.classes_):
    plot_precision_recall_curve(
        y_true=y_broad_test[:, i],
        y_pred_prob=y_broad_pred_prob[:, i],
        label=class_name,
    )



from sklearn.preprocessing import label_binarize

# Build the per-class indicator matrix for the detailed labels: take the argmax
# of every row of y_test to get a class index, then expand those indices into
# one column per class index in range(y_test.shape[1]).
# NOTE(review): label_binarize returns a single column when only two classes
# are given; the per-class column indexing used later presumes one column per
# class — confirm the detailed output has more than two classes.
y_test_bin = label_binarize(
    np.argmax(y_test, axis=1),
    classes=np.arange(y_test.shape[1]),
)

# Array copy of the detailed-output probabilities.
y_detailed_pred_prob_bin = np.array(y_detailed_pred_prob)

# Plot ROC curve for each class
def plot_multiclass_roc(y_true_bin, y_pred_prob, class_names):
    """Overlay one ROC curve per class on a single figure and display it.

    Args:
        y_true_bin: 2-D array of per-class ground-truth columns; column ``i``
            is passed to ``roc_curve`` as the target for class ``i``.
        y_pred_prob: 2-D array of per-class scores; column ``i`` is passed to
            ``roc_curve`` as the score for class ``i``.
        class_names: Iterable of class names; its order defines the column
            index used for each class and the legend text.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(12, 8))
    for i, class_name in enumerate(class_names):
        fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_pred_prob[:, i])
        roc_auc = auc(fpr, tpr)
        # Two decimals, as in plot_roc_curve, so close classes stay distinguishable.
        plt.plot(fpr, tpr, lw=2, label=f'Class {class_name} (area = {roc_auc:.2f})')

    # Dashed diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'k--')
    # Start both axes at 0 so the origin and the low false-positive-rate part
    # of each curve are visible; a lower limit of 0.01 cut that region off.
    # The limits now match plot_roc_curve.
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for Multiclass')
    plt.legend(loc='lower right')
    plt.show()

# Multiclass ROC curves for the detailed (protein) labels.
plot_multiclass_roc(
    y_true_bin=y_test_bin,
    y_pred_prob=y_detailed_pred_prob,
    class_names=protein_label_encoder.classes_,
)



def plot_multiclass_precision_recall(y_true_bin, y_pred_prob, class_names):
    """Overlay one precision-recall curve per class on a single figure.

    Args:
        y_true_bin: 2-D array of per-class ground-truth columns; column ``i``
            is passed to ``precision_recall_curve`` as the target for class ``i``.
        y_pred_prob: 2-D array of per-class scores; column ``i`` is passed to
            ``precision_recall_curve`` as the score for class ``i``.
        class_names: Iterable of class names; its order defines the column
            index used for each class and the legend text.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(12, 8))
    for i, class_name in enumerate(class_names):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_pred_prob[:, i])
        # Area under the curve, computed by auc() over (recall, precision).
        pr_auc = auc(recall, precision)
        # Two decimals so that classes with close scores are not all shown
        # with the same rounded value in the legend.
        plt.plot(recall, precision, lw=2, label=f'Class {class_name} (area = {pr_auc:.2f})')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve for Multiclass')
    plt.legend(loc='best')
    plt.show()

# Multiclass precision-recall curves for the detailed (protein) labels.
plot_multiclass_precision_recall(
    y_true_bin=y_test_bin,
    y_pred_prob=y_detailed_pred_prob,
    class_names=protein_label_encoder.classes_,
)



In [None]:
#CREATE CONFUSION MATRIX with WHITE GRID
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np

# Function to plot a confusion matrix heatmap with a white grid
def plot_confusion_matrix(y_true, y_pred, labels, normalize=False, cmap='tab10', title='Confusion Matrix'):
    """Plot a confusion-matrix heatmap with white cell borders and display it.

    Args:
        y_true: True class values, passed to ``confusion_matrix``.
        y_pred: Predicted class values, passed to ``confusion_matrix``.
        labels: Tick labels used on both heatmap axes.
            NOTE(review): ``confusion_matrix`` is called without an explicit
            label list, so the matrix presumably covers only the classes that
            occur in ``y_true``/``y_pred``; ``labels`` must line up with
            those — confirm against the callers.
        normalize: If True, every row is divided by its row total and cells
            are annotated with two decimals; otherwise raw counts are shown.
        cmap: Colormap name passed to ``sns.heatmap``.
        title: Figure title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    cm = confusion_matrix(y_true, y_pred)

    if normalize:
        # A class that is predicted but never occurs in y_true yields an
        # all-zero row. Dividing that row by its total (0) would produce NaN
        # cells and a RuntimeWarning, so such rows are left at 0 instead.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        fmt = '.2f'
    else:
        fmt = 'd'

    plt.figure(figsize=(10, 7))
    # Increase font size for better readability. sns.set changes seaborn's
    # global settings, so this also applies to figures drawn after this call.
    sns.set(font_scale=1.2)

    # Plot heatmap with annotations; linewidths/linecolor draw the white cell borders.
    ax = sns.heatmap(cm, annot=True, fmt=fmt, cmap=cmap,
                     cbar=True, linewidths=1.5, linecolor='white',
                     xticklabels=labels, yticklabels=labels)

    # Customize labels and ticks
    plt.ylabel('True Labels', fontsize=14)
    plt.xlabel('Predicted Labels', fontsize=14)
    plt.title(title, fontsize=16)

    # Rotate tick labels for better readability
    plt.xticks(rotation=45)
    plt.yticks(rotation=45)

    # Style the minor grid lines and hide the minor tick markers, then switch
    # the grid off on the current axes.
    ax.grid(which="minor", color="black", linestyle='-', linewidth=2)
    ax.tick_params(which="minor", size=0)  # Hide minor tick markers
    plt.grid(False)

    plt.show()

# Collapse the broad-label test targets and the predicted probabilities to one
# class index per sample (argmax over axis 1).
y_broad_true = np.argmax(y_broad_test, axis=1)
y_broad_pred = np.argmax(y_broad_pred_prob, axis=1)

# Row-normalized confusion matrix for the broad labels, drawn with the
# 'tab10' colormap.
plot_confusion_matrix(
    y_true=y_broad_true,
    y_pred=y_broad_pred,
    labels=broad_label_encoder.classes_,
    normalize=True,
    cmap='tab10',
    title='Normalized Confusion Matrix for Broad Labels',
)

# Collapse the detailed-label test targets and the predicted probabilities to
# one class index per sample (argmax over axis 1).
y_detailed_true = np.argmax(y_test, axis=1)
y_detailed_pred = np.argmax(y_detailed_pred_prob, axis=1)

# Raw-count confusion matrix for the detailed labels, drawn with the
# 'cividis' colormap.
plot_confusion_matrix(
    y_true=y_detailed_true,
    y_pred=y_detailed_pred,
    labels=protein_label_encoder.classes_,
    normalize=False,
    cmap='cividis',
    title='Confusion Matrix for Detailed Labels',
)


In [None]:
# Pairwise similarity heatmap; every cell is annotated with its value to one
# decimal place and protein_list labels both axes.
plt.figure(figsize=(18, 16))
similarity_ax = sns.heatmap(
    similarity_matrix,
    cmap="BuPu",
    annot=True,
    fmt=".1f",
    xticklabels=protein_list,
    yticklabels=protein_list,
    cbar_kws={'label': 'Similarity (%)'},
    linewidths=0.5,
    linecolor='white',
)

# Title and axis labels are set on the Axes returned by seaborn.
similarity_ax.set_title(
    "Pairwise Sequence Similarity Heatmap (Representative Sequences)",
    fontsize=16,
    weight='bold',
)
similarity_ax.set_xlabel("Protein Type", fontsize=12)
similarity_ax.set_ylabel("Protein Type", fontsize=12)

# Display the heatmap
plt.show()

# Same similarity data, with each cell annotated as a percentage string.
percentage_matrix = np.round(similarity_matrix, 1)  # Round to one decimal place

# Pre-format every cell as text such as "12.3%"; fmt='' below tells seaborn to
# use these strings as-is.
percentage_annotations = np.char.mod('%.1f%%', percentage_matrix)

plt.figure(figsize=(14, 12))
percentage_ax = sns.heatmap(
    percentage_matrix,
    cmap="magma",
    annot=percentage_annotations,
    xticklabels=protein_list,
    yticklabels=protein_list,
    cbar_kws={'label': 'Similarity (%)'},
    linewidths=0.5,
    linecolor='white',
    fmt='',
)

# Title and axis labels are set on the Axes returned by seaborn.
percentage_ax.set_title(
    "Pairwise Sequence Similarity Heatmap (Percentages)",
    fontsize=16,
    weight='bold',
)
percentage_ax.set_xlabel("Protein Type", fontsize=12)
percentage_ax.set_ylabel("Protein Type", fontsize=12)

# Display the heatmap
plt.show()