<a href="https://colab.research.google.com/github/Bio2Byte/public_notebooks/blob/main/PIP_B2B_Tools_using_files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bio2Byte - Single Sequence Predictions
For most of the tools below, we now offer the b2bTools one-stop file-upload page where you can easily try them: [online b2bTools](https://bio2byte.be/b2btools/)

In [None]:
#@title Install dependencies
%%capture

!pip install b2bTools==3.0.4

In [None]:
#@title Import dependencies
import os
import json
from google.colab import files
from b2bTools import SingleSeq
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D

# Make sure the output directory used by the plotting and download cells exists.
# `exist_ok=True` keeps the cell idempotent on re-run, and `makedirs` also
# creates any missing parent directory.
os.makedirs('/content/results', exist_ok=True)

In [None]:
#@title Input protein sequences
#@markdown Please select the predictor tools to be executed and run this cell
#@markdown to open the file prompt in order to upload a valid FASTA file.

#@markdown **🚨⚠️ IMPORTANT:** If beta aggregation is selected, the input file must contain at most 10 sequences.

# Name of the uploaded FASTA file. It starts empty and is assigned from the
# upload prompt at the end of this cell.
input_fasta = ''

#@markdown ### DynaMine predictor tool
#@markdown >Fast predictor of protein backbone dynamics using only sequence information as input. 
#@markdown >The version here also predicts side-chain dynamics and secondary structure predictors 
#@markdown >using the same principle.

#@markdown **Prediction values included**: `backbone`, `sidechain`, `helix`, `ppII`, `coil`, and `sheet`

# Form checkbox: when True, "dynamine" is added to the list of tools to run.
dynamics = True #@param {type:"boolean"}

#@markdown ### DisoMine predictor tool
#@markdown >Predicts protein disorder with recurrent neural networks not directly 
#@markdown >from the amino acid sequence, but instead from more generic predictions of key 
#@markdown >biophysical properties, here protein dynamics, secondary structure 
#@markdown >and early folding.

#@markdown **Prediction values included**: `disoMine`

# Form checkbox: when True, "disomine" is added to the list of tools to run.
disorder = True #@param {type:"boolean"}

#@markdown ### EFoldMine predictor tool
#@markdown >Predicts from the primary amino acid sequence of a protein, 
#@markdown >which amino acids are likely involved in early folding events.

#@markdown **Prediction values included**: `earlyFolding`

# Form checkbox: when True, "efoldmine" is added to the list of tools to run.
early_folding = True #@param {type:"boolean"}

#@markdown ### AgMata predictor tool
#@markdown >Single-sequence based predictor of protein regions that are likely 
#@markdown >to cause beta-aggregation.

#@markdown **Prediction values included**: `agmata`

# Form checkbox: when True, "agmata" is added to the list of tools to run.
# NOTE: the name is misspelled ("aggretation"), but the plotting cells read
# the flag under this exact name, so it must not be renamed here alone.
beta_aggretation = False #@param {type:"boolean"}

# Translate the form checkboxes into the tool names handed to the predictor.
# (`beta_aggretation` is spelled exactly as the form field defines it.)
tool_flags = (
    ("dynamine", dynamics),
    ("disomine", disorder),
    ("efoldmine", early_folding),
    ("agmata", beta_aggretation),
)
tools = [tool_name for tool_name, enabled in tool_flags if enabled]

# Open the browser upload prompt; the result is indexed by file name.
uploaded = files.upload()

# Stop here with a clear message rather than letting an empty `input_fasta`
# fail later inside the predictor cell.
# NOTE(review): assumes a cancelled prompt yields an empty mapping - confirm.
if not uploaded:
    raise ValueError(
        'No file was uploaded: run this cell again and select a FASTA file.')

for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))
    # If several files are uploaded, the last one listed is the one used.
    input_fasta = fn

In [None]:
#@title Run predictors
%%capture

# Run every selected predictor on the uploaded FASTA file.
single_seq = SingleSeq(input_fasta)
single_seq.predict(tools)

# Collect the results; each entry stores its sequence under 'seq' next to
# the prediction values that the plotting cells read.
all_predictions = single_seq.get_all_predictions()
keys = [seq_key for seq_key in all_predictions.keys()]

# Longest sequence length, shared by the plots as their x-axis limit.
max_seq_len = max(len(entry['seq']) for entry in all_predictions.values())

In [None]:
#@title Plot Single Sequence predictions
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown #### DynaMine backbone dynamics
#@markdown > Values above 0.8 indicate rigid conformations, 
#@markdown > values above 1.0 membrane spanning regions, 
#@markdown > values below 0.69 flexible regions. 
#@markdown > Values between 0.69-0.80 are 'context' dependent and capable of
#@markdown > being either rigid or flexible.

#@markdown #### DynaMine sidechain dynamics
#@markdown > Higher values mean more likely rigid. 
#@markdown > These values are highly dependent on the amino acid type 
#@markdown > (e.g. a Trp will be rigid, an Asp flexible).

#@markdown #### DynaMine conformational propensities (sheet, helix, coil, ppII (polyproline II))
#@markdown > Higher values indicate higher propensities.

#@markdown #### EFoldMine earlyFolding propensity
#@markdown > Values above 0.169 indicate residues that are likely to start 
#@markdown > the protein folding process, based on only local interactions 
#@markdown > with other amino acids.

#@markdown #### Disomine disorder
#@markdown >  Values above 0.5 indicate that this is likely a disordered residue.


# One figure with a 2x4 grid: one panel per predicted property.
fig, axs = plt.subplots(2, 4)
fig.set_figwidth(30)
fig.set_figheight(10)
fig.suptitle('Single Sequence Predictions')

# One entry per panel, in grid order (left to right, top to bottom):
#   (prediction key, whether its tool was selected, panel title, shaded bands)
# Each band is (ymin, ymax, alpha, color) and visualises the interpretation
# thresholds listed in this cell's form text.
panel_specs = [
    ('backbone', dynamics, 'DynaMine backbone dynamics',
     [(1, 1.2, 0.3, 'red'),
      (0.8, 1, 0.5, 'pink'),
      (0.69, 0.8, 0.5, 'orange'),
      (-0.2, 0.69, 0.5, 'yellow')]),
    ('sidechain', dynamics, 'DynaMine sidechain dynamics', []),
    ('coil', dynamics, 'DynaMine conformational propensities: Coil', []),
    ('sheet', dynamics, 'DynaMine conformational propensities: Sheet', []),
    ('ppII', dynamics,
     'DynaMine conformational propensities: ppII (polyproline II)', []),
    ('helix', dynamics, 'DynaMine conformational propensities: Helix', []),
    # Keeping data, title and threshold bands in a single entry guarantees
    # that the disorder curve gets the disorder title and the 0.5 threshold,
    # and the early-folding curve gets its own title and the 0.169 threshold
    # (the two must never be crossed over).
    ('disoMine', disorder, 'Disorder (disoMine)',
     [(0.5, 1.2, 0.5, 'orange'),
      (-0.2, 0.5, 0.5, 'yellow')]),
    ('earlyFolding', early_folding, 'Early folding (EFoldMine)',
     [(-0.2, 0.169, 0.5, 'yellow'),
      (0.169, 1.2, 0.5, 'orange')]),
]

for ax, (pred_key, selected, title, bands) in zip(axs.flat, panel_specs):
    # Draw one line per sequence, but only if the matching tool was run;
    # otherwise the panel stays empty (title and axes are still shown).
    if selected:
        for seq_key in keys:
            predictions = all_predictions[seq_key]
            x_position = range(len(predictions['seq']))
            ax.plot(x_position, predictions[pred_key], label=seq_key)

    ax.set_title(title)
    ax.set_ylim([-0.2, 1.2])
    ax.set_xlabel('residue index')
    ax.set_ylabel('prediction values')
    for band_min, band_max, band_alpha, band_color in bands:
        ax.axhspan(band_min, band_max, alpha=band_alpha, color=band_color)
    ax.grid(axis='y')
    ax.set_xlim([0, max_seq_len - 1])

if len(keys) < 10:
    # The legend is anchored below the bottom-right panel. Its entries are
    # taken from the first panel that actually has curves, so the legend is
    # still produced when the bottom-right panel itself is empty.
    for ax in axs.flat:
        handles, labels = ax.get_legend_handles_labels()
        if handles:
            axs[1, 3].legend(handles, labels, loc='upper center',
                             bbox_to_anchor=(0.5, -0.3), fancybox=True,
                             shadow=True, ncol=len(keys))
            break

plt.subplots_adjust(hspace=0.4)
plt.savefig('/content/results/single_sequence_predictons.png')
plt.show()

In [None]:
#@title Plot AgMata aggregation propensity
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown > These values are divided by a factor of 20 from the original. 
#@markdown > Peaks indicate residues likely to be involved in beta-sheet 
#@markdown > aggregation.

# Single wide panel with one AgMata curve per sequence; skipped (with a
# notice) when the AgMata tool was not selected in the input form.
if not beta_aggretation:
    print("Beta aggregation tool was not selected")
else:
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(30, 5)
    fig.suptitle('Agmata aggregation propensity')

    for seq_key in keys:
        agmata_pred = all_predictions[seq_key]['agmata']
        residue_index = range(len(agmata_pred))
        ax.plot(residue_index, agmata_pred, label=seq_key)

    ax.set_xlim([0, max_seq_len - 1])
    ax.set_xlabel('residue index')
    ax.set_ylabel('prediction values')
    ax.grid(axis='y')

    # Legend placed below the axes, one column per sequence.
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
              fancybox=True, shadow=True, ncol=len(keys))
    fig.savefig('/content/results/agmata_predictons.png')
    plt.show()

In [None]:
#@title Plot Single Sequence predictions (3D projection)
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown #### DynaMine backbone dynamics
#@markdown > Values above 0.8 indicate rigid conformations, 
#@markdown > values above 1.0 membrane spanning regions, 
#@markdown > values below 0.69 flexible regions. 
#@markdown > Values between 0.69-0.80 are 'context' dependent and capable of
#@markdown > being either rigid or flexible.

#@markdown #### DynaMine sidechain dynamics
#@markdown > Higher values mean more likely rigid. 
#@markdown > These values are highly dependent on the amino acid type 
#@markdown > (e.g. a Trp will be rigid, an Asp flexible).

#@markdown #### DynaMine conformational propensities (sheet, helix, coil, ppII (polyproline II))
#@markdown > Higher values indicate higher propensities.

#@markdown #### EFoldMine earlyFolding propensity
#@markdown > Values above 0.169 indicate residues that are likely to start 
#@markdown > the protein folding process, based on only local interactions 
#@markdown > with other amino acids.

#@markdown #### Disomine disorder
#@markdown >  Values above 0.5 indicate that this is likely a disordered residue.

# One tall figure holding a 4x2 grid of 3D panels, one per predicted property.
# In every panel: x = residue index, y = one slot per sequence, z = prediction.
fig = plt.figure(figsize=(30, 40))
fig.suptitle('Single Sequence Predictions (3D Projection)')

# One entry per panel, in subplot order:
#   (prediction key, whether its tool was selected, panel title)
panel_specs = [
    ('backbone', dynamics, 'DynaMine backbone dynamics'),
    ('sidechain', dynamics, 'DynaMine sidechain dynamics'),
    ('coil', dynamics, 'DynaMine conformational propensities: Coil'),
    ('sheet', dynamics, 'DynaMine conformational propensities: Sheet'),
    ('ppII', dynamics,
     'DynaMine conformational propensities: ppII (polyproline II)'),
    ('helix', dynamics, 'DynaMine conformational propensities: Helix'),
    ('disoMine', disorder, 'Disorder (disoMine)'),
    ('earlyFolding', early_folding, 'Early folding (EFoldMine)'),
]

for panel_number, (pred_key, selected, title) in enumerate(panel_specs, start=1):
    ax = fig.add_subplot(4, 2, panel_number, projection='3d')

    # Draw one curve per sequence, but only if the matching tool was run;
    # otherwise the panel stays empty (title and axes are still shown).
    if selected:
        for index, seq_key in enumerate(keys):
            predictions = all_predictions[seq_key]
            seq_len = len(predictions['seq'])
            x_position = range(seq_len)
            # Constant y value: places this sequence's curve in its own slot.
            y_position = [index] * seq_len
            ax.plot(x_position, y_position, predictions[pred_key],
                    label=seq_key)

    ax.set_title(title)
    ax.set_zlim([-0.1, 1.1])
    ax.set_xlabel('residue index')
    # Every panel labels its sequence axis, so all eight read the same way.
    ax.set_ylabel('sequence')
    ax.set_zlabel('prediction values')
    # One tick per sequence, labelled with the sequence key.
    ax.set_yticks(range(len(keys)))
    ax.set_yticklabels(keys)
    ax.view_init(60, -45)

plt.subplots_adjust(hspace=0.2)
plt.savefig('/content/results/single_sequence_predictons_3d.png')
plt.show()

In [None]:
#@title Plot AgMata aggregation propensity (3D projection)
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown > These values are divided by a factor of 20 from the original. 
#@markdown > Peaks indicate residues likely to be involved in beta-sheet 
#@markdown > aggregation.

# 3D view of the AgMata curves (x = residue index, y = one slot per sequence,
# z = prediction); skipped with a notice when AgMata was not selected.
if not beta_aggretation:
    print("Beta aggregation was not selected")
else:
    fig = plt.figure(figsize=(15, 10))
    ax1 = fig.add_subplot(1, 1, 1, projection='3d')
    fig.suptitle('Single Sequence Predictions (3D Projection)')

    for index, seq_key in enumerate(keys):
        predictions = all_predictions[seq_key]
        seq_len = len(predictions['seq'])
        # Constant second coordinate puts each sequence in its own slot.
        ax1.plot(range(seq_len), [index] * seq_len, predictions['agmata'],
                 label=seq_key)

    ax1.set_title('Agmata aggregation propensity')
    ax1.set_xlabel('residue index')
    ax1.set_zlabel('prediction values')
    # One tick per sequence, labelled with the sequence key.
    ax1.set_yticks(range(len(keys)))
    ax1.set_yticklabels(keys)
    ax1.view_init(60, -45)

    fig.savefig('/content/results/agmata_predictons_3d.png')
    plt.show()

In [None]:
#@title Download the results
#@markdown After running this cell you will be able to download the plots
#@markdown and the plain results in JSON format to your computer.

json.dump(all_predictions, open('/content/results/b2btools_results.json', 'w'), indent=4, sort_keys=True)

!zip -r /content/b2b-tools-results.zip /content/results

files.download("/content/b2b-tools-results.zip")