<a href="https://colab.research.google.com/github/Bio2Byte/public_notebooks/blob/main/Bio2ByteTools_v3_singleseq_pypi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bio2Byte - Single Sequence Predictions
For most of the tools below, we now provide a one-stop b2bTools file-upload page where you can easily try them: [online b2bTools](https://bio2byte.be/b2btools/)

In [None]:
#@title Install dependencies
%%capture

# The b2bTools version is pinned so every run installs the same release.
# The %%capture magic above hides pip's console output; remove it temporarily
# when an installation problem needs to be diagnosed.
!pip install b2bTools==3.0.5b2

In [None]:
#@title Import dependencies
import os
from b2bTools import SingleSeq
from matplotlib import pyplot as plt

In [None]:
#@title Input protein sequences
%%capture

query_sequence_1 = 'MAKSTILALLALVLVAHASAMRRERGRQGDSSSCERQVDRVNLKPCEQHIMQRIMGEQEQYDSYDIRSTRSSDQQQRCCDELNEMENTQRCMCEALQQIMENQCDRLQDRQMVQQFKRELMNLPQQCNFRAPQRCDLDVSGGRC' #@param {type:"string"}
query_sequence_2 = '' #@param {type:"string"}
query_sequence_3 = '' #@param {type:"string"}
query_sequence_4 = '' #@param {type:"string"}
query_sequence_5 = '' #@param {type:"string"}
query_sequence_6 = '' #@param {type:"string"}

# Form fields in order; the position of a field determines its SEQ_<n> identifier.
query_sequences = [
    query_sequence_1,
    query_sequence_2,
    query_sequence_3,
    query_sequence_4,
    query_sequence_5,
    query_sequence_6,
]

# Build the FASTA text. Whitespace pasted around a sequence is stripped and
# empty fields are skipped, so no record without residues is ever written.
fasta = ''.join(
    f'>SEQ_{number}\n{sequence.strip()}\n'
    for number, sequence in enumerate(query_sequences, start=1)
    if sequence.strip()
)

if not fasta:
    raise ValueError('Please provide at least one protein sequence.')

# Resolve the path once so that the file written here is exactly the file
# handed to SingleSeq, whatever the current working directory is.
fasta_path = os.path.abspath("input_example1.fasta")

with open(fasta_path, "w") as f:
    f.write(fasta)

# Run the predictors whose results are plotted by the cells below.
single_seq = SingleSeq(fasta_path)
single_seq.predict(tools=['dynamine', 'efoldmine', 'disomine', 'agmata', 'psp'])

# Results are keyed by sequence identifier; `keys` fixes the plotting order.
all_predictions = single_seq.get_all_predictions()
keys = list(all_predictions.keys())

# Longest input sequence; used as the shared x-axis limit of the 2D plots.
max_seq_len = max(len(pred['seq']) for pred in all_predictions.values())

In [None]:
#@title Plot Single Sequence predictions
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown #### DynaMine backbone dynamics
#@markdown > Values above 0.8 indicate rigid conformations, 
#@markdown > values above 1.0 membrane spanning regions, 
#@markdown > values below 0.69 flexible regions. 
#@markdown > Values between 0.69-0.80 are 'context' dependent and capable of
#@markdown > being either rigid or flexible.

#@markdown #### DynaMine sidechain dynamics
#@markdown > Higher values mean more likely rigid. 
#@markdown > These values are highly dependent on the amino acid type 
#@markdown > (e.g. a Trp will be rigid, an Asp flexible).

#@markdown #### DynaMine conformational propensities (sheet, helix, coil, ppII (polyproline II))
#@markdown > Higher values indicate higher propensities.

#@markdown #### EFoldMine earlyFolding propensity
#@markdown > Values above 0.169 indicate residues that are likely to start 
#@markdown > the protein folding process, based on only local interactions 
#@markdown > with other amino acids.

#@markdown #### Disomine disorder
#@markdown >  Values above 0.5 indicate that this is likely a disordered residue.


# One entry per panel, in row-major grid order:
#   (prediction key, panel title, shaded bands as (ymin, ymax, alpha, color)).
# Keeping the data key next to its title and threshold bands guarantees that
# every curve is drawn under the heading and thresholds that belong to it.
panels = [
    ('backbone', 'DynaMine backbone dynamics', [
        (1, 1.1, 0.3, 'red'),
        (0.8, 1, 0.5, 'pink'),
        (0.69, 0.8, 0.5, 'orange'),
        (0, 0.69, 0.5, 'yellow'),
    ]),
    ('sidechain', 'DynaMine sidechain dynamics', []),
    ('coil', 'DynaMine conformational propensities: Coil', []),
    ('sheet', 'DynaMine conformational propensities: Sheet', []),
    ('ppII', 'DynaMine conformational propensities: ppII (polyproline II)', []),
    ('helix', 'DynaMine conformational propensities: Helix', []),
    ('earlyFolding', 'Early folding (EFoldMine)', [
        (0, 0.169, 0.5, 'yellow'),
        (0.169, 1.1, 0.5, 'orange'),
    ]),
    ('disoMine', 'Disorder (disoMine)', [
        (0.5, 1.1, 0.5, 'orange'),
        (0, 0.5, 0.5, 'yellow'),
    ]),
]

fig, axs = plt.subplots(2, 4)
fig.set_figwidth(30)
fig.set_figheight(10)
fig.suptitle('Single Sequence Predictions')

for ax, (pred_key, title, bands) in zip(axs.flat, panels):
    # One curve per input sequence, labelled with its identifier.
    for seq_key in keys:
        predictions = all_predictions[seq_key]
        x_position = range(len(predictions['seq']))
        ax.plot(x_position, predictions[pred_key], label=seq_key)

    ax.set_title(title)
    ax.set_ylim([-0.1, 1.1])
    ax.set_xlabel('residue index')
    ax.set_ylabel('prediction values')

    # Coloured horizontal bands mark the interpretation thresholds.
    for ymin, ymax, alpha, color in bands:
        ax.axhspan(ymin, ymax, alpha=alpha, color=color)

    ax.grid(axis='y')
    ax.set_xlim([0, max_seq_len])

# pyplot attaches the legend to the current axes; every panel carries the
# same sequence labels, so a single legend is enough.
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.3), fancybox=True, shadow=True, ncol=len(keys))
plt.subplots_adjust(hspace=0.4)
plt.show()

In [None]:
#@title Plot AgMata aggregation propensity
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown > These values are divided by a factor of 20 from the original. 
#@markdown > Peaks indicate residues likely to be involved in beta-sheet 
#@markdown > aggregation.

# Single wide panel holding one AgMata curve per input sequence.
fig, ax = plt.subplots(1, 1, figsize=(30, 5))
fig.suptitle('Agmata aggregation propensity')

for seq_key in keys:
    agmata_values = all_predictions[seq_key]['agmata']
    residue_index = range(len(agmata_values))
    ax.plot(residue_index, agmata_values, label=seq_key)

ax.set_xlabel('residue index')
ax.set_ylabel('prediction values')
ax.set_xlim([0, max_seq_len])
ax.grid(axis='y')

# Legend sits below the axes, one column per sequence.
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.2),
    fancybox=True,
    shadow=True,
    ncol=len(keys),
)
plt.show()

In [None]:
#@title Plot PSPer
#@markdown PSP (Phase Separating Protein) predicts whether a protein is 
#@markdown likely to phase-separate through a particular mechanism 
#@markdown involving RNA interactions (FUS-like proteins). 
#@markdown It highlights the regions of your protein that are 
#@markdown mechanistically involved, and provides an overall score.

# Background colour for each Viterbi state label (labels carry a leading
# space); any label not listed here falls back to white.
state_palette = {
    ' RRM': '#ffb499',
    ' PLD': '#9999ff',
    ' SPACER': '#bfbfbf',
    ' OTHER': '#99ff99',
}

# (prediction key, panel title) for the five PSPer panels, left to right.
psper_panels = (
    ('complexity', 'Complexity'),
    ('arg', 'Arg Enrichment'),
    ('tyr', 'Tyr Enrichment'),
    ('RRM', 'RRM'),
    ('disorder', 'Disorder'),
)

# One figure per input sequence.
for seq_key in keys:
    predictions = all_predictions[seq_key]
    n_residues = len(predictions['seq'])
    residue_index = range(n_residues)

    # One background colour per residue, taken from its Viterbi state.
    state_colors = [state_palette.get(state, '#fff') for state in predictions['viterbi']]

    fig, panel_axes = plt.subplots(1, 5)
    fig.set_figwidth(30)
    fig.set_figheight(5)
    fig.suptitle(f'PSPer {seq_key} (PSPer score: {predictions["protein_score"]:2.3})')

    for panel_ax, (pred_key, panel_title) in zip(panel_axes, psper_panels):
        panel_ax.plot(residue_index, predictions[pred_key])
        panel_ax.set_title(panel_title)
        panel_ax.set_ylabel('prediction values')
        panel_ax.set_xlabel('residue index')
        panel_ax.set_xlim([0, n_residues])

        # Shade every residue column with the colour of its Viterbi state.
        for position in residue_index:
            panel_ax.axvspan(position, position + 1, facecolor=state_colors[position])

plt.show()

In [None]:
#@title Plot Single Sequence predictions (3D projection)
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown #### DynaMine backbone dynamics
#@markdown > Values above 0.8 indicate rigid conformations, 
#@markdown > values above 1.0 membrane spanning regions, 
#@markdown > values below 0.69 flexible regions. 
#@markdown > Values between 0.69-0.80 are 'context' dependent and capable of
#@markdown > being either rigid or flexible.

#@markdown #### DynaMine sidechain dynamics
#@markdown > Higher values mean more likely rigid. 
#@markdown > These values are highly dependent on the amino acid type 
#@markdown > (e.g. a Trp will be rigid, an Asp flexible).

#@markdown #### DynaMine conformational propensities (sheet, helix, coil, ppII (polyproline II))
#@markdown > Higher values indicate higher propensities.

#@markdown #### EFoldMine earlyFolding propensity
#@markdown > Values above 0.169 indicate residues that are likely to start 
#@markdown > the protein folding process, based on only local interactions 
#@markdown > with other amino acids.

#@markdown #### Disomine disorder
#@markdown >  Values above 0.5 indicate that this is likely a disordered residue.

# (prediction key, panel title) in subplot order: 4 rows x 2 columns,
# numbered row by row.
panels_3d = [
    ('backbone', 'DynaMine backbone dynamics'),
    ('sidechain', 'DynaMine sidechain dynamics'),
    ('coil', 'DynaMine conformational propensities: Coil'),
    ('sheet', 'DynaMine conformational propensities: Sheet'),
    ('ppII', 'DynaMine conformational propensities: ppII (polyproline II)'),
    ('helix', 'DynaMine conformational propensities: Helix'),
    ('disoMine', 'Disorder (disoMine)'),
    ('earlyFolding', 'Early folding (EFoldMine)'),
]

fig = plt.figure(figsize=(30, 40))
fig.suptitle('Single Sequence Predictions (3D Projection)')

for panel_number, (pred_key, title) in enumerate(panels_3d, start=1):
    ax = fig.add_subplot(4, 2, panel_number, projection='3d')

    # Each sequence is drawn at its own depth (y = index into `keys`) so the
    # curves sit side by side instead of on top of each other.
    for index, seq_key in enumerate(keys):
        predictions = all_predictions[seq_key]
        seq_len = len(predictions['seq'])
        ax.plot(range(seq_len), [index] * seq_len, predictions[pred_key], label=seq_key)

    ax.set_title(title)
    ax.set_zlim([-0.1, 1.1])
    ax.set_xlabel('residue index')
    # The depth axis is labelled on every panel so all eight read the same way.
    ax.set_ylabel('sequence')
    ax.set_zlabel('prediction values')
    # Depth ticks show the sequence identifiers instead of their numeric index.
    ax.set_yticks(range(len(keys)))
    ax.set_yticklabels(keys)
    ax.view_init(60, -45)

plt.subplots_adjust(hspace=0.2)
plt.show()

In [None]:
#@title Plot AgMata aggregation propensity (3D projection)
#@markdown The predictions reflect 'emerging' properties, so what the sequence 
#@markdown is capable of, not necessarily what it will do in a particular 
#@markdown context, for example when it adopts a specific fold.

#@markdown > These values are divided by a factor of 20 from the original. 
#@markdown > Peaks indicate residues likely to be involved in beta-sheet 
#@markdown > aggregation.

# Single 3D panel; each sequence gets its own depth slot along the y axis.
fig = plt.figure(figsize=(15, 10))
ax1 = fig.add_subplot(1, 1, 1, projection='3d')
fig.suptitle('Single Sequence Predictions (3D Projection)')

for index, seq_key in enumerate(keys):
    seq_predictions = all_predictions[seq_key]
    seq_len = len(seq_predictions['seq'])
    ax1.plot(
        range(seq_len),          # residue index
        [index] * seq_len,       # constant depth for this sequence
        seq_predictions['agmata'],
        label=seq_key,
    )

ax1.set_title('Agmata aggregation propensity')
ax1.set_xlabel('residue index')
ax1.set_zlabel('prediction values')

# Depth ticks show the sequence identifiers instead of their numeric index.
depth_ticks = range(len(keys))
ax1.set_yticks(depth_ticks)
ax1.set_yticklabels(keys)
ax1.view_init(60, -45)

plt.show()