<a href="https://colab.research.google.com/github/Bio2Byte/public_notebooks/blob/main/Bio2ByteTools_v3_multipleseq_pypi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title 1. Install the Bio2Byte tools package

#@markdown Once this cell has been executed, the Bio2Byte tools
#@markdown package (b2bTools) will be installed in this runtime
#@markdown and ready to be imported by the following cells.

!pip install b2bTools==3.0.1b5

In [None]:
#@title 2. Upload your MSA file

#@markdown Once this cell has been executed, your target MSA file
#@markdown will be ready to run the predictions on it
from google.colab import files

uploaded = files.upload()

# Fail here with a clear message when nothing was uploaded. Otherwise
# msa_filename would stay undefined (NameError in the prediction cell) or,
# on a re-run, silently keep the filename of a previous upload.
if not uploaded:
  raise ValueError("No file was uploaded; run this cell again and select an MSA file")

# NOTE: when several files are uploaded at once, msa_filename ends up holding
# the last one reported; the later cells only process that file.
for fn in uploaded.keys():
  print('MSA file "{name}" with length {length} bytes uploaded with success'.format(
      name=fn, length=len(uploaded[fn])))
  msa_filename = fn

In [None]:
#@title 3. Get the predictions for each sequence of the alignment

#@markdown Once this cell has been executed, the Bio2Byte tools will have
#@markdown predicted the values for each sequence of the alignment

from b2bTools.multipleSeq.Predictor import MineSuiteMSA

# Predictor object from b2bTools; it is reused by the save and plot cells below.
msaSuite = MineSuiteMSA()
# msa_filename is set by the upload cell (step 2).
# NOTE(review): this assumes the uploaded file was written to /content (the
# default Colab working directory) - confirm if the working directory changes.
msaSuite.predictAndMapSeqsFromMSA("/content/" + msa_filename)
# Presumably fills msaSuite.alignedPredictionDistribs, which the plotting cell
# reads - TODO confirm against the b2bTools documentation.
msaSuite.getDistributions()

In [None]:
#@title 4. Save the predictions to json files

#@markdown Once this cell has been executed, a json file with
#@markdown the obtained predictions will be automatically created for each
#@markdown sequence of the alignment inside the /content/results folder

import os
from os import path

# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of path.exists() followed by os.mkdir().
os.makedirs("/content/results", exist_ok=True)

# The loop variable is called seq_id (not id) so the builtin id() is not
# shadowed in the notebook namespace for the cells that run afterwards.
for seq_id, seq in msaSuite.seqs:
    print("PROCESSING SEQUENCE: id={id}".format(id=seq_id))

    json_results = msaSuite.getAllPredictionsJson(identifier=seq_id)

    # "/" is replaced in both parts of the name so that neither the MSA
    # filename nor the sequence identifier can introduce a sub-directory.
    output_path = "/content/results/{msa}_{id}.json".format(
        msa=msa_filename.replace("/", "_"), id=seq_id.replace("/", "_"))
    with open(output_path, "w") as file_output:
        file_output.write(json_results)


In [None]:
#@title 5. Download the predictions

#@markdown Once this cell has been executed, a zip-archive with
#@markdown the obtained predictions will be automatically downloaded
#@markdown to your computer.

# Recursively pack everything under /content/results into one zip archive.
# NOTE(review): neither the results folder nor an existing archive is cleared
# first, so files from earlier runs may end up in the download - confirm
# whether that is intended.
!zip -r /content/b2b-tools-msa.zip /content/results

# `files` is google.colab.files, imported in the upload cell (step 2), so that
# cell must have been executed before this one.
files.download("/content/b2b-tools-msa.zip")

In [None]:
#@title 6. Plot the results
#@markdown Once this cell has been executed, a plot
#@markdown with the distributions will be rendered.


import numpy as np
import matplotlib.pyplot as plt

def _valid_segments(distribution):
    """Return the contiguous index ranges that have complete quartile data.

    A position counts as missing when its 'median', 'firstQuartile' or
    'thirdQuartile' entry is None.

    Args:
        distribution: dict holding equally long 'median', 'firstQuartile'
            and 'thirdQuartile' sequences.

    Returns:
        List of half-open (start, stop) tuples in ascending order, one per
        maximal run of positions without a missing value. The list is empty
        when every position is missing.
    """
    required_keys = ('median', 'firstQuartile', 'thirdQuartile')
    n_positions = len(distribution['median'])

    segments = []
    start = None  # first index of the run currently being scanned, if any
    for pos in range(n_positions):
        is_missing = any(
            distribution[key][pos] is None for key in required_keys)
        if is_missing:
            if start is not None:
                segments.append((start, pos))
                start = None
        elif start is None:
            start = pos

    # Close a run that extends up to the last position.
    if start is not None:
        segments.append((start, n_positions))
    return segments


def plot_msa_distrib(jsondata_list, mutation=False):
    """Plot the per-position distributions of every biophysical property.

    One figure is shown per key of the first dataset. For each dataset the
    band between 'firstQuartile' and 'thirdQuartile' and the band between
    'bottomOutlier' and 'topOutlier' are shaded, and the 'median' is drawn
    as a line. Positions with a missing (None) median or quartile value are
    left unshaded.

    Args:
        jsondata_list: list of datasets; each maps a property name to a dict
            with the keys 'median', 'firstQuartile', 'thirdQuartile',
            'bottomOutlier' and 'topOutlier' (sequences indexed by position).
            Nothing is plotted for an empty list.
        mutation: falsy to disable the overlay; otherwise a dict whose
            mutation['results'][property] sequence is drawn as a red line
            on each figure.
    """
    # The first two colours match the original palette; the list is cycled
    # so that datasets beyond the second are drawn instead of being dropped.
    colors = ['blue', 'orange', 'green', 'purple', 'brown', 'gray']

    if not jsondata_list:
        return

    for biophys_data in jsondata_list[0].keys():
        n_positions = 0
        for index, data in enumerate(jsondata_list):
            col = colors[index % len(colors)]
            distribution = data[biophys_data]
            n_positions = max(n_positions, len(distribution['median']))

            # Shade each gap-free stretch separately so that None values
            # never reach fill_between.
            for start, stop in _valid_segments(distribution):
                x = np.arange(start, stop, 1)
                plt.fill_between(
                    x,
                    distribution['firstQuartile'][start:stop],
                    distribution['thirdQuartile'][start:stop],
                    alpha=0.5, color=col)
                plt.fill_between(
                    x,
                    distribution['bottomOutlier'][start:stop],
                    distribution['topOutlier'][start:stop],
                    alpha=0.25, color=col)

            plt.plot(distribution['median'], linewidth=1, color=col)

        # Drawn once per figure, after all datasets, so it stays on top.
        if mutation:
            plt.plot(mutation['results'][biophys_data],
                     linewidth=0.5, color='red')

        plt.axis([0, n_positions, 0, 1.1])
        plt.ylabel(biophys_data)
        plt.xlabel('Residue position')

        plt.show()
  
# plot_msa_distrib expects a list of datasets, so the single set of
# distributions held by msaSuite (from step 3) is wrapped in a list.
jsondata_list = [msaSuite.alignedPredictionDistribs]
# mutation=False: no mutation overlay is drawn on the figures.
plot_msa_distrib(jsondata_list, mutation=False)