In [1]:
# Importing the libraries
from functions import *

### Creating the dataframe of disordered regions

In [22]:
# Set the maximum width of the columns
pd.set_option('display.max_colwidth', 20)

In [23]:
# Open XML file as a dataframe
input_file = '{}/databases/uniprot/curated_uniprot.fasta_75'.format(directory) # change the file name if necessary
df = blast_parser(input_file)
print(f'The number of instances: {len(df)}')
df.head()

The number of instances: 8656


Unnamed: 0,query_id,subject_id,query_len,hsp_len,query_seq,match_seq,subject_seq,query_start,query_end,subject_start,subject_end,identity,positive,gaps,eval,bit_score,count
0,Q16620,A0A2R9BM51,822,822,MSSWIRWHGPAMARLW...,MSSWIRWHGPAMARLW...,MSSWIRWHGPAMARLW...,1,822,1,822,822,822,0,0.0,4458.0,200
1,Q16620,A0A4X2LP40,822,824,MSSWIRWHGPAMARLW...,M SW + HGP MARL ...,MLSWKKCHGPGMARLL...,1,822,1,824,689,745,2,0.0,3749.0,200
2,Q16620,A0A4X2LDU8,822,822,MSSWIRWHGPAMARLW...,M SW + HGP MARL ...,MLSWKKCHGPGMARLL...,1,822,1,821,689,745,1,0.0,3756.0,200
3,Q16620,A0A6P5IKH1,822,824,MSSWIRWHGPAMARLW...,M SW + HGP MARL ...,MLSWKKCHGPGMARLL...,1,822,1,824,691,745,2,0.0,3764.0,200
4,Q16620,A0A7J7UQA2,822,741,TSCKCSASRIWCSDPS...,+SCKCSASRIWCSDP ...,SSCKCSASRIWCSDPI...,34,774,2,739,710,724,3,0.0,3776.0,200


In [29]:
# Create a dataframe with Disprot instances from curated.mjson database
data = list()

with open('curated.mjson', 'r') as file:
    for line in file:
        obj = json.loads(line)
        rows = json_parser(obj)
        data.extend(rows)

curated_disprot = pd.DataFrame(data)

# Calculate the length of disordered regions
curated_disprot['length'] = curated_disprot['end'] - curated_disprot['start'] + 1
curated_disprot = curated_disprot[(curated_disprot['feature'] == 'disorder') & (curated_disprot['source'] == 'disprot')]
curated_disprot['region'] = curated_disprot.apply(lambda row: f"{row['acc']}_{row['start']}-{row['end']}", axis=1)
curated_disprot.to_csv('curated_disprot.csv', index=False)

print(f'The number of instances in the Curated Disprot database: {len(curated_disprot)}')
curated_disprot[:20]

The number of instances in the Curated Disprot database: 3151


Unnamed: 0,acc,evidence,feature,source,start,end,length,region
6927,P03265,curated,disorder,disprot,294,334,41,P03265_294-334
6928,P03265,curated,disorder,disprot,454,464,11,P03265_454-464
6929,P49913,curated,disorder,disprot,134,170,37,P49913_134-170
6930,P03045,curated,disorder,disprot,1,107,107,P03045_1-107
6931,P00004,curated,disorder,disprot,1,105,105,P00004_1-105
6932,P27695,curated,disorder,disprot,1,43,43,P27695_1-43
6933,Q64693,curated,disorder,disprot,1,256,256,Q64693_1-256
6934,P32774,curated,disorder,disprot,89,104,16,P32774_89-104
6935,P0DMM9,curated,disorder,disprot,63,76,14,P0DMM9_63-76
6936,P0DMM9,curated,disorder,disprot,215,260,46,P0DMM9_215-260


In [27]:
# Keep only disordered regions in the initial dataframe filtering with curated_disordered dataframe
disordered = df[df['query_id'].isin(curated_disprot['acc'])]
disordered.to_csv('disordered_df.csv', index=False)

print(f'The number of rows with the disordered regions: {len(disordered)}')
disordered.head()

The number of rows with the disordered regions: 2887


Unnamed: 0,query_id,subject_id,query_len,hsp_len,query_seq,match_seq,subject_seq,query_start,query_end,subject_start,subject_end,identity,positive,gaps,eval,bit_score,count
200,Q9H832,A0A6J2FM24,354,356,MAESPTEEAATA--GA...,MAESPTEEAATA GA...,MAESPTEEAATATAGA...,1,354,1,354,350,350,4,0.0,1851.0,200
201,Q9H832,A0A3Q7W6Y2,354,356,MAESPTEEAATA--GA...,MAESPTEEAATA GA...,MAESPTEEAATATAGA...,1,354,1,354,350,350,4,0.0,1851.0,200
202,Q9H832,A0A2U3VK69,354,356,MAESPTEEAATA--GA...,MAESPTEEAATA GA...,MAESPTEEAATATAGA...,1,354,1,354,350,350,4,0.0,1851.0,200
203,Q9H832,A0A2Y9JVH5,354,358,MAESPTEEAATA----...,MAESPTEEAATA ...,MAESPTEEAATATATA...,1,354,1,356,351,351,6,0.0,1854.0,200
204,Q9H832,A0A8C7ALE4,354,358,MAESPTEEAATA----...,MAESPTEEAATA ...,MAESPTEEAATATATA...,1,354,1,356,351,351,6,0.0,1854.0,200


In [28]:
# Update the curated_disprot leaving only the instances from the XML file
curated_disprot = curated_disprot[curated_disprot['acc'].isin(disordered['query_id'])]
print(f'The number of disordered regions in curated_uniprot.fasta_75: {len(curated_disprot)}')
curated_disprot.head()

The number of disordered regions in curated_uniprot.fasta_75: 21


Unnamed: 0,acc,evidence,feature,source,start,end,length,region
7271,Q99967,curated,disorder,disprot,220,269,50,Q99967_220-269
7799,Q9H832,curated,disorder,disprot,1,99,99,Q9H832_1-99
7800,Q9H832,curated,disorder,disprot,327,354,28,Q9H832_327-354
7821,Q86FP8,curated,disorder,disprot,20,80,61,Q86FP8_20-80
8218,Q8IW19,curated,disorder,disprot,399,420,22,Q8IW19_399-420


In [6]:
# Select the disordered regions IDs and positions
dis_regs = set()

for i, row in disordered.iterrows():
    dis_id = row[0]
    matching_row = curated_disprot[curated_disprot['acc'] == dis_id]
    if not matching_row.empty:
        region = matching_row['region']
        dis_regs.update(region)

print(dis_regs, len(dis_regs))

{'Q99967_220-269', 'Q8R464_25-120', 'Q8IU57_270-299', 'Q93KQ4_51-81', 'Q5T4W7_108-120', 'Q9H832_1-99', 'Q8IW19_399-420', 'Q9H832_327-354', 'Q8K4J6_111-142', 'Q86FP8_20-80', 'Q9H0E2_1-53', 'Q9CXY6_29-44', 'Q9BYF1_769-805', 'Q84852_17-299', 'Q9CXY6_347-390', 'Q9BYI3_149-253', 'Q8K4J6_155-186', 'Q8IW19_450-511', 'Q5VZK9_1005-1020', 'Q8WUG5_1-105', 'Q8K4J6_67-98'} 21


In [7]:
# Define an array of disordered regions ids
# disprot_ids = disordered["query_id"].unique()
disprot_ids = dis_regs

# Dropdown list of Uniprot query IDs for disordered regions
output = widgets.Select(options=disprot_ids,
    rows=10,
    description='Uniprot ID: ',
    layout={'width': 'max-content'},
    disabled=False)

display(output)

Select(description='Uniprot ID: ', layout=Layout(width='max-content'), options=('Q99967_220-269', 'Q8R464_25-1…

In [8]:
# Check if there are disordered regions for a particular query ID
id_dis = output.value
id_split = id_dis.split("_")[0]
# i = 1 # change to the necessary region
# store the same id for all notebooks
%store id_dis id_split
# %store i
selected_dis = disordered[disordered["query_id"].isin([id_dis.split("_")[0]])] # the information for one query ID
# selected_dis.head()

Stored 'id_dis' (str)
Stored 'id_split' (str)


In [9]:
# Filter the curated_disprot dataframe based on selected ID
# curated_query = curated_disprot[curated_disprot['acc'] == id_dis]
curated_query = curated_disprot[curated_disprot['region'] == id_dis]
curated_query

Unnamed: 0,acc,evidence,feature,source,start,end,length,region
7271,Q99967,curated,disorder,disprot,220,269,50,Q99967_220-269


## 1. Multiple Sequence Alignment

Within this framework, we will compare the MSA results obtained directly from the BLAST output with those generated from the ClustalOmega and MAFFT.

### 1.1 MSA from the BLAST output
This code iterates through each row of the dataframe for a previously selected query ID. If there is no gap, it maps the amino acid from the subject sequence to the corresponding position in the query sequence, starting from the beginning of the query sequence. The resulting mapped amino acids are then added to a new count. In the end we have a FASTA file with the subject IDs and the aligned sequences.

In [10]:
# # Retrieve query sequence and its length from the disordered dataframe
# query_sequence = disordered[disordered['query_id'] == id_split]['query_seq'].unique()[0]
# query_sequence = re.sub(r'[-]', '', query_sequence)
# query_len = disordered[disordered['query_id'] == id_split]['query_len'].unique()[0]
# print(id_dis, query_len, query_sequence)

In [11]:
# Extract the data for the disordered regions
uniprot_ids = disordered["query_id"].unique()

data = []

# Loop through disprot_ids
for i in uniprot_ids:
    query_sequence = disordered[disordered['query_id'] == i]['query_seq'].unique()[0]
    query_sequence = re.sub(r'[-]', '', query_sequence)
    query_len = len(query_sequence)
    
    # Append the data to the list
    data.append([i, query_sequence, query_len])

In [12]:
# Create a dataframe of disprot sequences
disprot_sequences = pd.DataFrame(data, columns=['disprot_id', 'query_sequence', 'query_len'])
disprot_sequences.head()

Unnamed: 0,disprot_id,query_sequence,query_len
0,Q9H832,MAESPTEEAATAGAGAAGPGASSVAGVVGVSGSGGGFGPPFLPDVW...,354
1,Q8IW19,MSGGFELQPRDGGPRVALAPGETVIGRGPLLGITDKRVSRRHAILE...,511
2,Q99967,MADHMMAMNHGRFPDGTNGLHHHPAHRMGMGQFPSPHHHQQQQPQH...,270
3,Q9CXY6,MRGDRGRGRGGRFGSRGGPGGGFRPFVPHIPFDFYLCEMAFPRVKP...,390
4,Q8R464,PLLLLWAAAAGPGTGQEVQTENVTVAEGGVAEITCRLHQYDGSIVV...,379


In [13]:
# # 1.2 Build the MSA from the BLAST - for all alignments
# # Iterate through each row in the disprot_sequences 
# for ind_q, row_q in disprot_sequences.iterrows():
#     query_id = row_q['disprot_id']
#     query_sequence = row_q['query_sequence']
#     query_len = row_q['query_len']
#     selected_dis = disordered[disordered["query_id"].isin([query_id])]
#     print(ind_q, query_id, query_len, query_sequence[:3],"...", query_sequence[-3:])
    
#     # Save the file
#     out_file = f'{directory}/results/alignments/output_files/blast/{query_id}_blast.fasta'
    
#     # Initialize the first row with a length of the query sequence
#     with open(out_file, "w") as fout:
#         mapped_seq = ["-"] * query_len

#         # Write the header line for the query sequence
#         fout.write(">{}\n".format(query_id))

#         # Map the query sequence to the mapped_seq list
#         c = 0
#         for l_q in query_sequence:
#             if l_q != ' ' and l_q != '-':
#                 mapped_seq[c] = l_q
#                 c += 1

#         # Write the query_mapped_seq sequence to the output file
#         fout.write("{}\n".format("".join(mapped_seq)))

#         # Map the subject sequences to the mapped_seq list and write to the output file
#         for index, row in selected_dis.iterrows():
#             if query_id == row['subject_id']:
#                 continue
                
#             c = 0
#             query_start = row['query_start']
#             for l_q, l_s in zip(row['query_seq'], row['subject_seq']):
#                 if l_q != ' ' and l_q != '-': # if the initial aa from query is not empty or gapped
#                     if query_start + c - 1 < len(mapped_seq): # added the condition
#                         mapped_seq[query_start + c - 1] = l_s if l_s != ' ' else '-' # assign aa to subject
#                         c += 1
#             fout.write(">{}\n{}\n".format(row["subject_id"], "".join(mapped_seq)))

### 1.2. MSA from the ClustalOmega, MAFFT

Initially, we need to preprocess the dataframe into a suitable input format file for ClustalOmega and MAFFT.

In [14]:
# # Retrieve the unaligned sequences from the local machine - for one sequence
# output_file = f'{directory}/results/alignments/input_files/{id_split}_input.fasta'

# with open(output_file, 'w') as fout:
#     # Write the query sequence to the output file as the first line
#     fout.write(">{}\n{}\n".format(id_split, query_sequence))

#     for index, row in selected_dis.iterrows():
#         accession = row['subject_id']
#         sequence = get_fasta(accession)
#         print(index, accession, len(sequence))
#         if id_split == accession: # remove duplicates
#             continue
#         fout.write(sequence)

In [15]:
# # Retrieve the unaligned sequences from the local machine - for all disprot sequences at once
# for d_id, d_row in disprot_sequences.iterrows():
#     id_split = d_row['disprot_id']
#     seq = d_row['query_sequence']
#     output_file = f'{directory}/results/alignments/input_files/{id_split}_input_1.fasta'
    
#     # Iterating over selected_dis
#     with open(output_file, 'w') as fout:
#         selected_dis = disordered[disordered['query_id'].isin([id_split])]
#         # Write the query sequence to the output file as the first line
#         fout.write(">{}\n{}\n".format(id_split, seq))
#         print(id_split, len(seq)) # correct

#         for index, row in selected_dis.iterrows():
#             accession = row['subject_id']
#             sequence = get_fasta(accession)
#             print(index, accession, len(sequence))
#             if id_split == accession: # remove duplicates
#                 continue
#             fout.write(sequence)

After loading this file into ClustalOmega and MAFFT, we will choose the following parameters:
- Output format: Pearson/FASTA
- Order: input

Next, we will save the outputs in the corresponding directories. After that, we need to open the files in Jalview and remove gaps in the first line corresponding to the query Uniprot ID to maintain the correct length of sequences.
As a result, we have 3 alignments for comparison: BLAST, ClustalOmega and MAFFT.

### 1.3 Preparing the proteins for analysis

In [16]:
# Prepare all sequences for the following comparison - the alignments fasta files
al_blast = f'{directory}/results/alignments/output_files/blast/{id_split}_blast.fasta'
al_clustal = f'{directory}/results/alignments/output_files/clustal/{id_split}_clustal.fasta'
al_mafft = f'{directory}/results/alignments/output_files/mafft/{id_split}_mafft.fasta'

# Make a dataframes of these alignments
blast_seqs = get_seqs(al_blast)
clustal_seqs = get_seqs(al_clustal)
mafft_seqs = get_seqs(al_mafft)
%store blast_seqs clustal_seqs mafft_seqs

# The number of rows and columns (sequences and length of the sequence)
blast_seqs.shape, clustal_seqs.shape, mafft_seqs.shape

Stored 'blast_seqs' (ndarray)
Stored 'clustal_seqs' (ndarray)
Stored 'mafft_seqs' (ndarray)


((200, 270), (198, 270), (198, 270))

In [17]:
# Calculate the statistics
# Returns the values of occupancy and entropy for each alignment
def stats_calculation(seqs, q_id):
    
    data = []
    aa = "ACDEFGHIKLMNPQRSTVWY"

    for i, column in enumerate(seqs.T):

        count = Counter(column)
        try:
            count.pop('-')
        except KeyError:
            pass
        count_sorted = sorted(count.items(), key=lambda x:x[1], reverse=True)

        non_gap = np.count_nonzero(column != "-")
        occupancy = non_gap / column.size

        probabilities = [count.get(k, 0.0) / column.size for k in aa]

        entropy = scipy.stats.entropy(probabilities, base=20)
        data.append([i, q_id, occupancy, entropy, count_sorted])

    df_calc = pd.DataFrame(data, columns=['pos', 'query_id', 'occupancy', 'entropy', 'counts'])
    return df_calc

In [18]:
blast_calc = stats_calculation(blast_seqs, id_split) # initial MSA BLAST
blast_calc.to_csv(f'results/stats/blast_calc_{id_split}.csv')
clustal_calc = stats_calculation(clustal_seqs, id_split) # initial MSA ClustalOmega
mafft_calc = stats_calculation(mafft_seqs, id_split) # initial MSA MAFFT

In [21]:
# Merge files with statistics
folder_path = 'results/stats/'
stats_total = pd.DataFrame()

for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path, filename)
        data = pd.read_csv(file_path, index_col=0)
        stats_total = pd.concat([stats_total, data], ignore_index=True)

stats_total.to_csv('results/stats_total.csv')        
stats_total.head()

Unnamed: 0,pos,query_id,occupancy,entropy,counts
0,0,Q9BYI3,1.0,0.010508,"[('M', 199), ('F', 1)]"
1,1,Q9BYI3,1.0,0.162375,"[('F', 165), ('L', 34), ('Y', 1)]"
2,2,Q9BYI3,1.0,0.170247,"[('T', 164), ('A', 34), ('L', 2)]"
3,3,Q9BYI3,1.0,0.162375,"[('S', 165), ('V', 34), ('T', 1)]"
4,4,Q9BYI3,1.0,0.162375,"[('E', 165), ('D', 34), ('C', 1)]"


### 1.4 Calculating and removing redundant regions from MSA
We will use CD-Hit tool. Given the aligned sequences as an input and setting the threshold of 62% we will keep only non-redundant regions in MSA.

In [132]:
# # Calculate redundancy 
# def calculate_Nf(msa_file, threshold, id_split):

#     output_file = f"/Users/alina/HMM/results/alignments/input_files/non-redundant/Nf_{id_split}.fasta"
#     cd_hit_path = "/Users/alina/cd-hit/cd-hit"

#     # Run CD-HIT to cluster the sequences (excluding the first line) and remove redundancy
#     cmd = f"{cd_hit_path} -i {msa_file} -o {output_file} -c {threshold} -n 4 > /dev/null"
#     subprocess.call(cmd, shell=True)

#     # Read the first line from the original MSA file
#     with open(msa_file, "r") as msa_handle:
#         first_record = next(SeqIO.parse(msa_handle, "fasta"))

#     # Temporarily store the non-redundant sequences in a list
#     non_redundant_sequences = []
#     with open(output_file, "r") as output_handle:
#         for record in SeqIO.parse(output_handle, "fasta"):
#             non_redundant_sequences.append(record)

#     # Write the non-redundant sequences to the output file
#     with open(output_file, "w") as final_handle:
#         SeqIO.write([first_record] + non_redundant_sequences, final_handle, "fasta")

#     # Count the number of sequences in the MSA and the non-redundant MSA
#     total_sequences = sum(1 for record in SeqIO.parse(msa_file, "fasta"))
#     non_redundant_sequences_count = len(non_redundant_sequences)

#     # Calculate the effective sequences (Nf)
#     Nf = non_redundant_sequences_count / total_sequences
#     print("The number of non-redundant sequences:", non_redundant_sequences_count)
#     print("The total number of sequences:", total_sequences)
#     print("The ratio of non-redundant sequences (Nf):", "{:.2f}".format(Nf))

#     return

In [99]:
# Calculate redundancy and the number of effective sequences with calculate_Nf function using the .fasta as an input
ali_file = f'{directory}/results/alignments/input_files/{id_split}_input_1.fasta'
print(id_split)
calculate_Nf(ali_file, 0.62, id_split) 

# calculate_Nf(al_blast, 0.62, id_split)
# calculate_Nf(al_clustal, 0.62, id_split)
# calculate_Nf(al_mafft, 0.62, id_split)

Q8R464
The number of non-redundant sequences: 2
The total number of sequences: 194
The ratio of non-redundant sequences (Nf): 0.01


In [21]:
# # Save the generated cluster file for the MSA notebook
# data_file = f"/Users/alina/HMM/results/alignments/input_files/non-redundant/Nf_{id_split}.fasta.clstr"
# %store data_file

In [61]:
# Load non_redundant MSA from ClustalOmega
non_redundant = f'{directory}/results/alignments/output_files/clustal/non-redundant/Nf_{id_split}_clustal.fasta'

nr_seqs = get_seqs(non_redundant)
%store nr_seqs

# Check the shape of non-redundant MSA
print(f"The shape of non-redundant MSA for {id_split} protein:", nr_seqs.shape)
nr_seqs

Stored 'nr_seqs' (ndarray)
The shape of non-redundant MSA for Q9H832 protein: (3, 354)


array([['M', 'A', 'E', ..., 'L', 'R', 'V'],
       ['M', 'A', 'E', ..., 'L', 'R', 'V'],
       ['M', 'A', 'E', ..., 'S', 'H', 'S']], dtype='<U1')

### 1.5 Defining disordered regions in MSA

Here we will look at the positions of disordered regions in an alignment. Then we'll extract these regions for the separate analysis.

In [17]:
# output_directory = f"{directory}/results/alignments/output_files/disordered"

# start_regions = []
# end_regions = []

# for i, row in curated_disprot.iterrows():
# #     id_dis = row.loc['acc']
#     start_regions.append(row.loc['start'])
#     end_regions.append(row.loc['end'])
#     separate_disordered_regions = select_dis_regions(al_blast, id_dis, start_regions, end_regions, output_directory)

# print(f"The lists of start and end positions of the disordered regions: \n"
#           f"Start regions: {start_regions}, \n"
#           f"End regions: {end_regions}")

In [18]:
# Extract the lists of start and end regions
start_regions = curated_query['start'].tolist()
end_regions = curated_query['end'].tolist()
print(f"The lists of start and end positions of the {id_split} disordered regions: \n"
      f"Start regions: {start_regions}, \n"
      f"End regions: {end_regions}")

The lists of start and end positions of the Q93KQ4 disordered regions: 
Start regions: [51], 
End regions: [81]


### 1.5.1 Define disordered regions for the redundant MSAs

In [19]:
# Split the disordered regions with the select_dis_regions function
output_directory = f"{directory}/results/alignments/output_files/disordered"
separate_disordered_regions = select_dis_regions(al_blast, id_split, start_regions, end_regions, output_directory)

# if there is more than 1 region
dis_seqs = print_dis_seqs(output_directory, 'disordered', id_split)
# %store dis_seqs
if isinstance(dis_seqs, list):  # Check if dis_seqs is a list of several disordered regions
    print(id_split, dis_seqs[0].shape, type(dis_seqs[0]))
    print(id_split, dis_seqs[1].shape, type(dis_seqs[1]))
else: # if dis_seqs consists of one disordered region only
    print(id_split, dis_seqs.shape, type(dis_seqs))

Q93KQ4 (73, 31) <class 'numpy.ndarray'>


In [20]:
# Calculation of occupancy and entropy
if isinstance(dis_seqs, list):
    dis_calc0 = stats_calculation(dis_seqs[0])
    dis_calc1 = stats_calculation(dis_seqs[0])
    %store dis_calc0 dis_calc1
else:
    dis_calc = stats_calculation(dis_seqs)
    %store dis_calc

Stored 'dis_calc' (DataFrame)


### 1.5.2 Define disordered regions for the non-redundant MSAs

In [43]:
# Split the disordered regions with the help of select_dis_regions function
output_directory = f"{directory}/results/alignments/output_files/disordered/non-redundant"
separate_disordered_regions = select_dis_regions(non_redundant, id_dis, start_regions, end_regions, output_directory)

# if there are more than 1 region
dis_seqs_nr = print_dis_seqs(output_directory, 'disordered', id_dis)
# %store dis_seqs
if isinstance(dis_seqs, list):  # Check if dis_seqs is a list of several disordered regions
    print(id_dis, dis_seqs_nr[0].shape, type(dis_seqs_nr[0]))
    print(id_dis, dis_seqs_nr[1].shape, type(dis_seqs_nr[1]))
else: # if dis_seqs consists of one disordered region only
    print(id_dis, dis_seqs_nr.shape, type(dis_seqs_nr))

Q9H832 (3, 28) <class 'numpy.ndarray'>
Q9H832 (3, 99) <class 'numpy.ndarray'>


In [44]:
# Calculation of occupancy and entropy
if isinstance(dis_seqs, list):
    dis_calc_nr0 = stats_calculation(dis_seqs_nr[0])
    dis_calc_nr1 = stats_calculation(dis_seqs_nr[0])
    %store dis_calc_nr0 dis_calc_nr1
else:
    dis_calc_nr = stats_calculation(dis_seqs_nr)
    %store dis_calc_nr

Stored 'dis_calc_nr0' (DataFrame)
Stored 'dis_calc_nr1' (DataFrame)
