In [4]:
# prepare the modules
import pandas as pd
from pandas import DataFrame
from pybiomart import Server
import os
import re
from Bio import SeqIO
import numpy as np
import peptides
from Bio import SeqIO
import ast
import plotly.express as px
import plotly.graph_objects as go
from Bio import Phylo, AlignIO
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
from Bio.Align import MultipleSeqAlignment


# Sequence retrieval

In [None]:
# Get the Uniprot ID and Ensembl GeneID of all hSLC6
df_resolute = pd.ExcelFile('../Inputs/RESOLUTE_SLCs.xlsx').parse('Sheet1')
SLCFamily = 'SLC6'
dfFiltered = df_resolute.query("family == @SLCFamily")
UniprotList = list(dfFiltered['UniProt Accession'])
SLC_name = list(dfFiltered['SLC_name'])
SLC_EnsembleGeneID = list(dfFiltered['Ensembl GeneID'])

# Query Biomart to retrieve all protein_coding sequences from multiple transcripts of each SLC gene
server = Server(host='http://www.ensembl.org')
dataset = (server.marts['ENSEMBL_MART_ENSEMBL'].datasets['hsapiens_gene_ensembl'])
allSeq = []
for ID in SLC_EnsembleGeneID:
    allSeq.append(dataset.query(
        attributes=['external_gene_name', 'ensembl_gene_id', 'ensembl_transcript_id', 'transcript_is_canonical', 'peptide'],
        filters={'link_ensembl_gene_id': [ID], 'transcript_biotype': ['protein_coding'], 'transcript_is_canonical': [1.0]},))

# Concat the result in one single dataframe
df_allSeq = pd.concat([df for df in allSeq], ignore_index=True)

# Clean the peptide terminating mark *, as this sign takes a digit for global descriptor calculation
peptide = []
for item in df_allSeq.Peptide:
    peptide.append(item.replace('*', ''))
df_allSeq.drop(['Peptide'], axis=1, inplace=True)
df_allSeq['peptide'] = peptide

In [None]:
dataset.list_filters()

# MSA

In [None]:
# Prepare for alignment the fasta file for the canonical and isoform sequences
df_allSeq['Ensembl Canonical'] = df_allSeq['Ensembl Canonical'].astype(str)
with open(r'../Inputs/IsoFastaSLC6.fasta', 'w') as IsoFasta:
    for idx1, a in df_allSeq.iterrows():
        IsoFasta.write(
            ">" + 'Gene_name=' + a['Gene name'] + '|transcript_id=' + a['Transcript stable ID'] + '|canonical=' + a[
                'Ensembl Canonical'] + "\n")
        IsoFasta.write(a['peptide'] + "\n")
        IsoFasta.write('\n')

# Create Multi-sequence alignment with single fasta file containing all sequences
in_file_Iso = r'../Inputs/IsoFastaSLC6.fasta'
out_file_Iso = r'../Inputs/AlignMuscleSLC6_Iso.fasta'
os.system('muscle -in %s -out %s' % (in_file_Iso, out_file_Iso))

In [5]:
# convert the MSA
out_file_Iso = r'../Inputs/AlignMuscleSLC6_Iso.fasta'

# Parse the fasta file and convert it into a DataFrame
alignment_all = pd.DataFrame([{'ID': record.id, 'Sequence': str(record.seq)} for record in SeqIO.parse(out_file_Iso, 'fasta')])

# Split the ID column into multiple columns and drop the original ID column
alignment_all[['Gene_name', 'transcript_id', 'canonical']] = alignment_all['ID'].str.split('|', expand=True)
alignment_all.drop('ID', axis=1, inplace=True)

# Filter rows where canonical is 'canonical=1.0' and keep only the Gene_name and Sequence columns
alignment = alignment_all[alignment_all['canonical'] == 'canonical=1.0'][['Gene_name', 'Sequence','transcript_id']]

# Keep rows seperately where canonical is 'canonical=nan' and keep only the Gene_name and transcript_id columns
alignment_iso = alignment_all[alignment_all['canonical'] == 'canonical=nan'][['Gene_name', 'transcript_id']]

# Remove the "Gene_name=" from the Gene_name column and the other unneccessary strings
alignment['Gene_name'] = alignment['Gene_name'].str.replace('Gene_name=', '')
alignment['transcript_id'] = alignment['transcript_id'].str.replace('transcript_id=', '')
alignment_iso['Gene_name'] = alignment_iso['Gene_name'].str.replace('Gene_name=', '')
alignment_iso['transcript_id'] = alignment_iso['transcript_id'].str.replace('transcript_id=', '')
# Convert the DataFrame into a dictionary where the keys are the Gene_name and the values are the sequences
alignment_dict = alignment.set_index('Gene_name')['Sequence'].to_dict()

# replace the alphabet inn Sequence with 0
alignment_dict = {key: re.sub('[a-zA-Z]', '0', value) for key, value in alignment_dict.items()}

# Score pending

## interaface plddt & Avg plddt & model ranking

In [6]:
# merged the three scores in one dataframe

# Specify the directory
directory = '/Users/jiahui/ucloud/Documents/SLC/Resolution/SLC6_HotSpots/PPI/AF/python_scripts/Inputs/interface_plddt'

# Get a list of all csv files in the directory
plddt_files = [f for f in os.listdir(directory) if f.endswith('.csv')]

# Initialize empty list to hold the dataframes
plddt = []

# Loop through the csv files and read each one into a pandas DataFrame
for interaction_file in plddt_files:
    # Extract X_Y and n from the filename
    X_Y, rank = interaction_file[:-4].rsplit('_', 1)
    
    df_mean_visualization = pd.read_csv(os.path.join(directory, interaction_file), header=None).T
    
    # Split the first column into two columns and keep only the float value
    df_mean_visualization[0] = df_mean_visualization[0].str.split(' ').str[2].astype(float)
    df_mean_visualization[1] = df_mean_visualization[1].str.split(' ').str[2].astype(float)
    
    # Rename the column
    df_mean_visualization = df_mean_visualization.rename(columns={0: 'Avg pLDDT', 1: 'Interface pLDDT'})
    df_mean_visualization['rank'] = rank   
    
    # Set X_Y as the index
    df_mean_visualization.index = [X_Y]
    
    plddt.append(df_mean_visualization)

# Concatenate all the dataframes in the list
merged_scores = pd.concat(plddt, ignore_index=False)

## Interaction residues

In [7]:
# append the interaction residue in the merged_scores dataframe

# Specify the directory
interaction_directory = '/Users/jiahui/ucloud/Documents/SLC/Resolution/SLC6_HotSpots/PPI/AF/python_scripts/Inputs/interaction_csv'

# Get a list of all csv files in the new directory
interaction_files = [f for f in os.listdir(interaction_directory) if f.endswith('.csv')]

# Loop through the csv files in the new directory
for interaction_file in interaction_files:
    # Extract X_Y and n from the filename
    X_Y, rank = interaction_file[:-4].rsplit('_', 1)
    
    # Read the csv file into a pandas DataFrame
    df_interaction = pd.read_csv(os.path.join(interaction_directory, interaction_file))
    
    # Extract the fourth column from each row and store it in a list
    interaction_list = df_interaction.iloc[:, 3].tolist()
    
# Check if the X_Y_n from the new directory matches with any X_Y(index) n(rank) in the merged_scores DataFrame
    if (X_Y in merged_scores.index) and (merged_scores.loc[X_Y, 'rank'] == rank).any():
        # If a match is found, append the list to the corresponding row in the merged_scores DataFrame
        merged_scores.loc[(merged_scores.index == X_Y) & (merged_scores['rank'] == rank), 'interaction list'] = str(interaction_list)


# Rename the new column as "interaction list"
merged_scores.rename(columns={merged_scores.columns[-1]: 'interaction list'}, inplace=True)

# the length of "interaction_list" in each row and orint it
merged_scores['list length'] = merged_scores['interaction list'].str.len()

  merged_scores.loc[(merged_scores.index == X_Y) & (merged_scores['rank'] == rank), 'interaction list'] = str(interaction_list)


In [8]:
# make sure the first three column are float
merged_scores.iloc[:, 0:3] = merged_scores.iloc[:, 0:3].astype(float)
merged_scores['interaction list'] = merged_scores['interaction list'].apply(ast.literal_eval)
# extract the SLC6Ai from the merged_scores into a seperated column and name it as Gene name
merged_scores['Gene name'] = merged_scores.index.str.split('_').str[1]

# create a function to find the relative postion of nth residue in the alignment
def absolute_to_relative(alignment, n):
    relative_position = 0
    absolute_position = 0
    for residue in alignment:
        if residue != '-':
            absolute_position += 1
        relative_position += 1        
        if absolute_position == n:
            return relative_position


# cast alignment_dict into a np.array, with all the '-' into nan and 0 remains 0, the index of each line should be the key of the dict
alignment_score_array = np.array([[np.nan if residue == '-' else 0 for residue in sequence] for sequence in alignment_dict.values()])

# stack itself 3 times, the 4th as a count of additions
alignment_score_array = np.dstack([alignment_score_array, alignment_score_array, alignment_score_array, np.zeros_like(alignment_score_array)], )

# name list
gene_names = list(alignment_dict.keys())

# iterate through the merged_scores and calculate the score for each interaction residue
for idx, row in merged_scores.iterrows():
    gene_name = row['Gene name']
    if gene_name in gene_names:
        alignment_list = alignment_dict[gene_name]
        interaction_list = row[3]
        for val in interaction_list:  
            relative_position = absolute_to_relative(alignment_list, int(val)) - 1
            assert alignment_list[relative_position] != '-'
            score = [(row[0]/100), (row[1]/100), np.exp(-0.1*row[2])]
            # find index of gene_name in gene_names
            index = gene_names.index(gene_name) # this makes sure the alignment_score_array is in the same order/ same the same index as the gene_names
            alignment_score_array[index][relative_position][0] += score[0]
            alignment_score_array[index][relative_position][1] += score[1]
            alignment_score_array[index][relative_position][2] += score[2]
            alignment_score_array[index][relative_position][3] += 1

mean_scores = np.where(alignment_score_array[:, :, 3, None] != 0, alignment_score_array[:, :, :3] / alignment_score_array[:, :, 3, None], 0)
sum_scores = alignment_score_array[:, :, 3]

# add up each column of the mean_scores into a array of shap [992,3]
mean_scores_family = np.nan_to_num(np.sum(mean_scores, axis=0) / np.count_nonzero(mean_scores, axis=0))
sum_scores_family = np.sum(mean_scores, axis=0)

  interaction_list = row[3]
  score = [(row[0]/100), (row[1]/100), np.exp(-0.1*row[2])]
  mean_scores = np.where(alignment_score_array[:, :, 3, None] != 0, alignment_score_array[:, :, :3] / alignment_score_array[:, :, 3, None], 0)
  mean_scores_family = np.nan_to_num(np.sum(mean_scores, axis=0) / np.count_nonzero(mean_scores, axis=0))


# Visualisation

In [9]:
df_mean_visualization = pd.DataFrame(mean_scores_family).T
df_sum_visualization = pd.DataFrame(sum_scores_family).T

import plotly.subplots as sp
def heatmap(df_heatmap, title, y_text):
    fig = go.Figure(data=go.Heatmap(
                       z=df_heatmap.values,
                       colorscale='RdPu'))
    fig.update_layout(
    title=title,
    yaxis=dict(
        tickmode='array',
        tickvals=list(range(df_heatmap.shape[0])),
        ticktext=y_text
    )
)
    fig.show()

heatmap(df_mean_visualization, 'Mean score of SLC6 family', ['Avg pLDDT', 'Interface pLDDT', 'Model Rank'])
heatmap(df_sum_visualization, 'Sum scores of the SLC6 family', ['Avg pLDDT', 'Interface pLDDT', 'Model Rank'])




In [10]:
# Read the alignment file
alignment_visual = AlignIO.read('../Inputs/AlignMuscleSLC6_Iso.fasta', 'fasta')
# Filter the alignment to include only canonical sequences and sequences that contain "SLC6"
alignment_visual = [record for record in alignment_visual if 'SLC6A' in record.id and 'canonical=1.0' in record.id]
# Simplify the id of each sequence, keep only gene name
for record in alignment_visual:
    record.id = record.id.split('|')[0]
    record.id = record.id.replace("Gene_name=", "")  # Modify this line according to the format of your ids
# Create a new MultipleSeqAlignment with the filtered records
alignment_visual = MultipleSeqAlignment(alignment_visual)
# Generate distance matrix
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(alignment_visual)

# Construct phylogenetic tree
constructor = DistanceTreeConstructor()
tree = constructor.nj(dm)

# Print the phylogenetic tree
Phylo.draw_ascii(tree)


                               ___________________________ SLC6A15
                ______________|
             __|              |__________________________ SLC6A17
            |  |
  __________|  |____________________________________________________ SLC6A16
 |          |
 |          |    _______________________________ SLC6A19
 |          |___|
 |              | ______________________________ SLC6A20
 |              ||
 |               |______________________________ SLC6A18
 |
 |       _________________________________________________ SLC6A5
 |   ___|
 |  |   |________________________________ SLC6A14
 |__|
 |  |   _____________________________ SLC6A9
 |  |__|
 |     |___________________________ SLC6A7
 |
_|              ________________ SLC6A2
 |    _________|
 |___|         |_________________ SLC6A3
 |   |
 |   |_______________________________ SLC6A4
 |
 |      __________________________ SLC6A8
 |     |
 |   __|  ______________________ SLC6A6
 |  |  | |
 |  |  |_|     ______________

In [11]:
# Create a mapping from gene names to their index

# Get the list of terminal nodes
tree_list = [terminal.name for terminal in tree.get_terminals()]

# Create DataFrame for the avg_plddt and local_plddt score from the mean_scores array
mean_avg_plddt = pd.DataFrame(mean_scores[:,:,0])
mean_local_plddt = pd.DataFrame(mean_scores[:,:,1])

# Create a mapping from gene names to their index
mapping = {gene: i for i, gene in enumerate(gene_names)}

# Create a new order for your dataframes
new_order = [mapping[gene] for gene in tree_list]

# Reorder mean_avg_plddt and mean_local_plddt based on the new order
mean_avg_plddt = mean_avg_plddt.iloc[new_order]
mean_local_plddt = mean_local_plddt.iloc[new_order]

# Create a heatmap for the mean_avg_plddt and mean_local_plddt
heatmap(mean_avg_plddt[::-1], 'Mean Avgerage pLDDT of each SLC6s', tree_list[::-1])
heatmap(mean_local_plddt[::-1], 'Mean Interface pLDDT of each SLC6s', tree_list[::-1])


# Read the mutation data and transfer the position of the muttaion
df_mutations = pd.read_csv('../Inputs/SLC6_UniprotClinvarLitVar_Pathogenicity_fullTranscript.csv')
# Filter the mutation data to include only canonical sequences
df_mutations = df_mutations[~df_mutations['transcript_id'].isin(alignment_iso['transcript_id'])]
# Create a new column for the relative position
df_mutations['relative_position'] = df_mutations.apply(lambda x: absolute_to_relative(alignment_dict[x['SLC_name']], x['Position']), axis=1)
# box plot of cout of pathogenic mutations on their relaive positions
counts = df_mutations[df_mutations['Pathogenicity'] == 1].groupby('relative_position').size()
df_count_patho = counts.reset_index()
df_count_patho.columns = ['relative_position', 'count']

num_columns = mean_local_plddt.shape[1]
# Create a DataFrame with all x values
x_values = pd.DataFrame({'relative_position': range(num_columns)})

# Merge with counts_df
df_count_patho_msa = pd.merge(x_values, df_count_patho, on='relative_position', how='left')

# Fill NaN values with 0
df_count_patho_msa['count'].fillna(0, inplace=True)

# Create a line plot
fig = go.Figure(data=go.Scatter(x=df_count_patho_msa['relative_position'], y=df_count_patho_msa['count'], mode='lines'))

# Set the title and labels
fig.update_layout(title='Pathogenicity of mutations at each relative position',
                  xaxis_title='Relative Position',
                  yaxis_title='Count')

# Show the plot
fig.show()


In [12]:
# Create a mapping from gene names to their index
mapping_gene2sequence = {gene: i for i, gene in enumerate(gene_names)}

# Create a new order for your dataframes based on the tree_list
new_order = [mapping_gene2sequence[gene] for gene in tree_list]

# Reorder mean_avg_plddt and mean_local_plddt based on the new order
mean_avg_plddt = pd.DataFrame(mean_scores[:, :, 0])
mean_local_plddt = pd.DataFrame(mean_scores[:, :, 1])
mean_avg_plddt = mean_avg_plddt.iloc[new_order]
mean_local_plddt = mean_local_plddt.iloc[new_order]

heatmap_trace_local= go.Heatmap(z=mean_local_plddt[::-1], 
                           colorscale='Greys', 
                           y=tree_list[::-1],
                           zmin=0, zmax=1, 
                           colorbar=dict(title='Mean<br>Interface<br>pLDDT'))
heatmap_trace_avg= go.Heatmap(z=mean_avg_plddt[::-1],
                            colorscale='Greys',
                            y=tree_list[::-1],
                            zmin=0, zmax=1,
                            colorbar=dict(title='Mean<br>Average<br>pLDDT'))



# Read the mutation data and transfer the position of the mutation
df_mutations = pd.read_csv('../Inputs/SLC6_UniprotClinvarLitVar_Pathogenicity_fullTranscript.csv')

# Filter the mutation data to include only canonical sequences
df_mutations = df_mutations[~df_mutations['transcript_id'].isin(alignment_iso['transcript_id'])]

# Create a new column for the relative position
df_mutations['relative_position'] = df_mutations.apply(lambda x: absolute_to_relative(alignment_dict[x['SLC_name']], x['Position']), axis=1)

# Count of pathogenic mutations on their relative positions
count_patho = df_mutations[df_mutations['Pathogenicity'] == 1].groupby('relative_position').size()
df_count_patho = count_patho.reset_index()
df_count_patho.columns = ['relative_position', 'count']


# Coun the pathogenic mutations on their relative positions for every relative positions. For position with no count, fill it with 0
df_count_patho_msa = pd.merge(pd.DataFrame({'relative_position': range(mean_local_plddt.shape[1])}), df_count_patho, on='relative_position', how='left')
df_count_patho_msa['count'].fillna(0, inplace=True)

line_plot_trace = go.Scatter(x=df_count_patho_msa['relative_position'], 
                             y=df_count_patho_msa['count'], 
                             mode='lines', 
                             line=dict(color='black', width=1),
                             name='Pathogenicity Count'
                             )
layout = go.Layout(title='Count of pathogenicity of mutations and mean pLDDT of each SLC6s',
                   xaxis=dict(title='Relative Position'),
                   yaxis=dict(title='Gene Names',
                              tickvals=tree_list[::-1],  # Set the tick values to the tree_list
                              ticktext=tree_list[::-1]),          
                   yaxis2=dict(title='Count', overlaying='y', side='right'),
                   plot_bgcolor='rgba(0,0,0,0)'
                   )
fig_local = go.Figure(data=[heatmap_trace_local, line_plot_trace], layout=layout)
fig_avg = go.Figure(data=[heatmap_trace_avg, line_plot_trace], layout=layout)
fig_local.show()
fig_avg.show()

# Fill the metrix with Alphafold-misssence

In [14]:
# preprocess the Alpha-missence data
import pandas as pd
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore")

# A function which convert the column of "likely_benign", "likely_pathogenic" and "ambiguous" in to labels of 0, 1 and 2
def convert_label(x):
    assert x in ["likely_benign", "likely_pathogenic", "ambiguous"]
    if x == "likely_benign":
        return 0
    elif x == "likely_pathogenic":
        return 1
    elif x == "ambiguous":
        return 2
    return pd.np.nan

# load Alphamissence data from tsv to data frame
aa_substitution = pd.read_csv('../data/AlphaMissense_hg38.tsv',sep='\t', skiprows=3, chunksize=100000)

output = pd.DataFrame()
for chunk in tqdm(aa_substitution):
    columns = ['transcript_id', 'protein_variant', "am_class", "am_pathogenicity"]
    details = chunk[columns]
    details['count'] = 1
    details['am_class'] = details['am_class'].apply(convert_label)
    groupup_allele = details.groupby(columns).sum().reset_index()
    output = output.append(groupup_allele, ignore_index=True)

final_output = output.groupby(columns).sum().reset_index()
final_output.to_csv('../data/aggregated_hg38.csv', index=False)

FileNotFoundError: [Errno 2] No such file or directory: '../data/AlphaMissense_hg38.tsv'

In [15]:
# Load the Alpha-miss data
aggregated_hg38 = pd.read_csv('../Inputs/aggregated_hg38.csv')

# Split the mutation of DeepMind data into three columns: original, position and mutation
def split_variant(x):
    return re.match(r"([A-Z])([0-9]+)([A-Z])",x).groups()
aggregated_hg38["original"], aggregated_hg38["position"], aggregated_hg38["mutat"] = zip(*aggregated_hg38["protein_variant"].apply(split_variant))

# Ignore the transcript content after comma
aggregated_hg38['transcript_id'] = aggregated_hg38['transcript_id'].apply(lambda x: x.split(".")[0])

"""
SLC6A4,2,1,12 canonical sequences are not found in the alpha-miss data
"""
# check if transcript_id of canonical SLC6A4,2,1,12 is in the aggregated_hg38['transcript_id']
transcript_ids = ['00000650711', '00000568943', '00000287766', '00000684302']
# Create a boolean Series for each id and concatenate them with logical OR
contains_values = pd.concat([aggregated_hg38['transcript_id'].str.contains(id) for id in transcript_ids], axis=1).any(axis=1)
# Check if any True exists in the Series
any_contains_values = contains_values.any()
print(any_contains_values)

False


In [16]:
# Filter the aggregated_hg38 DataFrame
filtered_hg38 = aggregated_hg38[aggregated_hg38['transcript_id'].isin(alignment['transcript_id'])]
# Add gene_name to the filtered_hg38 DataFrame
mapping_gene2transcript = alignment.set_index('transcript_id')['Gene_name']
filtered_hg38['Gene_name'] = filtered_hg38['transcript_id'].map(mapping_gene2transcript)
# Convert the position to relative position
filtered_hg38['relative_position'] = filtered_hg38.apply(lambda x: absolute_to_relative(alignment_dict[x['Gene_name']], int(x['position'])), axis=1)
# Calculate the mean std of the am_pathogenicity for each relative position of each Gene_name
mean_am_pathogenicity = filtered_hg38.groupby(['Gene_name', 'relative_position'])['am_pathogenicity'].mean().reset_index()
std_am_pathogenicity = filtered_hg38.groupby(['Gene_name', 'relative_position'])['am_pathogenicity'].std().reset_index()
# Merge the mean and std of am_pathogenicity into a single DataFrame
mean_std_am = mean_am_pathogenicity.merge(std_am_pathogenicity, on=['Gene_name', 'relative_position'], suffixes=('_mean', '_std'))

# Reorder the dataframe according to tree_list
mean_std_am['Gene_name'] = pd.Categorical(mean_std_am['Gene_name'], categories=tree_list, ordered=True)
mean_std_am = mean_std_am.sort_values(['Gene_name','relative_position'])

# Creat heatmap from mean_std_am with spherical markers
fig = px.scatter(mean_std_am, x='relative_position', y='Gene_name', color='am_pathogenicity_mean',
                  size='am_pathogenicity_std', size_max=15, color_continuous_scale = 'Greys',
                  labels={'am_pathogenicity_mean': 'Mean<br>pathogenicity<br>probability'})

fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    title='Alpha-Missense prediction of SLC6 family',
    xaxis_title='Relative position',
    yaxis_title='Gene names'
)
# Add this line to reverse the y-axis
fig.update_yaxes(autorange="reversed")
fig.show()

# Retro-checking

In [15]:
# Create a widget for giving inputs
import ipywidgets as widgets

def relative_to_absolute(alignment, n):
    absolute_position = 0
    relative_position = 0
    for residue in alignment:
        if residue != '-':
            absolute_position += 1
        relative_position += 1
        if relative_position == n:
            return absolute_position
        
def get_absolute_positions(gene_name, n_values):
    alignment = alignment_dict[gene_name]
    absolute_positions = [relative_to_absolute(alignment, n) for n in n_values]
    return absolute_positions

# Create text field for gene_name
gene_name_text = widgets.Text(
    value='',
    placeholder='SLC gene name in capital letters',
    description='SLC name:'
)

# Create text field for n_values
relative_psotions_text = widgets.Text(
    value='',
    placeholder='Relative positions separated by ,',
    description='Positions:'
)

output = widgets.Output()
# output.clear_output()

# Create a button to trigger the calculation
button = widgets.Button(description="Find")

# Define what happens when the button is clicked
def on_button_clicked(b):
    with output:
        output.clear_output()
        # Get the gene name and n values from the text fields
        gene_name = gene_name_text.value
        relative_positions = list(map(int, relative_psotions_text.value.split(',')))

        # Calculate the absolute positions
        absolute_positions = get_absolute_positions(gene_name, relative_positions)

        # Display the result in the output widget
        print("correspondent absolute position:")
        print(', '.join(map(str, absolute_positions)))


# Attach the event to the button
button.on_click(on_button_clicked)

# Display the widgets
display(gene_name_text, relative_psotions_text, button, output)

Text(value='', description='SLC name:', placeholder='SLC gene name in capital letters')

Text(value='', description='Positions:', placeholder='Relative positions separated by ,')

Button(description='Find', style=ButtonStyle())

Output()