# **Hotspots finder**

Python script to find mutation hotspots in a DNA sequence

For additional details, please refer to the GitHub repository: https://github.com/RTlabCBM/FidelityFinder

In [None]:
#@markdown # Upload your CSV files
#@markdown The input must be csv files with the following format: POS, REF, ALT, AO, CIGAR, TYPE, AO_rate, Variant frequency (%)\
#@markdown CSV files with this format can be obtained using the vcf_analyzer script (found in  https://github.com/RTlabCBM/FidelityFinder)

#@markdown

#@markdown Select one option to upload your CSV files:

#@markdown - Upload from your computer
# Exactly one of the two boolean flags below must be True: the upload step
# further down aborts with an error when both or neither are selected.
upload_from_computer = True #@param {type:"boolean"}
#@markdown - Upload from the Google Drive folder provided
upload_from_drive =  False #@param {type:"boolean"}
# Only read when upload_from_drive is selected; every file in this folder whose
# name ends in '.csv' is then used as input.
folder_path = '/content/drive/MyDrive/folder_path' #@param {type:"string"}




## Imports
##----------

from google.colab import files
from google.colab import drive
from google.colab import files
import os, sys



## Functions
##----------

def send_error_message(message):
    """Print `message` as a tab-indented ERROR line, then stop execution.

    Parameters:
        - message: text to show after the "ERROR: " label.

    Raises:
        - SystemExit: always, with exit status 1 (through sys.exit).
    """
    error_text = f"\tERROR: {message}\n"
    print(error_text)
    sys.exit(1)




## Main program
##-------------

## Step 1: Upload Files: Files are added to the uploaded_files_list list based on the chosen upload option.
##--------------------

# Exactly one upload option must be selected. Every branch below either defines
# uploaded_files_list (a plain list of file paths) or aborts the cell through
# send_error_message.

# Option: upload_from_computer:
if upload_from_computer and not upload_from_drive:
  uploaded_files = files.upload()
  # The keys of the returned mapping are used as the file paths; materialise
  # them so that both upload options produce the same type (a list).
  uploaded_files_list = list(uploaded_files.keys())
  print(f"{len(uploaded_files_list)} files uploaded")

# Option: upload_from_drive:
elif not upload_from_computer and upload_from_drive:
  drive.mount('/content/drive')
  if os.path.exists(folder_path):
      # os.listdir returns names in arbitrary order; sorting makes the sample
      # (column) order of every downstream table and graph reproducible.
      uploaded_files_list = [os.path.join(folder_path, f) for f in sorted(os.listdir(folder_path)) if f.endswith('.csv')]
      print(f"{len(uploaded_files_list)} files uploaded")
  else:
      message = "The specified folder path does not exist:\n" \
                "Write the path to your folder file. Example:\n" \
                "/content/drive/MyDrive/folder_name"
      send_error_message(message)

# Both options selected:
elif upload_from_computer and upload_from_drive:
    message = "Select ONLY one option to upload your files:\n" \
              "  - upload_from_computer\n" \
              "  - upload_from_drive"
    send_error_message(message)

# No option selected:
else:
    message = "Select one option to upload your files:\n" \
              "  - upload_from_computer\n" \
              "  - upload_from_drive"
    send_error_message(message)



# Step 2: Check the length of uploaded_files_list
##--------------------

# An empty selection (or a Drive folder without .csv files) would only fail
# later, inside the main program, so stop here with a clear message.
if len(uploaded_files_list) < 1:
    message = "0 files were uploaded\n" \
              "Run the cell again and make sure that files are uploaded"
    send_error_message(message)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
15 files uploaded


In [None]:
#@markdown # Input parameters

# The values set in this cell are read as module-level names by the main
# program cell and by the Plotly cells, so this cell must be run before them.

#@markdown Prefix for the output filename.

# Prepended to the name of every CSV, Excel and PNG file the main program writes.
output_prefix = "undefined" #@param {type:"string"}

#@markdown Graphs axis sizes:
# Forwarded through process_merged_df to create_cumulative_graph.
graphs_x_axis_size = 50 # @param {type:"integer"}
graphs_y_axis_size = 10 # @param {type:"integer"}

#@markdown If selected, use a logarithmic scale for the y-axis.
graphs_log_scale = False #@param {type:"boolean"}

#@markdown If selected, show each position tick in the x-axis of the graphs
graphs_detailed = False #@param {type:"boolean"}

#@markdown If selected, fix the x-axis range between graphs_x_min and graphs_x_max.
# graphs_x_min / graphs_x_max are only applied when graphs_fixed_range is selected.
graphs_fixed_range = True #@param {type:"boolean"}
graphs_x_min = 99 # @param {type:"integer"}
graphs_x_max = 401 # @param {type:"integer"}

In [None]:
#@markdown # Main program
__file__ = "main_program"

!pip install xlsxwriter==3.1.9


# encoding: UTF-8



__doc__ = """
SYNOPSIS

  Python Script to find mutation hotspots in a sequence


  This script is an adaptation of the original program to be run on Google Colab.

DESCRIPTION

 Parameters. These variables are needed:
    - input_files_list (list): a list with the paths of the CSV files to be merged
    - output_prefix (str): Output prefix for the merged Excel file.
    - graphs_x_axis_size (int): Size of the x-axis in the graph.
    - graphs_y_axis_size (int): Size of the y-axis in the graph.
    - graphs_log_scale (bool): If True, use a logarithmic scale for the y-axis.
    - graphs_detailed (bool): If True, show detailed x-axis with unique positions.
    - graphs_fixed_range (bool): If True, fix the x-axis range between graphs_x_min and graphs_x_max.
    - graphs_x_min (int): Minimum value for the x-axis range.
    - graphs_x_max (int): Maximum value for the x-axis range.


 Output files:
    - csv_file: CSV file containing all the variants found and the sum of their frequency (AO values)
    - excel_file: Excel file containing multiple sheets with extracted data
    - heatmap_snp_types_graph: Image file (PNG) of a heatmap showing SNP types as percentages.
    - cumulative_graph_ao: Image file (PNG) of a cumulative bar graph for total AO values.
    - cumulative_graph_ao_rate: Image file (PNG) of a cumulative bar graph for total AO rate values.
    - indels_graph: Image file (PNG) of a cumulative bar graph for indels rate values.
    - snp_graph: Image file (PNG) of a cumulative bar graph for SNP rate values.
    - tc_snp_graph: Image file (PNG) of a cumulative bar graph for TC SNP rate values.

AUTHORS

    Javier Martinez del Río (javier.martinez@cbm.csic.es; javier.mardelrio@gmail.com)

"""

__version__ = 'v1.0.0'





## Imports
##-------------

import pandas as pd
import xlsxwriter
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import math
import sys, os

from tqdm import tqdm
%matplotlib inline




## Functions
##-------------


def csvs_to_df(csv_files):
    """
    Combines multiple CSV files into a single DataFrame based on common columns.

    Parameters:
        - csv_files (list): List of file paths to CSV files.

    Returns:
        - merged_df (pd.DataFrame): Merged DataFrame containing data from all CSV files.
    """
    merged_df = pd.DataFrame(columns=['POS', 'REF', 'ALT', 'CIGAR', 'TYPE'])
    for file_path in csv_files:
      df = pd.read_csv(file_path)
      sample_name = os.path.basename(file_path).split(".csv")[0]
      df = df.rename(columns={'AO': sample_name + '_AO', 'AO_rate': sample_name + '_AO_rate', 'Variant frequency (%)': sample_name + '_Variant_frequency_(%)'}) #"AO", "AO_rate" and "Variant_frequency_(%)" columns are renamed to identify samples
      merged_df = pd.merge(merged_df, df, on=['POS', 'REF', 'ALT', 'CIGAR', 'TYPE'], how='outer').sort_values('POS')
    return merged_df


def create_dict_snp_types(dataframe, ao_columns):
    """
    Creates a dictionary with counts for each type of substitution (SNP variants).

    Only rows whose "TYPE" is "snp" are counted. For each of them the values of
    all `ao_columns` are summed (NaN entries are skipped) and added to the entry
    keyed by REF + ALT.

    Parameters:
        - dataframe (pd.DataFrame): Input dataframe with "TYPE", "REF" and "ALT" columns
        - ao_columns (list): list of columns of the dataframe that contain the frequency of each mutation

    Returns:
        - dict_snp_types (dict): Dictionary containing SNP types as keys and their corresponding counts as values. Example: {"AT": 45, "AG": 32, ...}
          Empty when the dataframe has no "snp" rows.
    """
    snp_rows = dataframe[dataframe["TYPE"] == "snp"]
    # Row-wise total over every AO column, computed once for all SNP rows
    row_totals = snp_rows[ao_columns].sum(axis=1)

    dict_snp_types = {}
    # Plain iteration instead of a side-effecting callback inside DataFrame.apply:
    # pandas may invoke such a callback extra times (e.g. to probe an empty
    # frame), and a bare "except:" was being used as the "key not seen yet" test.
    for ref, alt, total in zip(snp_rows["REF"], snp_rows["ALT"], row_totals):
        snp_type = ref + alt
        dict_snp_types[snp_type] = dict_snp_types.get(snp_type, 0) + total
    return dict_snp_types



def create_df_snp_types(dict_snp_types):
    """
    Turns the SNP-type dictionary produced by create_dict_snp_types into a
    4x4 table.

    Rows (index) are the reference nucleotides and columns are the nucleotides
    they are substituted by. Each filled cell holds the count converted to a
    string; cells without an entry in the dictionary hold None.

    Parameters:
    - dict_snp_types (dict): Dictionary containing counts of SNP types, keyed by
      reference base followed by substituted base (e.g. "AT").

    Returns:
    - df_snp_types (pd.DataFrame): DataFrame representing the SNP types table.
    """
    bases = ['G', 'A', 'T', 'C']
    # Outer key = substituted base (column), inner key = reference base (row)
    table = {alt_base: dict.fromkeys(bases) for alt_base in bases}
    for snp_type, count in dict_snp_types.items():
        ref_base, alt_base = snp_type[0], snp_type[1]
        table[alt_base][ref_base] = str(count)
    return pd.DataFrame(table)


def create_cumulative_graph(dataframe, AO_columns, output_prefix, graph_name,
                            graphs_x_axis_size, graphs_y_axis_size,
                            graphs_log_scale, graphs_detailed,
                            graphs_fixed_range, graphs_x_min, graphs_x_max):
    """
    Creates a cumulative (stacked) bar graph and saves it as a PNG file.

    One bar series is drawn per column in `AO_columns`, each stacked on top of
    the previous ones, with the 'POS' column of `dataframe` on the x-axis.

    Parameters:
        - dataframe (pd.DataFrame): Input DataFrame. Must contain 'POS' and every column in AO_columns.
        - AO_columns (list): List of columns containing values for the cumulative graph.
        - output_prefix (str): Prefix for the output filename.
        - graph_name (str): Name of the graph.
        - graphs_x_axis_size (int): Width of the figure (matplotlib figsize units).
        - graphs_y_axis_size (int): Height of the figure (matplotlib figsize units).
        - graphs_log_scale (bool): If True, use a logarithmic scale for the y-axis.
        - graphs_detailed (bool): If True, show detailed x-axis with unique positions.
        - graphs_fixed_range (bool): If True, fix the x-axis range between graphs_x_min and graphs_x_max.
        - graphs_x_min (int): Minimum value for the x-axis range.
        - graphs_x_max (int): Maximum value for the x-axis range.

    Returns:
        - cumulative_graph (str): File path of the saved cumulative graph.
    """
    print(f'\t\tPlotting {graph_name}')
    # Use the requested figure size. The size used to be hard-coded to (40, 8),
    # which silently ignored graphs_x_axis_size and graphs_y_axis_size.
    fig, ax = plt.subplots(figsize=(graphs_x_axis_size, graphs_y_axis_size))

    # Extract values from the columns of interest in the DataFrame
    AO_values = dataframe[AO_columns]
    positions = dataframe['POS']

    # Cumulative height of the bars drawn so far, one entry per row
    bottom = np.zeros(len(positions))
    # Create cumulative bars for each column in AO_columns
    for col in tqdm(AO_columns):
        # Plain array: the stacking is positional and must not depend on the
        # DataFrame index (filtered frames keep their original row labels).
        heights = AO_values[col].to_numpy()
        ax.bar(positions, heights, alpha=0.8, bottom=bottom, label=col)
        bottom = bottom + heights

    # Configure parameters of the graphs
    ax.set_xlabel('Position')
    ax.set_ylabel('Variants')
    ax.set_title(output_prefix + "_" + graph_name)
    ax.legend(loc='upper left', bbox_to_anchor=(1.2, 0.6))

    if graphs_log_scale:
        ax.set_yscale('log')

    if graphs_detailed:
        # One tick per position present in the data, labels rotated to fit
        ax.set_xticks(positions.unique())
        ax.tick_params(axis='x', labelrotation=90)

    if graphs_fixed_range:
        ax.set_xlim(graphs_x_min, graphs_x_max)

    cumulative_graph = f'{output_prefix}_{graph_name}_graph.png'
    fig.savefig(cumulative_graph, bbox_inches='tight')
    # The figure is intentionally left open so the notebook's inline backend
    # still displays it when the cell finishes.
    return cumulative_graph


def process_merged_df(merged_df, output_prefix, graphs_x_axis_size, graphs_y_axis_size,
                      graphs_log_scale, graphs_detailed, graphs_fixed_range,
                      graphs_x_min, graphs_x_max):
    """
    Creates tables and graphs using a merged_df as input.

    Parameters:
        - merged_df (pd.DataFrame): Input DataFrame (as returned by csvs_to_df).
        - output_prefix (str): Prefix for output filenames.
        - graphs_x_axis_size (int): Size of the x-axis in the graphs.
        - graphs_y_axis_size (int): Size of the y-axis in the graphs.
        - graphs_log_scale (bool): If True, use a logarithmic scale for the y-axis in the graphs.
        - graphs_detailed (bool): If True, show detailed x-axis with unique positions in the graphs.
        - graphs_fixed_range (bool): If True, fix the x-axis range between graphs_x_min and graphs_x_max.
        - graphs_x_min (int): Minimum value for the x-axis range in the graphs.
        - graphs_x_max (int): Maximum value for the x-axis range in the graphs.

    Returns:
        - tuple of eight file names, in this order: csv_file, excel_file,
          heatmap_snp_types_graph, cumulative_graph_ao, cumulative_graph_ao_rate,
          indels_graph, snp_graph, tc_snp_graph.

    Side effects:
        - Writes the CSV, Excel and PNG files named after output_prefix.
        - Rebinds the module-level names ao_columns, ao_rate_columns,
          variant_frequency_columns, grouped_merged_df and grouped_indels_df.
    """
    # Published as module-level names because the Plotly cells read them later
    global ao_columns
    global ao_rate_columns
    global variant_frequency_columns
    global grouped_merged_df
    global grouped_indels_df

    # Establish which columns contain AO, AO_rate and variant frequency info
    ao_columns = [col for col in merged_df.columns if col.endswith("AO")]
    ao_rate_columns = [col for col in merged_df.columns if col.endswith("AO_rate")]
    variant_frequency_columns = [col for col in merged_df.columns if col.endswith("Variant_frequency_(%)")]

    # Reorder the DataFrame columns
    ordered_columns = ['POS', 'REF', 'ALT', 'CIGAR', 'TYPE'] + ao_columns + ao_rate_columns + variant_frequency_columns
    merged_df = merged_df[ordered_columns]


    #EXTRACT DATA:

    # Rows without any NaN are the variants shared by every sample.
    # Computed once and reused for all the "shared" statistics.
    shared_df = merged_df.dropna()
    df_with_shared_variants = shared_df.reset_index()
    # Shared variants data:
    total_variants_shared = shared_df[ao_columns].sum().sum()
    total_unique_variants_shared = len(shared_df)
    df_with_shared_positions = shared_df.groupby('POS').sum(numeric_only=True).reset_index()
    total_positions_shared = len(df_with_shared_positions)

    # Substitute NaN values from the merged_df with zeros to conserve all variants
    merged_df = merged_df.fillna(0)
    # All variants data:
    grouped_merged_df = merged_df.groupby('POS').sum(numeric_only=True).reset_index()
    # The inner sum yields a numeric Series, so no numeric_only flag is needed
    # (older pandas versions reject that flag on Series reductions).
    total_variants = merged_df[ao_columns].sum().sum()
    total_unique_variants = len(merged_df)

    # All indels and SNPs data:
    indels_df = merged_df[(merged_df["TYPE"] == "ins") | (merged_df["TYPE"] == "del")]
    grouped_indels_df = indels_df.groupby('POS').sum(numeric_only=True).reset_index()
    ins_df = merged_df[(merged_df["TYPE"] == "ins")]
    grouped_ins_df = ins_df.groupby('POS').sum(numeric_only=True).reset_index()
    del_df = merged_df[(merged_df["TYPE"] == "del")]
    grouped_del_df = del_df.groupby('POS').sum(numeric_only=True).reset_index()
    snp_df = merged_df[(merged_df["TYPE"] == "snp")]
    grouped_snp_df = snp_df.groupby('POS').sum(numeric_only=True).reset_index()
    df_TC_snp = merged_df[(merged_df['REF'] == 'T') & (merged_df['ALT'] == 'C')] #df with SNPs of the type T-->C

    # SNP types data:
    dict_snp_types = create_dict_snp_types(merged_df, ao_columns)
    # If all values of dict_snp_types are "nan" (this is also the case when the
    # dictionary is empty, i.e. no SNPs were found), assign 0 to each substitution type
    if all(math.isnan(value) for value in dict_snp_types.values()):
      dict_snp_types = {'TC': 0, 'TG': 0, 'AC': 0, 'AG': 0, 'AT': 0, 'GA': 0, 'GC': 0, 'GT': 0, 'CT': 0, 'CA': 0, 'TA': 0, 'CG': 0}
    df_snp_types = create_df_snp_types(dict_snp_types)

    # Create summary dataframe with previous data:
    summary_dict = {
      'Total variants': [total_variants],
      'Total unique variants': [total_unique_variants],
      'Total Variants shared': [total_variants_shared],
      'Total unique variants shared': [total_unique_variants_shared],
      'Total positions shared': [total_positions_shared]
    }
    df_summary = pd.DataFrame(summary_dict)

    # Create dataframe with the sum of AO and the mean of AO_rate from all the samples
    double_merged_df = merged_df.copy()
    double_merged_df["Sum_AO"] = merged_df[ao_columns].sum(axis=1)
    double_merged_df["Mean_AO_rate"] = merged_df[ao_rate_columns].mean(axis=1)
    double_merged_df["Mean_variant_frequency_(%)"] = merged_df[variant_frequency_columns].mean(axis=1)
    double_merged_df = double_merged_df.drop(columns=ao_columns + ao_rate_columns + variant_frequency_columns)

    #Create csv file with the double_merged_df
    csv_file = output_prefix + ".csv"
    double_merged_df.to_csv(csv_file, index=False)

    # Create excel file with all the extracted data
    excel_file = output_prefix + '.xlsx'
    with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
        double_merged_df.to_excel(writer, sheet_name='Mean data', index=False)
        merged_df.to_excel(writer, sheet_name='Merged dataframes', index=False)
        grouped_merged_df.to_excel(writer, sheet_name='Variants per position', index=False)
        df_with_shared_positions.to_excel(writer, sheet_name='Shared positions', index=True)
        df_with_shared_variants.to_excel(writer, sheet_name='Shared variants', index=True)
        indels_df.to_excel(writer, sheet_name='Indels', index=False)
        ins_df.to_excel(writer, sheet_name='Insertions', index=False)
        del_df.to_excel(writer, sheet_name='Deletions', index=False)
        df_snp_types.to_excel(writer, sheet_name='SNP types', index=True)
        df_TC_snp.to_excel(writer, sheet_name='TC_snp', index=False)
        df_summary.to_excel(writer, sheet_name='Basic data', index=False)

    #Create heatmap image of the SNP types (shown as percentages)
    total_sum = sum(dict_snp_types.values())
    if total_sum != 0:
      dict_snp_types_percentage = {key: (value / total_sum) * 100 for key, value in dict_snp_types.items()}
    else:
      # Nothing to normalise: keep the (all-zero) counts as they are
      dict_snp_types_percentage = dict_snp_types
    df_table = create_df_snp_types(dict_snp_types_percentage)
    # A dedicated figure/axes pair is created here; the earlier plt.clf() call
    # produced a spare empty figure whenever no figure was open yet.
    heatmap_fig, heatmap_ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(df_table.astype(float), cmap='coolwarm', annot=True, fmt=".1f", cbar_kws={'label': '%'}, ax=heatmap_ax)
    heatmap_ax.xaxis.set_label_position('top')
    heatmap_ax.set_xlabel('Nucleotide substitutions')
    heatmap_ax.set_ylabel('Reference nucleotides')
    heatmap_fig.suptitle(output_prefix)
    heatmap_fig.subplots_adjust(top=0.80)
    heatmap_ax.tick_params(axis='x', top=True, bottom=False, labeltop=True, labelbottom=False) #Adjust position of ticks and x-axis labels
    heatmap_ax.text(0.5, -0.1, f"Total number of substitutions: {total_sum}", transform=heatmap_ax.transAxes, ha='center') #Add total number of substitutions as text in the image
    # "_" separator added so the name follows the same pattern as the other outputs
    heatmap_snp_types_graph = output_prefix + '_heatmap_snp_types.png'
    heatmap_fig.savefig(heatmap_snp_types_graph)
    plt.show()
    plt.close(heatmap_fig)

    #Create cumulative graphs
    cumulative_graph_ao = create_cumulative_graph(grouped_merged_df, ao_columns, output_prefix, "total_ao", graphs_x_axis_size, graphs_y_axis_size, graphs_log_scale, graphs_detailed, graphs_fixed_range, graphs_x_min, graphs_x_max)
    cumulative_graph_ao_rate = create_cumulative_graph(grouped_merged_df, ao_rate_columns, output_prefix, "total_ao_rate", graphs_x_axis_size, graphs_y_axis_size, graphs_log_scale, graphs_detailed, graphs_fixed_range, graphs_x_min, graphs_x_max)
    indels_graph = create_cumulative_graph(grouped_indels_df, ao_rate_columns, output_prefix, "indels_rate_graph", graphs_x_axis_size, graphs_y_axis_size, graphs_log_scale, graphs_detailed, graphs_fixed_range, graphs_x_min, graphs_x_max)
    snp_graph = create_cumulative_graph(grouped_snp_df, ao_rate_columns, output_prefix, "snp_rate_graph", graphs_x_axis_size, graphs_y_axis_size, graphs_log_scale, graphs_detailed, graphs_fixed_range, graphs_x_min, graphs_x_max)
    tc_snp_graph = create_cumulative_graph(df_TC_snp, ao_rate_columns, output_prefix, "tc_snp_rate_graph", graphs_x_axis_size, graphs_y_axis_size, graphs_log_scale, graphs_detailed, graphs_fixed_range, graphs_x_min, graphs_x_max)

    return csv_file, excel_file, heatmap_snp_types_graph, cumulative_graph_ao, cumulative_graph_ao_rate, indels_graph, snp_graph, tc_snp_graph





## Main program
##-------------

## Step 1: Parameters catching
##--------------------

print("\tStep 1 => Parameters catching")

# The file list comes from the upload cell
input_files_list = uploaded_files_list
# Evaluate every form parameter once, in order: a NameError raised here means
# the "Input parameters" cell has not been run in this session.
(output_prefix,
 graphs_x_axis_size,
 graphs_y_axis_size,
 graphs_log_scale,
 graphs_detailed,
 graphs_fixed_range,
 graphs_x_min,
 graphs_x_max)

print("\tStep 1 done\n")



## Step 2: Creating merged dataframe
##--------------------

print("\tStep 2: Creating merged dataframe")

merged_df = csvs_to_df(input_files_list)

print("\tStep 2 done\n")




## Step 3: Extracting data and creating graphs and tables
##--------------------

print("\tStep 3 => Extracting data and creating graphs and tables")

# Extract data from the merged_df and create graphs and tables; the returned
# file names are kept as module-level names for the download cell.
(csv_file, excel_file, heatmap_snp_types_graph,
 cumulative_graph_ao, cumulative_graph_ao_rate,
 indels_graph, snp_graph, tc_snp_graph) = process_merged_df(
    merged_df, output_prefix,
    graphs_x_axis_size, graphs_y_axis_size,
    graphs_log_scale, graphs_detailed, graphs_fixed_range,
    graphs_x_min, graphs_x_max,
)

print("\tStep 3 done\n")






print("\tJOB DONE!")



In [None]:
#@markdown # Download files

from google.colab import files

# Download every output of the main program cell, in a fixed order.
# All names below are defined by that cell, so it must have completed in this
# session before this cell is run.
output_files = (
    excel_file,
    csv_file,
    heatmap_snp_types_graph,
    cumulative_graph_ao,
    cumulative_graph_ao_rate,
    indels_graph,
    snp_graph,
    tc_snp_graph,
)
for output_file in output_files:
    files.download(output_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Extra content

In [None]:
#@markdown # Show AO Plotly Graph
#@markdown  - Select this option if you want to download the generated graph:
download_graph = False #@param {type:"boolean"}



## Imports
##----------
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from google.colab import files





## Main program
##-------------

# Extract AO values from the grouped and merged dataframe.
# grouped_merged_df and ao_columns are module-level names set by
# process_merged_df, so the main program cell must have been run first.
AO_values = grouped_merged_df[ao_columns]

# Create a stacked bar graph using Plotly
fig = go.Figure()

# Add one bar trace per AO column. The stacking itself is done by
# barmode='stack' in the layout below, so no running offset is kept here.
for col in ao_columns:
    fig.add_trace(
        go.Bar(x=grouped_merged_df['POS'], y=AO_values[col], name=col, marker=dict(opacity=0.8), legendgroup=col,
        hovertemplate=f"Position: %{{x}}<br>AO: %{{y}}<br>ID: {col}<extra></extra>"),
    )

# Update the layout of the graph
fig.update_layout(
    xaxis=dict(title='Position'),
    yaxis=dict(title='Variants'),
    title=output_prefix + "_total_ao",
    barmode='stack',
)

# Set y-axis to log scale if specified
if graphs_log_scale:
    fig.update_layout(yaxis_type='log')

# Customize x-axis ticks and labels if detailed graphs are requested
if graphs_detailed:
    positions = grouped_merged_df['POS'].unique()
    fig.update_layout(xaxis=dict(tickvals=positions, ticktext=positions, tickangle=90))

# Set x-axis range if a fixed range is specified
if graphs_fixed_range:
    fig.update_xaxes(range=[graphs_x_min, graphs_x_max])

# Show the graph
fig.show()

# Download graph if the option is selected
if download_graph:
  cumulative_graph = output_prefix + "_total_ao_graph_plotly.html"
  fig.write_html(cumulative_graph)
  files.download(cumulative_graph)


In [None]:
#@markdown # Show indels Plotly Graph
#@markdown  - Select this option if you want to download the generated graph:
download_graph = False #@param {type:"boolean"}

## Imports
##----------
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from google.colab import files





## Main program
##-------------




# Extract AO_rate values from the grouped indels dataframe.
# grouped_indels_df and ao_rate_columns are module-level names set by
# process_merged_df, so the main program cell must have been run first.
AO_values = grouped_indels_df[ao_rate_columns]

# Create a stacked bar graph using Plotly
fig = go.Figure()

# Add one bar trace per AO_rate column. The stacking itself is done by
# barmode='stack' in the layout below, so no running offset is kept here.
for col in ao_rate_columns:
    fig.add_trace(
        go.Bar(x=grouped_indels_df['POS'], y=AO_values[col], name=col, marker=dict(opacity=0.8), legendgroup=col,
                hovertemplate=f"Position: %{{x}}<br>AO_rate: %{{y}}<br>ID: {col}<extra></extra>"),
    )

# Update the layout of the graph
fig.update_layout(
    xaxis=dict(title='Position'),
    yaxis=dict(title='Variants'),
    title=output_prefix + "_indels",
    barmode='stack',
)

# Set y-axis to log scale if specified
if graphs_log_scale:
    fig.update_layout(yaxis_type='log')

# Customize x-axis ticks and labels if detailed graphs are requested
if graphs_detailed:
    positions = grouped_indels_df['POS'].unique()
    fig.update_layout(xaxis=dict(tickvals=positions, ticktext=positions, tickangle=90))

# Set x-axis range if a fixed range is specified
if graphs_fixed_range:
    fig.update_xaxes(range=[graphs_x_min, graphs_x_max])

# Show the graph
fig.show()

# Download graph if the option is selected
if download_graph:
  cumulative_graph = output_prefix + "_indels_graph_plotly.html"
  fig.write_html(cumulative_graph)
  files.download(cumulative_graph)


