# Visualize Transcriptomic data

In [46]:
import sys
sys.stdout = open('/dev/null', 'w')
!pip install -r requirements.txt
!pip install bioinfokit
!pip install gseapy
!pip install seaborn

In [58]:
import os
import ipywidgets as widgets
from IPython.display import display
import numpy as np
import pandas as pd
import warnings
import matplotlib.pyplot as plt
from bioinfokit import visuz
import gseapy as gp
import ipywidgets as widgets
from IPython.display import display,clear_output, HTML
import os
import seaborn as sns
# Directory that receives the uploaded file
save_dir = "internal data"

# Create the directory when it is missing. exist_ok=True replaces the separate
# existence check, which could race with another process creating the folder.
os.makedirs(save_dir, exist_ok=True)

# Data-source selector; ToggleButtons keeps exactly one option active
_source_options = ['Import CSV file', 'Demo']
_source_tooltips = ['Import a CSV file', 'Use demo data']
toggle_buttons = widgets.ToggleButtons(
    options=_source_options,
    description='Select Option:',
    disabled=False,
    button_style='',  # '' = default look; 'success', 'info', 'warning', 'danger' are the alternatives
    tooltips=_source_tooltips,
)

# Upload control limited to one CSV file at a time
file_upload = widgets.FileUpload(multiple=False, accept='.csv')

# Area in which the callbacks render their messages, tables and plots
output = widgets.Output()

# Pressing this button submits the current selection
button = widgets.Button(description='Submit')

def update_widgets(*args):
    """Show the upload control only while 'Import CSV file' is selected.

    Any positional arguments are accepted and ignored, so the function works
    both as an observer callback and when called directly without arguments.
    """
    wants_upload = toggle_buttons.value == 'Import CSV file'
    # Swapping the container's children is what shows or hides the widget
    file_upload_container.children = [file_upload] if wants_upload else []


# Re-evaluate the visibility whenever the selected option changes
toggle_buttons.observe(update_widgets, names='value')

def _load_expression_table():
    """Return the expression table for the selected data source, or None.

    Prints an explanatory message and returns None when there is nothing to
    load (no file uploaded, or an unrecognised option), so the caller can stop
    instead of failing later on a table that was never created.
    """
    selected_option = toggle_buttons.value

    if selected_option == 'Demo':
        print("Loading demonstration data...")
        csv_path = os.path.join(save_dir, "demo_expression_data.csv")
    elif selected_option == 'Import CSV file':
        if not file_upload.value:
            print("No file uploaded. Please upload a CSV file using the file upload widget.")
            return None
        # Writes the upload to <save_dir>/imported.csv
        handle_file_upload()
        csv_path = os.path.join(save_dir, "imported.csv")
    else:
        print("No Data source is defined. Please choose an option.")
        return None

    # NOTE(review): row 29675 is skipped for both sources, exactly as before.
    # This looks specific to the demo file - confirm it is wanted for uploads.
    return pd.read_csv(csv_path, skiprows=[29675])


def _classify_expression(data1, pvalue=0.05, logFC=0):
    """Add an ``expression`` column labelling every gene in ``data1``.

    Genes with ``P.Value`` below ``pvalue`` are labelled "Up regulated" when
    ``Log FC`` is above ``logFC`` and "Down regulated" when it is below
    ``-logFC``; all other genes are "Not differentially expressed".
    """
    is_significant = data1["P.Value"] < pvalue
    data1["expression"] = "Not differentially expressed"
    # Positive log fold change means higher expression, i.e. up-regulation
    # (the two labels were previously swapped).
    data1.loc[is_significant & (data1["Log FC"] > logFC), "expression"] = "Up regulated"
    data1.loc[is_significant & (data1["Log FC"] < -logFC), "expression"] = "Down regulated"


def _plot_volcano(data1, logFC_threshold=1, pvalue_threshold=0.05):
    """Show a volcano plot of ``Log FC`` against ``-log10(P.Value)``.

    Adds a ``-log10(P.Value)`` column to ``data1``. Points beyond both
    thresholds are drawn in red, and the thresholds are marked with dashed
    blue lines.
    """
    data1['-log10(P.Value)'] = -np.log10(data1['P.Value'])

    plt.figure(figsize=(10, 8))
    plt.scatter(data1['Log FC'], data1['-log10(P.Value)'], color='gray')

    # Highlight points that pass both the fold-change and p-value thresholds
    significant = (data1['Log FC'].abs() > logFC_threshold) & (data1['P.Value'] < pvalue_threshold)
    plt.scatter(data1.loc[significant, 'Log FC'], data1.loc[significant, '-log10(P.Value)'], color='red')

    plt.title('Volcano Plot', fontsize=20)
    plt.xlabel('Log2 Fold Change', fontsize=16)
    plt.ylabel('-Log10 P-value', fontsize=16)

    # Threshold guides
    plt.axhline(-np.log10(pvalue_threshold), color='blue', linestyle='--')
    plt.axvline(logFC_threshold, color='blue', linestyle='--')
    plt.axvline(-logFC_threshold, color='blue', linestyle='--')

    plt.show()


def _run_go_enrichment(gene_list):
    """Run Enrichr GO Biological Process enrichment for ``gene_list``.

    Returns the results as a DataFrame with the ``Genes`` column first and
    reduced to the first gene of each semicolon-separated list, or None when
    Enrichr returned no rows.
    """
    enr = gp.enrichr(gene_list=gene_list,  # gene symbols to test
                     gene_sets='GO_Biological_Process_2021',
                     organism='Human',
                     outdir=None)  # None: do not let gseapy write result files

    results_df = pd.DataFrame(enr.results)
    if results_df.empty:
        return None

    # Move 'Genes' to the front and keep only the first gene name of each entry
    ordered_columns = ['Genes'] + [col for col in results_df.columns if col != 'Genes']
    results_df = results_df[ordered_columns]
    results_df['Genes'] = results_df['Genes'].apply(lambda x: x.split(';')[0] if isinstance(x, str) else x)
    return results_df


def _plot_top_go_terms(results_df):
    """Show and save a bar plot of the ten GO terms with the smallest p-values."""
    plt.figure(figsize=(12, 8))

    # Sort explicitly so "top 10" does not depend on the order of the rows
    top_results = results_df.sort_values('P-value').head(10)
    sns.barplot(x='P-value', y='Term', data=top_results, hue='Term', palette='viridis', legend=False)

    plt.xlabel('P-value')
    plt.ylabel('GO Term')
    plt.title('Top 10 Enriched GO Terms')

    plt.tight_layout()
    plt.savefig('go_enrichment_plot.png')
    plt.show()


def on_button_click(b):
    """Handle a click on the Submit button and run the whole analysis.

    Loads the selected expression table, labels each gene, shows a volcano
    plot, runs a GO enrichment analysis on the differentially expressed genes
    and shows/saves its results ('go_enrichment_results.csv' and
    'go_enrichment_plot.png'). Everything is rendered inside ``output``.
    The function returns early, with a printed message, when a step has no
    data to work on.

    Parameters:
        b: the clicked button (unused).
    """
    with output:
        output.clear_output()  # Clear previous output

        data1 = _load_expression_table()
        if data1 is None:
            return

        # The columns are renamed by position, so exactly three are required
        if data1.shape[1] != 3:
            print("Expected a CSV file with exactly three columns (gene, p-value, log fold change).")
            return

        display("Genetic expression profile")
        data1.columns = ["Gene", "P.Value", "Log FC"]
        # Labelling uses no fold-change cut-off (logFC=0); the volcano plot
        # below highlights with its own, stricter threshold of 1.
        _classify_expression(data1, pvalue=0.05, logFC=0)
        display(data1.head())

        display("Volcano Plot")
        _plot_volcano(data1, logFC_threshold=1, pvalue_threshold=0.05)

        # All differentially expressed genes (up- and down-regulated)
        is_de = data1['expression'] != "Not differentially expressed"
        gene_list = data1.loc[is_de, 'Gene'].dropna().tolist()

        display("Genetic Ontology enrichment analysis")
        if not gene_list:
            print("No differentially expressed genes found; skipping the enrichment analysis.")
            return

        results_df = _run_go_enrichment(gene_list)
        if results_df is None:
            print("The enrichment analysis returned no results.")
            return

        display(results_df.head())
        results_df.to_csv('go_enrichment_results.csv', index=False)

        display("Genetic Ontology enrichment Plot")
        _plot_top_go_terms(results_df)



def handle_file_upload():
    """Save the uploaded file into ``save_dir`` as ``imported.csv``.

    Status messages are printed into ``output``, which is cleared first.
    When nothing has been uploaded, only a message is printed.
    """
    with output:
        output.clear_output()  # Clear previous output

        if not file_upload.value:
            print("No file uploaded")
            return

        # The upload widget only allows a single file, so take the first entry
        uploaded_file = file_upload.value[0]

        # Make sure the target folder is still there before writing into it
        os.makedirs(save_dir, exist_ok=True)
        file_path = os.path.join(save_dir, "imported.csv")
        with open(file_path, "wb") as f:
            f.write(uploaded_file['content'])

        print("File successfully imported")

# Run on_button_click whenever the Submit button is pressed
button.on_click(on_button_click)

# Container reserved for dropdown widgets; it starts out empty
dropdown_container = widgets.VBox(children=[])

# Container whose children update_widgets() swaps to show or hide the upload control
file_upload_container = widgets.VBox(children=[file_upload])

# Render the controls, followed by the output area
_controls = (toggle_buttons, file_upload_container, button, output)
display(*_controls)

# Bring the upload control's visibility in line with the default selection
update_widgets()


ToggleButtons(description='Select Option:', options=('Import CSV file', 'Demo'), tooltips=('Import a CSV file'…

VBox(children=(FileUpload(value=(), accept='.csv', description='Upload'),))

Button(description='Submit', style=ButtonStyle())

Output()