<a href="https://colab.research.google.com/github/HebaF/metagenomics_pipeline/blob/main/Vir_o_matic_v2_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Block 1: Imports and Initial Setup
from google.colab import drive, files, userdata
import google.generativeai as genai
import ipywidgets as widgets
from IPython.display import display, HTML
import plotly.express as px
import pandas as pd
import numpy as np
# Nextflow setup
def setup_nextflow_conda():
    """Install and setup Nextflow and required tools in Colab"""
    setup_commands = """
    # Install Java
    apt-get update && apt-get install -y default-jdk

    # Install Nextflow
    curl -s https://get.nextflow.io | bash
    mv nextflow /usr/local/bin/

    # Setup conda environment
    conda create -n nf_env -c bioconda -c conda-forge -y \
        fastqc=0.11.9 \
        fastp=0.23.2 \
        kraken2=2.1.2 \
        krona=2.8.1
    """

    !{setup_commands}

    # Verify installations
    !nextflow -version
    !conda list -n nf_env
# For progress tracking
from tqdm.notebook import tqdm
from datetime import datetime
import os
from IPython.display import FileLink, HTML
# Mount Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Block 2: Environment Setup (Part 1 - Install Conda and Tools)
%%bash
# Update and install dependencies
apt-get update
apt-get install -y wget

# Install Miniconda and initialize
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh -b -p /usr/local/miniconda
export PATH="/usr/local/miniconda/bin:$PATH"

# Initialize conda in bash
eval "$(/usr/local/miniconda/bin/conda shell.bash hook)"
/usr/local/miniconda/bin/conda init bash

# Create new environment with required tools
/usr/local/miniconda/bin/conda create -n mg_env -c bioconda -c conda-forge -y \
    fastqc=0.11.9 \
    fastp=0.23.2 \
    kraken2=2.1.2 \
    krona=2.8.1

# Activate environment
source /usr/local/miniconda/bin/activate mg_env

Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,199 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:11 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [8,591 kB]
Hit:12 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:13 http://archive.ubuntu.com/ubuntu jammy-upda

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
--2025-01-12 13:01:49--  https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.191.158, 104.16.32.241, 2606:4700::6810:20f1, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.191.158|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 147784736 (141M) [application/octet-stream]
Saving to: ‘Miniconda3-latest-Linux-x86_64.sh’

     0K .......... .......... .......... .......... ..........  0% 3.22M 44s
    50K .......... .......... .......... .......... ..........  0% 3.15M 44s
   100K .......... .......... .......... .......... ..........  0% 3.36M 43s
   150K .......... .......... .......... .......... ..........  0% 41.9M 33s
   200K .......... .......... .......... .......... ..........  0% 

In [3]:
# Block 3: Environment Setup (Part 2 - Create Directories)
import os

def setup_directories():
    """Create all necessary directories"""
    # List of directories to create
    directories = [
        '/content/virus_analysis',
        '/content/virus_analysis/results',
        '/content/virus_analysis/temp',
        '/content/virus_analysis/qc_results',
        '/content/virus_analysis/trimmed',
        '/content/virus_analysis/kraken_results'
    ]

    # Create directories
    for dir_path in directories:
        !mkdir -p {dir_path}
        print(f"Created directory: {dir_path}")

    # Create directory in Google Drive
    !mkdir -p '/content/drive/MyDrive/virus_analysis'
    print("Created directory in Google Drive")

    return {
        'local_dir': '/content/virus_analysis',
        'drive_dir': '/content/drive/MyDrive/virus_analysis'
    }

# Create directories
dirs = setup_directories()

# Verify directory creation
!ls -l /content/virus_analysis

Created directory: /content/virus_analysis
Created directory: /content/virus_analysis/results
Created directory: /content/virus_analysis/temp
Created directory: /content/virus_analysis/qc_results
Created directory: /content/virus_analysis/trimmed
Created directory: /content/virus_analysis/kraken_results
Created directory in Google Drive
total 20
drwxr-xr-x 2 root root 4096 Jan 12 13:04 kraken_results
drwxr-xr-x 2 root root 4096 Jan 12 13:04 qc_results
drwxr-xr-x 2 root root 4096 Jan 12 13:04 results
drwxr-xr-x 2 root root 4096 Jan 12 13:04 temp
drwxr-xr-x 2 root root 4096 Jan 12 13:04 trimmed


In [15]:
# Block for conda operations (use this format for any conda commands)
%%bash
# Add conda to PATH and initialize
export PATH="/usr/local/miniconda/bin:$PATH"
source /usr/local/miniconda/etc/profile.d/conda.sh

# Now we can use conda commands
conda activate mg_env

# Verify installation
echo "Conda version:"
conda --version

echo -e "\nConda environments:"
conda env list

echo -e "\nInstalled tools:"
which fastqc
which fastp
which kraken2
which ktImportTaxonomy

Conda version:
conda 24.11.1

Conda environments:

# conda environments:
#
base                   /usr/local/miniconda
mg_env               * /usr/local/miniconda/envs/mg_env


Installed tools:
/usr/local/miniconda/envs/mg_env/bin/fastqc
/usr/local/miniconda/envs/mg_env/bin/fastp
/usr/local/miniconda/envs/mg_env/bin/kraken2
/usr/local/miniconda/envs/mg_env/bin/ktImportTaxonomy


In [16]:
# Block 5: Initialize Chatbot and Define Helper Functions

# Verify Colab Session Type
def initialize_session():
    """Setup and verify Colab session"""
    # Enable widgets
    display(HTML("""
        <script>
            google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})
        </script>
    """))

    # Check session type
    runtime_type = !nvidia-smi | grep "GPU"
    is_gpu = len(runtime_type) > 0

    return {
        'is_gpu': is_gpu,
        'session_id': hash(str(datetime.now())),
        'start_time': datetime.now()
    }

# Initialize session
session_info = initialize_session()

# Initialize Gemini
genai.configure(api_key=userdata.get("GOOGLE_API_KEY"))
model = genai.GenerativeModel(
    "gemini-2.0-flash-exp",
    system_instruction="You are a helpful assistant chatbot who guides users through the steps of metagenomic analysis for virus detection in a google colab environment.",
)
chat = model.start_chat()

In [22]:
#Block 6: Chatbot Helper Functions and Widget
#Functions for QC recommendations and Result Interpretation
def get_qc_recommendations(chat, fastqc_results):
    prompt = f"""
    Based on these FastQC results:
    {fastqc_results}

    What QC parameters would you recommend for preprocessing these reads?
    Please consider:
    1. Quality score thresholds
    2. Read length filtering
    3. Adapter trimming
    4. Any other relevant parameters
    """
    response = chat.send_message(prompt)
    return response.text

def interpret_results(chat, kraken_report):
    prompt = f"""
    Based on these Kraken2 classification results:
    {kraken_report}

    Please provide:
    1. Summary of viral composition
    2. Notable findings
    3. Potential quality concerns
    4. Recommendations for further analysis
    """
    response = chat.send_message(prompt)
    return response.text

def create_progress_display():
    """Create Colab-friendly progress display"""
    progress_output = widgets.Output()
    status_html = widgets.HTML(value="")

    def update_progress(step, message):
        status_html.value = f"""
        <div style="padding: 10px; margin: 5px; border-left: 3px solid #2196F3;">
            <b>Step {step}/5:</b> {message}
        </div>
        """

    return progress_output, status_html, update_progress

# Analysis Guide Function
def guide_analysis(chat):
    # Create widgets
    question_text = widgets.Text(
        value='',
        placeholder='Type your question here',
        description='Question:',
        disabled=False,
        layout=widgets.Layout(width='50%')
    )
    send_button = widgets.Button(
        description="Send",
        button_style='info',
        tooltip='Send your question'
    )
    done_button = widgets.Button(
        description="Done",
        button_style='warning',
        tooltip='Finish conversation'
    )
    chat_output = widgets.Output()
    loading_text = widgets.HTML(value="")

    # Add quick questions buttons
    quick_questions = [
        "How do I start?",
        "What do I do first?",
        "Help with QC parameters",
        "Explain Kraken2 results"
    ]

    quick_buttons = [widgets.Button(
        description=q,
        button_style='info',
        layout=widgets.Layout(width='auto')
    ) for q in quick_questions]

    def on_quick_button_clicked(b):
        question_text.value = b.description
        on_send_button_clicked(None)

    for button in quick_buttons:
        button.on_click(on_quick_button_clicked)

    quick_buttons_box = widgets.HBox(quick_buttons)

    with chat_output:
        try:
            initial_prompt = """
            Welcome! I'm here to help you with your metagenomic analysis.
            You can ask me questions about:
            1. Data preparation and quality control
            2. Running Kraken2 analysis
            3. Interpreting results
            4. Troubleshooting

            What would you like to know?
            """
            response = chat.send_message(initial_prompt)
            print("Chatbot:", response.text)
        except Exception as e:
            print(f"Error initializing chat: {str(e)}")

    def on_send_button_clicked(b):
        send_button.disabled = True
        question_text.disabled = True
        loading_text.value = "<i>Processing...</i>"

        with chat_output:
            try:
                user_question = question_text.value
                print("\nYou:", user_question)
                response = chat.send_message(user_question)
                print("\nChatbot:", response.text)
            except Exception as e:
                print(f"\nError: {str(e)}")
                print("Please try asking your question again.")
            finally:
                send_button.disabled = False
                question_text.disabled = False
                loading_text.value = ""
                question_text.value = ''

    def on_done_button_clicked(b):
        question_text.disabled = True
        send_button.disabled = True
        done_button.disabled = True
        with chat_output:
            print("\nMoving to analysis...")

    question_text.on_submit(lambda x: on_send_button_clicked(None))
    send_button.on_click(on_send_button_clicked)
    done_button.on_click(on_done_button_clicked)

    # Organize widgets in a nicer layout
    input_area = widgets.HBox([question_text, send_button, done_button])
    chat_container = widgets.VBox([
        widgets.HTML(value="<h3>Step 2: Interactive Analysis Guide</h3>"),
        quick_buttons_box,
        input_area,
        loading_text,
        chat_output
    ])

    return chat_container

def create_upload_widget():
    """Create file upload interface"""
    global input_file  # Declare global variable
    input_file = None  # Initialize input file

    upload_output = widgets.Output()
    upload_status = widgets.HTML(value="")

    # Add file format selection
    file_format = widgets.Dropdown(
        options=['FASTQ/FASTQ.GZ', 'TAR/TAR.GZ', 'ZIP'],
        value='FASTQ/FASTQ.GZ',
        description='File type:',
        style={'description_width': 'initial'}
    )

    upload_button = widgets.Button(
        description='Upload File',
        button_style='primary',
        icon='upload',
        tooltip='Click to upload your file'
    )

    def process_archive(uploaded_file, file_type):
        """Process uploaded archive files"""
        import tarfile
        import zipfile
        import gzip
        import shutil
        import os

        # Create temp directory
        !mkdir -p ./temp_files

        if file_type == 'TAR/TAR.GZ':
            # Extract tar file
            with tarfile.open(uploaded_file, 'r:*') as tar:
                tar.extractall('./temp_files')

            # Look for FASTQ files
            fastq_files = !find ./temp_files -type f -name "*.fastq*" -o -name "*.fq*"
            return fastq_files

        elif file_type == 'ZIP':
            # Extract zip file
            with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
                zip_ref.extractall('./temp_files')

            # Look for FASTQ files
            fastq_files = !find ./temp_files -type f -name "*.fastq*" -o -name "*.fq*"
            return fastq_files

    def on_upload_button_clicked(b):
        global input_file

        with upload_output:
            upload_output.clear_output()
            upload_status.value = "<i>Uploading...</i>"
            try:
                print(f"Select your {file_format.value} file...")
                uploaded = files.upload()

                if not uploaded:
                    upload_status.value = "<b>Error:</b> No file selected"
                    return

                uploaded_file = list(uploaded.keys())[0]

                # Process based on file type
                if file_format.value == 'FASTQ/FASTQ.GZ':
                    if uploaded_file.endswith(('.fastq', '.fq', '.fastq.gz', '.fq.gz')):
                        input_file = uploaded_file
                        upload_status.value = f"<b>Uploaded:</b> {input_file}"
                        print(f"\nSuccessfully uploaded: {input_file}")
                    else:
                        upload_status.value = "<b>Error:</b> Invalid FASTQ file format"
                        return

                elif file_format.value in ['TAR/TAR.GZ', 'ZIP']:
                    print("\nExtracting files...")
                    fastq_files = process_archive(uploaded_file, file_format.value)

                    if not fastq_files:
                        upload_status.value = "<b>Error:</b> No FASTQ files found in archive"
                        return

                    # If multiple FASTQ files found, let user select
                    if len(fastq_files) > 1:
                        print("\nMultiple FASTQ files found. Please select one:")
                        file_selector = widgets.Dropdown(
                            options=fastq_files,
                            description='Select file:',
                            style={'description_width': 'initial'}
                        )

                        def on_file_selected(change):
                            global input_file  # Use nonlocal here too
                            input_file = change.new
                            upload_status.value = f"<b>Selected:</b> {input_file}"
                            print(f"\nSelected file: {input_file}")

                        file_selector.observe(on_file_selected, names='value')
                        display(file_selector)
                    else:
                        input_file = fastq_files[0]
                        upload_status.value = f"<b>Extracted:</b> {input_file}"
                        print(f"\nExtracted FASTQ file: {input_file}")

            except Exception as e:
                upload_status.value = f"<b>Error:</b> {str(e)}"
                print("Upload/extraction failed. Please try again.")

    upload_button.on_click(on_upload_button_clicked)

    # Organize upload widgets
    upload_container = widgets.VBox([
        widgets.HTML(value="<h3>Step 1: Upload Your Data</h3>"),
        widgets.HBox([file_format, upload_button]),
        upload_status,
        upload_output
    ])

    return upload_container, input_file  # Return widget and file path

# Main layout setup
upload_widget, input_file = create_upload_widget()  # Get both return values
chat_container = guide_analysis(chat)

# Organize everything in a main container
main_container = widgets.VBox([
    widgets.HTML(value="<h2>Vir-o-matic Metagenomic Analysis</h2>"),
    upload_widget,
    chat_container
])

display(main_container)

VBox(children=(HTML(value='<h2>Vir-o-matic Metagenomic Analysis</h2>'), VBox(children=(HTML(value='<h3>Step 1:…

In [None]:
# Block 7: Nextflow Pipeline
# Create Nextflow pipeline script with conda
def create_nextflow_pipeline():
    """Create Nextflow pipeline script using conda environments"""
    pipeline_script = """
    #!/usr/bin/env nextflow

    // Enable DSL2
    nextflow.enable.dsl=2

    // Parameters
    params.reads = "input.fastq.gz"
    params.outdir = "results"
    params.krakendb = "viral_db"

    // Process 1: FastQC
    process FASTQC {
        conda "bioconda::fastqc=0.11.9"

        input:
        path reads

        output:
        path "fastqc_output"

        script:
        '''
        mkdir fastqc_output
        fastqc -o fastqc_output !{reads}
        '''
    }

    // Process 2: Fastp
    process FASTP {
        conda "bioconda::fastp=0.23.2"

        input:
        path reads

        output:
        tuple path("trimmed/trimmed_R*.fq.gz"), emit: trimmed_reads
        path "qc_results"

        script:
        '''
        mkdir -p trimmed qc_results
        fastp --in1 !{reads} \
              --out1 trimmed/trimmed_R1.fq.gz \
              --out2 trimmed/trimmed_R2.fq.gz \
              --detect_adapter_for_pe \
              --qualified_quality_phred 20 \
              --length_required 50 \
              --cut_front \
              --cut_tail \
              --cut_mean_quality 20 \
              --json qc_results/fastp.json \
              --html qc_results/fastp.html \
              --thread !{task.cpus}
        '''
    }

    // Process 3: Kraken2
    process KRAKEN2 {
        conda "bioconda::kraken2=2.1.2"

        input:
        tuple path(read1), path(read2)
        path db

        output:
        path "kraken_output"

        script:
        '''
        mkdir kraken_output
        kraken2 --db !{db} \
                --threads !{task.cpus} \
                --paired \
                --output kraken_output/kraken2_output.txt \
                --report kraken_output/kraken2_report.txt \
                !{read1} !{read2}
        '''
    }

    // Process 4: Krona
    process KRONA {
        conda "bioconda::krona=2.8.1"

        input:
        path kraken_output

        output:
        path "krona_plot.html"

        script:
        '''
        ktImportTaxonomy \
            -q 2 -t 3 \
            !{kraken_output}/kraken2_output.txt \
            -o krona_plot.html
        '''
    }

    // Main workflow
    workflow {
        // Input channel
        reads_ch = channel.fromPath(params.reads)

        // Run processes
        fastqc_results = FASTQC(reads_ch)
        fastp_results = FASTP(reads_ch)
        kraken_results = KRAKEN2(fastp_results.trimmed_reads, params.krakendb)
        krona_results = KRONA(kraken_results)

        // Publish results
        publish:
        fastqc_results to: "${params.outdir}/fastqc"
        fastp_results to: "${params.outdir}/fastp"
        kraken_results to: "${params.outdir}/kraken"
        krona_results to: "${params.outdir}/krona"
    }
    """

    # Write pipeline to file
    with open('viral_pipeline.nf', 'w') as f:
        f.write(pipeline_script)

    # Create simplified config file for conda
    nextflow_config = """
    conda.enabled = true

    process {
        withName: FASTQC {
            cpus = 2
            memory = '4 GB'
        }
        withName: FASTP {
            cpus = 4
            memory = '8 GB'
        }
        withName: KRAKEN2 {
            cpus = 8
            memory = '16 GB'
        }
        withName: KRONA {
            cpus = 2
            memory = '4 GB'
        }
    }
    """

    with open('nextflow.config', 'w') as f:
        f.write(nextflow_config)

# Function to run Nextflow pipeline
def run_nextflow_pipeline(input_file):
    """Run Nextflow pipeline with input file"""
    try:
        # Prepare command
        cmd = f"""
        nextflow run viral_pipeline.nf \
            --reads {input_file} \
            --outdir results \
            --krakendb ~/kraken2_db/viral \
            -with-conda
        """

        # Run pipeline with progress tracking
        with tqdm(total=4, desc="Pipeline Progress") as pbar:
            process = subprocess.Popen(
                cmd,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True
            )

            # Monitor pipeline progress
            while True:
                output = process.stdout.readline()
                if output == '' and process.poll() is not None:
                    break
                if output:
                    if "[100%] Process complete" in output:
                        pbar.update(1)
                    print(output.strip())

        return True
    except Exception as e:
        print(f"Error running Nextflow pipeline: {str(e)}")
        return False


In [None]:
# Block 8: Main Analysis Function
# Main function
def main():
    chat_output = guide_analysis(chat)

    if 'input_file' not in globals():
        with chat_output:
            print("Please upload a file first")
        return

    with chat_output:
        print("\nSetting up analysis environment...")
        setup_nextflow_conda()

        print("\nCreating pipeline...")
        create_nextflow_pipeline()

        print("\nRunning pipeline...")
        if run_nextflow_pipeline(input_file):
            print("\nPipeline completed successfully!")

    # Create necessary directories
    !mkdir -p {qc_results,trimmed,kraken_results}

    with chat_output:
        print("\nStarting analysis pipeline...")

        # Step 1: Run FastQC
        print("\nRunning FastQC...")
        !fastqc {input_file} -o ./qc_results

        try:
            with open('./qc_results/fastqc_data.txt', 'r') as f:
                fastqc_results = f.read()

            # Get QC recommendations from chatbot
            qc_recommendations = get_qc_recommendations(chat, fastqc_results)
            print("\nRecommended QC parameters:")
            print(qc_recommendations)

            # Step 2: Run Fastp with recommended parameters
            print("\nRunning Fastp for quality control...")
            !fastp \
                --in1 {input_file} \
                --out1 ./trimmed/trimmed_R1.fq.gz \
                --out2 ./trimmed/trimmed_R2.fq.gz \
                --detect_adapter_for_pe \
                --qualified_quality_phred 20 \
                --length_required 50 \
                --cut_front \
                --cut_tail \
                --cut_mean_quality 20 \
                --json ./qc_results/fastp.json \
                --html ./qc_results/fastp.html \
                --thread 4

            # Step 3: Run Kraken2
            print("\nRunning Kraken2 classification...")
            !kraken2 \
                --db ~/kraken2_db/viral \
                --threads 4 \
                --paired \
                --output ./kraken_results/kraken2_output.txt \
                --report ./kraken_results/kraken2_report.txt \
                ./trimmed/trimmed_R1.fq.gz ./trimmed/trimmed_R2.fq.gz

            # Step 4: Generate Krona plot
            print("\nGenerating Krona visualization...")
            !ktImportTaxonomy \
                -q 2 -t 3 \
                ./kraken_results/kraken2_output.txt \
                -o ./kraken_results/krona_plot.html

            # Step 5: Interpret results
            with open('./kraken_results/kraken2_report.txt', 'r') as f:
                kraken_report = f.read()

            interpretation = interpret_results(chat, kraken_report)
            print("\nResults interpretation:")
            print(interpretation)

            # Display results
            from IPython.display import HTML
            import plotly.express as px
            import pandas as pd

            # Display FastQC report
            display(HTML('qc_results/fastqc_report.html'))

            # Display Krona plot
            display(HTML('kraken_results/krona_plot.html'))

            # Create summary visualization
            df = pd.read_csv('kraken_results/kraken2_report.txt',
                           sep='\t',
                           names=['percentage', 'reads', 'direct_reads',
                                 'level', 'taxid', 'name'])

            fig = px.sunburst(df,
                             path=['level', 'name'],
                             values='reads',
                             title='Taxonomic Classification')
            fig.show()

        except Exception as e:
            print(f"\nError during analysis: {str(e)}")
            print("Please check the input file and try again.")


In [None]:
# Block 9: UI Setup and Display
def create_upload_widget():
    upload_output = widgets.Output()
    upload_status = widgets.HTML(value="")


    # Add file format selection
    file_format = widgets.Dropdown(
        options=['FASTQ/FASTQ.GZ', 'TAR/TAR.GZ', 'ZIP'],
        value='FASTQ/FASTQ.GZ',
        description='File type:',
        style={'description_width': 'initial'}
    )

    upload_button = widgets.Button(
        description='Upload File',
        button_style='primary',
        icon='upload',
        tooltip='Click to upload your file'
    )

    def check_colab_resources():
    #Check Colab-specific resource availability
    # Check GPU
    gpu_info = !nvidia-smi
    gpu_available = len(gpu_info) > 0

    # Check disk space
    disk_info = !df -h /content

    status_html = widgets.HTML(
        value=f"""
        <div style="background-color: #f8f9fa; padding: 10px; border-radius: 5px;">
            <h4>Available Resources:</h4>
            <p>GPU: {'Available' if gpu_available else 'Not Available'}</p>
            <p>Storage: {disk_info[-1].split()[3]}</p>
        </div>
        """
    )
    return status_html


    def process_archive(uploaded_file, file_type):
        """Process uploaded archive files"""
        import tarfile
        import zipfile
        import gzip
        import shutil
        import os

        # Create temp directory
        !mkdir -p ./temp_files

        if file_type == 'TAR/TAR.GZ':
            # Extract tar file
            with tarfile.open(uploaded_file, 'r:*') as tar:
                tar.extractall('./temp_files')

            # Look for FASTQ files
            fastq_files = !find ./temp_files -type f -name "*.fastq*" -o -name "*.fq*"
            return fastq_files

        elif file_type == 'ZIP':
            # Extract zip file
            with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
                zip_ref.extractall('./temp_files')

            # Look for FASTQ files
            fastq_files = !find ./temp_files -type f -name "*.fastq*" -o -name "*.fq*"
            return fastq_files

    def on_upload_button_clicked(b):
        with upload_output:
            upload_output.clear_output()
            upload_status.value = "<i>Uploading...</i>"
            try:
                print(f"Select your {file_format.value} file...")
                uploaded = files.upload()

                if not uploaded:
                    upload_status.value = "<b>Error:</b> No file selected"
                    return

                uploaded_file = list(uploaded.keys())[0]

                # Process based on file type
                if file_format.value == 'FASTQ/FASTQ.GZ':
                    if uploaded_file.endswith(('.fastq', '.fq', '.fastq.gz', '.fq.gz')):
                        global input_file
                        input_file = uploaded_file
                        upload_status.value = f"<b>Uploaded:</b> {input_file}"
                        print(f"\nSuccessfully uploaded: {input_file}")
                    else:
                        upload_status.value = "<b>Error:</b> Invalid FASTQ file format"
                        return

                elif file_format.value in ['TAR/TAR.GZ', 'ZIP']:
                    print("\nExtracting files...")
                    fastq_files = process_archive(uploaded_file, file_format.value)

                    if not fastq_files:
                        upload_status.value = "<b>Error:</b> No FASTQ files found in archive"
                        return

                    # If multiple FASTQ files found, let user select
                    if len(fastq_files) > 1:
                        print("\nMultiple FASTQ files found. Please select one:")
                        file_selector = widgets.Dropdown(
                            options=fastq_files,
                            description='Select file:',
                            style={'description_width': 'initial'}
                        )

                        def on_file_selected(change):
                            global input_file
                            input_file = change.new
                            upload_status.value = f"<b>Selected:</b> {input_file}"
                            print(f"\nSelected file: {input_file}")

                        file_selector.observe(on_file_selected, names='value')
                        display(file_selector)
                    else:
                        global input_file
                        input_file = fastq_files[0]
                        upload_status.value = f"<b>Extracted:</b> {input_file}"
                        print(f"\nExtracted FASTQ file: {input_file}")

            except Exception as e:
                upload_status.value = f"<b>Error:</b> {str(e)}"
                print("Upload/extraction failed. Please try again.")

    upload_button.on_click(on_upload_button_clicked)

    # Organize upload widgets
    upload_container = widgets.VBox([
        widgets.HTML(value="<h3>Step 1: Upload Your Data</h3>"),
        widgets.HBox([file_format, upload_button]),
        upload_status,
        upload_output
    ])

    return upload_container

# Add help button
def show_file_format_help():
    help_text = """
    Supported file formats:

    1. FASTQ/FASTQ.GZ:
       - Direct upload of .fastq, .fq files
       - Compressed .fastq.gz, .fq.gz files

    2. TAR/TAR.GZ:
       - Archived collections of FASTQ files
       - Will be automatically extracted

    3. ZIP:
       - Zipped collections of FASTQ files
       - Will be automatically extracted

    Notes:
    - For archives, all FASTQ files will be extracted
    - If multiple FASTQ files are found, you'll be prompted to select one
    - Large files may take some time to upload and extract
    """

    help_output = widgets.Output()
    with help_output:
        print(help_text)

    help_button = widgets.Button(
        description='File Format Help',
        button_style='info',
        icon='question'
    )

    def on_help_button_clicked(b):
        help_output.clear_output()
        with help_output:
            print(help_text)

    help_button.on_click(on_help_button_clicked)

    return widgets.VBox([help_button, help_output])

# Modify main container setup
def create_main_interface():
    """Create Colab-optimized interface"""
    upload_widget = create_upload_widget()
    help_widget = show_file_format_help()
    chat_container = guide_analysis(chat)
    progress_output, status_html, update_progress = create_progress_display()

    main_container = widgets.VBox([
        widgets.HTML(value="""
            <div style="background-color: #e3f2fd; padding: 15px; border-radius: 5px;">
                <h2>🧬 Virus Radar 🧬</h2>
                <p>Interactive viral metagenomic analysis in Google Colab</p>
            </div>
        """),
        check_colab_resources(),
        widgets.HBox([upload_widget, help_widget]),
        chat_container,
        status_html,
        progress_output
    ])

    return main_container, update_progress

# Replace your existing display code with:
main_container, update_progress = create_main_interface()
display(main_container)