In [None]:
import shutil
import os

# Path to the Colab working directory whose contents are wiped.
content_dir = "/content"

if not os.path.isdir(content_dir):
    # os.listdir would raise here (e.g. when the notebook runs outside Colab).
    print(f"{content_dir} does not exist; nothing to delete.")
else:
    # Paths that could not be removed; success is only reported when this stays empty.
    failed_paths = []

    for item in os.listdir(content_dir):
        item_path = os.path.join(content_dir, item)
        try:
            if os.path.isdir(item_path) and not os.path.islink(item_path):
                shutil.rmtree(item_path)  # real directory: remove recursively
            else:
                # Regular files, symlinks (also to directories) and any other entry type.
                os.remove(item_path)
        except Exception as e:
            failed_paths.append(item_path)
            print(f"Failed to delete {item_path}. Reason: {e}")

    if failed_paths:
        print(f"⚠️ /content could not be fully emptied ({len(failed_paths)} item(s) remain).")
    else:
        print("✅ /content is now empty.")


✅ /content is now empty.


## Single-Condition Viewer

In [None]:
import pandas as pd
import math
import requests
from Bio import SeqIO
import io
from matplotlib import cm, colors
import matplotlib  # Added to fix the 'matplotlib' is not defined error
import py3Dmol
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
import zipfile
import glob
import numpy as np

# %matplotlib inline  # Ensure reliable plot rendering in Colab

# ---------------------------
# Function to load data
# ---------------------------
def load_data(use_existing=True,
              csv_path="/content/*.csv",
              fasta_path="/content/*.fasta"):
    """Load the peptide CSV and the FASTA file, then show the condition-naming widget.

    Args:
        use_existing: When True, use the alphabetically first file matching
            ``csv_path`` / ``fasta_path`` and only prompt for an upload when
            nothing matches. When False, always prompt for both uploads.
        csv_path: Glob pattern used to look for an existing peptide CSV.
        fasta_path: Glob pattern used to look for an existing FASTA file.

    Raises:
        ValueError: If an upload returns no file, the CSV has no
            'Protein.Group' column, the FASTA contains no records, or the CSV
            does not have exactly one column whose name contains 'intensity'.

    Side effects:
        Sets the globals ``df``, ``seq_records``, ``file_name`` and
        ``fasta_name``. ``condition_name`` and ``condition_intensity_col`` are
        set when the user clicks "Confirm Condition", after which
        ``start_protein_selection()`` is called.
    """
    global df, seq_records, condition_intensity_col, file_name, fasta_name, condition_name

    def upload_one(prompt):
        """Show ``prompt``, open the Colab upload dialog and return the first uploaded name."""
        print(prompt)
        uploaded = files.upload()
        if not uploaded:
            # A cancelled dialog yields an empty mapping; fail with a clear message.
            raise ValueError("No file was uploaded.")
        return list(uploaded.keys())[0]

    def existing_or_upload(pattern, label):
        """Return the first file matching ``pattern``, or fall back to an upload."""
        matches = sorted(glob.glob(pattern))  # sorted: glob order is otherwise arbitrary
        if matches:
            print(f"Using existing {label} file: {matches[0]}")
            return matches[0]
        return upload_one(f"No {label} found in /content/, please upload one.")

    if use_existing:
        file_name = existing_or_upload(csv_path, "CSV")
        fasta_name = existing_or_upload(fasta_path, "FASTA")
    else:
        file_name = upload_one("Upload peptide CSV file:")
        fasta_name = upload_one("Upload FASTA file:")

    # Load CSV
    df = pd.read_csv(file_name)
    if "Protein.Group" not in df.columns:
        raise ValueError("CSV must contain 'Protein.Group' column.")
    df['Protein.Group'] = df['Protein.Group'].fillna('').astype(str)

    # Load FASTA: process_and_visualize() iterates over seq_records, so it must be set here.
    seq_records = list(SeqIO.parse(fasta_name, "fasta"))
    if not seq_records:
        raise ValueError("No sequences found in FASTA file.")
    print(f"Loaded {len(seq_records)} sequences from FASTA.")

    # Identify intensity column
    intensity_cols = [col for col in df.columns if 'intensity' in col.lower()]
    if len(intensity_cols) != 1:
        raise ValueError(f"Expected exactly 1 intensity column, found: {intensity_cols}")

    # Create widget for condition naming and column mapping
    condition_text = widgets.Text(
        value='Condition',
        description='Name for Condition:',
        layout={'width': '300px'}
    )
    condition_col = widgets.Dropdown(
        options=intensity_cols,
        value=intensity_cols[0],
        description='Map to Column:'
    )
    confirm_button = widgets.Button(description="Confirm Condition")
    output_confirm = widgets.Output()
    display(widgets.VBox([condition_text, condition_col, confirm_button, output_confirm]))

    def on_confirm_clicked(b):
        """Store the chosen condition name/column and move on to protein selection."""
        global condition_name, condition_intensity_col
        with output_confirm:
            clear_output(wait=True)
            condition_name = condition_text.value.strip()
            condition_intensity_col = condition_col.value
            print(f"{condition_name}: {condition_intensity_col}")
            # Proceed to display protein selection UI
            start_protein_selection()

    confirm_button.on_click(on_confirm_clicked)

# ---------------------------
# Function to start protein selection UI
# ---------------------------
def start_protein_selection():
    """Create the protein-selection controls and render them inside a new output area.

    Reads the global ``df`` for the available 'Protein.Group' values and
    publishes the created widgets (``protein_dropdown``, ``combine_dropdown``,
    ``overlap_dropdown``, ``process_button``, ``result_output``) as globals.
    Clicking "Process Protein" calls ``process_and_visualize`` with the current
    dropdown values.
    """
    global protein_dropdown, combine_dropdown, overlap_dropdown, process_button, result_output

    available_proteins = sorted(df['Protein.Group'].unique())

    result_output = widgets.Output()
    protein_dropdown = widgets.Dropdown(
        options=available_proteins,
        description='Select Protein:',
    )
    combine_dropdown = widgets.Dropdown(
        options=['yes', 'no'],
        value='no',
        description='Combine Isoforms:',
    )
    overlap_dropdown = widgets.Dropdown(
        options=['none', 'merge', 'highest', 'last'],
        value='merge',
        description='Overlap Strategy:',
    )
    process_button = widgets.Button(description="Process Protein")

    def _on_process_clicked(_button):
        # Values are read at click time so the latest selections are used.
        process_and_visualize(
            protein_dropdown.value, combine_dropdown.value, overlap_dropdown.value
        )

    process_button.on_click(_on_process_clicked)

    display(result_output)
    with result_output:
        clear_output(wait=True)
        controls = widgets.VBox(
            [protein_dropdown, combine_dropdown, overlap_dropdown, process_button]
        )
        display(controls)

# ---------------------------
# Function to process and visualize
# ---------------------------
def process_and_visualize(protein_of_interest, combine_isoforms, overlap_strategy):
    """Look up the protein's sequence and isoforms, then hand over to ``process_protein``.

    Args:
        protein_of_interest: Protein ID selected in the UI.
        combine_isoforms: 'yes' to use every matching isoform directly; 'no' to
            let the user pick isoforms when more than one is found.
        overlap_strategy: Passed through unchanged to ``process_protein``.

    Errors are not raised to the caller: they are printed as
    "Error during processing: ..." into ``result_output`` (or into the isoform
    widget's output when they happen after "Confirm Isoforms" is clicked).

    Reads the globals ``df``, ``seq_records`` and ``result_output``.
    """
    import re  # local import: only needed to escape the protein ID for the regex below

    global py3d_output, linear_output, download_output, download_dropdown

    # Close widgets left over from a previous run (including the colour legend) so
    # repeated processing does not stack stale viewers underneath each other.
    for name in ('py3d_output', 'linear_output', 'color_legend_output',
                 'download_output', 'download_dropdown'):
        stale_widget = globals().get(name)
        if stale_widget is not None:
            stale_widget.close()

    result_output.clear_output(wait=True)
    with result_output:
        print(f"Processing {protein_of_interest}...")

    try:
        # Find the matching sequence in FASTA. Prefer a record whose '|'-separated id
        # contains the protein ID as a whole field; otherwise fall back to the first
        # record whose id merely contains it as a substring (the previous behaviour).
        # NOTE(review): assumes UniProt-style ids such as "sp|P41250|GARS_HUMAN" - confirm.
        matched_record = None
        substring_record = None
        for record in seq_records:
            if protein_of_interest in record.id.split('|'):
                matched_record = record
                break
            if substring_record is None and protein_of_interest in record.id:
                substring_record = record
        if matched_record is None:
            matched_record = substring_record
        if matched_record is None:
            raise ValueError(f"No sequence found for {protein_of_interest} in FASTA.")

        sequence = str(matched_record.seq)
        with result_output:
            print(f"Found matching sequence for {matched_record.id}")

        seq_len = len(sequence)

        # Find isoforms: groups ending in the ID, optionally followed by "-<number>".
        # The ID is escaped so characters like '.', '|' or '(' are matched literally.
        isoform_pattern = re.escape(protein_of_interest) + r'(?:-\d+)?$'
        isoforms = df[df['Protein.Group'].str.contains(isoform_pattern)]['Protein.Group'].unique()
        with result_output:
            print(f"Found isoforms: {list(isoforms)}")

        if len(isoforms) > 1 and combine_isoforms == 'no':
            with result_output:
                print("Select isoforms (comma-separated, e.g., P41250,P41250-2): ")
            # Use a widget for isoform selection
            isoform_text = widgets.Text(
                value=', '.join(isoforms),
                description='Select Isoforms:',
                layout={'width': '500px'}
            )
            isoform_button = widgets.Button(description="Confirm Isoforms")
            isoform_output = widgets.Output()
            display(widgets.VBox([isoform_text, isoform_button, isoform_output]))

            known_isoforms = set(isoforms)

            def on_isoform_confirm(b):
                """Validate the typed isoform list and start processing."""
                with isoform_output:
                    clear_output(wait=True)
                    selected_groups = [s.strip() for s in isoform_text.value.split(',') if s.strip()]
                    invalid = [s for s in selected_groups if s not in known_isoforms]
                    if not selected_groups or invalid:
                        # Report instead of raising: this runs inside a widget callback.
                        print(f"Invalid isoform(s): {', '.join(invalid) if invalid else 'none selected'}")
                        return
                    try:
                        process_protein(protein_of_interest, selected_groups, sequence, seq_len, overlap_strategy)
                    except Exception as e:
                        # The outer try/except has already finished when this callback runs.
                        print(f"Error during processing: {str(e)}")

            isoform_button.on_click(on_isoform_confirm)
        else:
            selected_groups = isoforms
            process_protein(protein_of_interest, selected_groups, sequence, seq_len, overlap_strategy)

    except Exception as e:
        with result_output:
            print(f"Error during processing: {str(e)}")

# ---------------------------
# Function to process protein data
# ---------------------------
def process_protein(protein_of_interest, selected_groups, sequence, seq_len, overlap_strategy):
    """Colour a protein by z-scaled peptide intensity and build the viewer/download widgets.

    Steps: average the condition's intensity per 'Stripped.Sequence', z-scale
    log10(intensity + 1), locate each peptide in ``sequence`` (first occurrence
    only), resolve per-residue overlaps, fetch the AlphaFold model, then display
    a py3Dmol view, a linear coverage bar, a colour legend and a download dropdown.

    Args:
        protein_of_interest: Protein ID; the part before the first '-' is used as
            the AlphaFold accession, the full value is used in output file names.
        selected_groups: 'Protein.Group' values whose rows are included.
        sequence: Protein sequence the peptides are searched in.
        seq_len: Length of ``sequence``.
        overlap_strategy: 'merge' averages overlapping z-scores, 'highest' keeps
            the maximum; 'last', 'none' and any other value keep the score of the
            last peptide mapped onto the residue.

    Raises:
        ValueError: If no peptide with a finite z-score maps onto the sequence,
            or the AlphaFold request does not return HTTP 200.

    Reads the globals ``df``, ``condition_intensity_col``, ``condition_name`` and
    ``result_output``; (re)assigns the widget globals declared below.
    """
    global py3d_output, linear_output, color_legend_output
    global download_output, download_dropdown, download_triggered

    selected_df = df[df['Protein.Group'].isin(selected_groups)]

    # One row per unique peptide with its mean intensity over the selected rows.
    peptides = selected_df.groupby('Stripped.Sequence')[condition_intensity_col].mean().reset_index()

    # Z-scale intensities on the log10(x + 1) scale; the pandas reductions skip NaN.
    log_intensities = np.log10(peptides[condition_intensity_col] + 1)
    mean_log = log_intensities.mean()
    std_log = log_intensities.std(ddof=0)
    if std_log == 0:
        z_scores = log_intensities * 0.0  # zero variance: every usable peptide scores 0
    else:
        z_scores = (log_intensities - mean_log) / std_log

    # Collect, per residue, the z-scores of all peptides covering it.
    residue_scores = [None] * seq_len
    for pep, z_score in zip(peptides['Stripped.Sequence'], z_scores):
        start = sequence.find(pep)
        if start == -1:
            with result_output:
                print(f"Warning: {pep} not found in sequence for {condition_name}.")
            continue
        if not np.isfinite(z_score):
            # Missing/invalid intensities would otherwise poison min/max and the colours.
            with result_output:
                print(f"Warning: {pep} has no usable intensity for {condition_name}; skipped.")
            continue
        for i in range(start, start + len(pep)):
            if residue_scores[i] is None:
                residue_scores[i] = [z_score]
            else:
                residue_scores[i].append(z_score)

    # Collapse each residue's list to a single value according to the strategy.
    for i, scores in enumerate(residue_scores):
        if scores:
            if overlap_strategy == 'merge':
                residue_scores[i] = sum(scores) / len(scores)
            elif overlap_strategy == 'highest':
                residue_scores[i] = max(scores)
            else:
                residue_scores[i] = scores[-1]

    covered_scores = [v for v in residue_scores if v is not None]
    if not covered_scores:
        raise ValueError(f"No peptides mapped to sequence for {condition_name}.")
    min_log, max_log = min(covered_scores), max(covered_scores)

    # Fetch AlphaFold structure
    # NOTE(review): the model version is pinned to v4 in the URL - confirm it is still served.
    base_id = protein_of_interest.split('-')[0]
    pdb_url = f"https://alphafold.ebi.ac.uk/files/AF-{base_id}-F1-model_v4.pdb"
    pdb_response = requests.get(pdb_url, timeout=30)  # timeout: never hang the notebook
    if pdb_response.status_code != 200:
        raise ValueError(f"Failed to fetch AlphaFold structure for {base_id}")
    pdb_str = pdb_response.text
    with result_output:
        print(f"Successfully fetched PDB for {base_id}")

    # Shared colormap (using matplotlib.colormaps to avoid deprecation warning)
    cmap = matplotlib.colormaps.get_cmap('autumn')

    def residue_rgb(value):
        """Map a residue score onto the colormap; a flat range maps to the midpoint."""
        norm = (value - min_log) / (max_log - min_log) if max_log > min_log else 0.5
        return cmap(norm)[:3]

    def draw_linear(ax):
        """Draw the grey sequence bar with one coloured unit rectangle per covered residue."""
        ax.add_patch(patches.Rectangle((0, 0), seq_len, 1, facecolor='lightgray', edgecolor='none'))
        for i, value in enumerate(residue_scores):
            if value is not None:
                ax.add_patch(patches.Rectangle((i, 0), 1, 1, facecolor=residue_rgb(value), edgecolor='none'))
        ax.set_xlim(0, seq_len)
        ax.set_ylim(0, 1)
        ax.set_yticks([])
        ax.set_xlabel(f'Amino Acid Position ({condition_name})')
        ax.set_xticks(range(0, seq_len + 1, max(1, seq_len // 10)))

    # ---------------------------
    # 3Dmol viewer (single viewer)
    # ---------------------------
    py3d_output = widgets.Output()
    display(py3d_output)

    bg_dropdown = widgets.Dropdown(
        options=['white', 'black', 'darkgrey'],
        value='black',
        description='Background:'
    )

    def update_views(change=None):
        """(Re)build the 3D view; also used as the background dropdown's observer."""
        with py3d_output:
            clear_output(wait=True)
            view = py3Dmol.view(width=1200, height=700)
            view.addModel(pdb_str, 'pdb')
            view.setBackgroundColor(bg_dropdown.value)
            view.setStyle({}, {'cartoon': {'color': 'lightgray'}})
            for i, value in enumerate(residue_scores):
                if value is not None:
                    color_hex = colors.rgb2hex(residue_rgb(value))
                    # PDB residue numbers are 1-based, list positions 0-based.
                    view.setStyle({'resi': str(i + 1)}, {'cartoon': {'color': color_hex}})
            view.zoomTo()
            display(HTML(f"""
            <div style="text-align: center;"><b>{condition_name} (Z-Scaled)</b><br>{view._make_html()}</div>
            """))

    bg_dropdown.observe(update_views, names='value')
    update_views()
    display(bg_dropdown)

    # ---------------------------
    # Linear Representation (single plot)
    # ---------------------------
    linear_output = widgets.Output()
    display(linear_output)
    with linear_output:
        clear_output(wait=True)
        fig, ax1 = plt.subplots(1, 1, figsize=(12, 1))
        draw_linear(ax1)
        plt.tight_layout()
        plt.show()

    # ---------------------------
    # Separate Color Legend
    # ---------------------------
    color_legend_output = widgets.Output()
    display(color_legend_output)
    with color_legend_output:
        clear_output(wait=True)
        fig, ax = plt.subplots(figsize=(4, 2))  # Smaller figure for the legend
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_log, vmax=max_log))
        sm.set_array([])
        cbar = plt.colorbar(sm, ax=ax, orientation='horizontal', pad=0.1)
        cbar.set_label('Z-Score Intensity')
        ax.remove()  # Remove the axis since we only need the colorbar
        plt.show()

    # ---------------------------
    # Download Option
    # ---------------------------
    download_output = widgets.Output()
    display(download_output)
    download_dropdown = widgets.Dropdown(
        options=['no', 'yes'],
        value='no',
        description='Download Files?:'
    )
    display(download_dropdown)
    download_triggered = False  # Flag to prevent multiple downloads

    def on_download_change(change):
        """Write the PDB, peptide CSV, PyMOL script and linear plot, zip them and download."""
        global download_triggered, download_dropdown
        if change['new'] == 'yes' and not download_triggered:
            with download_output:
                clear_output(wait=True)
                print("Preparing download...")
                download_triggered = True  # Set flag to true after starting

                temp_dir = f"/content/{protein_of_interest}_files"
                os.makedirs(temp_dir, exist_ok=True)

                # Save PDB
                pdb_filename = f"{temp_dir}/{protein_of_interest}_protein.pdb"
                with open(pdb_filename, 'w') as f:
                    f.write(pdb_str)

                # Save peptide list for the condition
                peptide_csv_filename = f"{temp_dir}/{protein_of_interest}_{condition_name}_peptides.csv"
                peptides.to_csv(peptide_csv_filename, index=False)

                # PyMOL script for the condition
                pml_filename = f"{temp_dir}/{protein_of_interest}_{condition_name}_pml_script.pml"
                with open(pml_filename, 'w') as f:
                    f.write(f"load {protein_of_interest}_protein.pdb\n")
                    f.write("hide everything\nshow cartoon\ncolor gray90, all\nzoom\n")
                    for i, value in enumerate(residue_scores):
                        if value is not None:
                            color_hex = colors.rgb2hex(residue_rgb(value))
                            # PyMOL takes hex colours as 0xRRGGBB; a leading '#' would
                            # start a comment in a .pml script.
                            f.write(f"color 0x{color_hex[1:]}, resi {i+1}\n")

                # Linear plot as JPEG for the condition
                fig, ax = plt.subplots(figsize=(12, 1), dpi=600)
                draw_linear(ax)
                linear_filename = f"{temp_dir}/{protein_of_interest}_{condition_name}_linear.jpeg"
                fig.savefig(linear_filename, format='jpeg', dpi=600, bbox_inches='tight')
                plt.close(fig)

                # Zip everything in temp_dir (the archive itself is written to the
                # current working directory, not into temp_dir).
                zip_filename = f"{protein_of_interest}_files.zip"
                with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    for path in glob.glob(f"{temp_dir}/*"):
                        if os.path.isfile(path):
                            zipf.write(path, os.path.basename(path))

                # Download
                files.download(zip_filename)
                print(f"✅ Download ready: {zip_filename}")

            # Unobserve the event to prevent further triggers
            download_dropdown.unobserve(on_download_change, names='value')

    download_dropdown.observe(on_download_change, names='value')

# ---------------------------
# Run UI
# ---------------------------
# NOTE(review): start_ui is not defined in this cell or anywhere earlier in the
# notebook; presumably it comes from another cell or a previous kernel session.
# On a fresh kernel (Restart & Run All) this call raises NameError - TODO confirm
# where start_ui lives and define it before this line.
start_ui()

ToggleButtons(description='Input Files:', options=('Use existing files', 'Upload new files'), value='Use exist…

Button(description='Upload CSV and FASTA', style=ButtonStyle())

Using existing CSV file: /content/Peptide_Demo_File.csv
Using existing FASTA file: /content/uniprotkb_Human.fasta


VBox(children=(Text(value='Condition', description='Name for Condition:', layout=Layout(width='300px')), Dropd…

Output()

Dropdown(description='Background:', index=1, options=('white', 'black', 'darkgrey'), value='black')

Output()

Output()

Output()

Dropdown(description='Download Files?:', options=('no', 'yes'), value='no')

on_download_change triggered with value: yes


## Two-Condition Viewer

In [8]:
# Install required package

!pip install -q py3Dmol biopython -q


import pandas as pd
import math
import requests
from Bio import SeqIO
import io
from matplotlib import cm, colors
import matplotlib  # Added to fix the 'matplotlib' is not defined error
import py3Dmol
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
import zipfile
import glob
import numpy as np

# %matplotlib inline  # Ensure reliable plot rendering in Colab

# ---------------------------
# Function to load data
# ---------------------------

def load_data(use_existing=True,
              csv_path="/content/*.csv",
              fasta_path="/content/*.fasta"):
    """Load the peptide CSV and the FASTA file, then show the two-condition mapping widget.

    Args:
        use_existing: When True, use the alphabetically first file matching
            ``csv_path`` / ``fasta_path`` and only prompt for an upload when
            nothing matches. When False, always prompt for both uploads.
        csv_path: Glob pattern used to look for an existing peptide CSV.
        fasta_path: Glob pattern used to look for an existing FASTA file.

    Raises:
        ValueError: If an upload returns no file, the CSV has no
            'Protein.Group' column, the FASTA contains no records, or the CSV
            does not have exactly two columns whose names contain 'intensity'.

    Side effects:
        Sets the globals ``df``, ``seq_records``, ``file_name`` and
        ``fasta_name``. The condition names and intensity columns are set when
        the user clicks "Confirm Conditions", after which
        ``start_protein_selection()`` is called.
    """
    global df, seq_records, condition1_intensity_col, condition2_intensity_col
    global file_name, fasta_name, condition1_name, condition2_name

    def upload_one(prompt):
        """Show ``prompt``, open the Colab upload dialog and return the first uploaded name."""
        print(prompt)
        uploaded = files.upload()
        if not uploaded:
            # A cancelled dialog yields an empty mapping; fail with a clear message.
            raise ValueError("No file was uploaded.")
        return list(uploaded.keys())[0]

    def existing_or_upload(pattern, label):
        """Return the first file matching ``pattern``, or fall back to an upload."""
        matches = sorted(glob.glob(pattern))  # sorted: glob order is otherwise arbitrary
        if matches:
            print(f"Using existing {label} file: {matches[0]}")
            return matches[0]
        return upload_one(f"No {label} found in /content/, please upload one.")

    if use_existing:
        file_name = existing_or_upload(csv_path, "CSV")
        fasta_name = existing_or_upload(fasta_path, "FASTA")
    else:
        file_name = upload_one("Upload peptide CSV file:")
        fasta_name = upload_one("Upload FASTA file:")

    # Load CSV
    df = pd.read_csv(file_name)
    if "Protein.Group" not in df.columns:
        raise ValueError("CSV must contain 'Protein.Group' column.")
    df['Protein.Group'] = df['Protein.Group'].fillna('').astype(str)

    # Load FASTA
    seq_records = list(SeqIO.parse(fasta_name, "fasta"))
    if not seq_records:
        raise ValueError("No sequences found in FASTA file.")
    print(f"Loaded {len(seq_records)} sequences from FASTA.")

    # Identify intensity columns
    intensity_cols = [col for col in df.columns if 'intensity' in col.lower()]
    if len(intensity_cols) != 2:
        raise ValueError(f"Expected exactly 2 intensity columns, found: {intensity_cols}")

    # Create widgets for condition naming and column mapping
    condition1_text = widgets.Text(
        value='Condition 1',
        description='Name for Condition 1:',
        layout={'width': '300px'}
    )
    condition2_text = widgets.Text(
        value='Condition 2',
        description='Name for Condition 2:',
        layout={'width': '300px'}
    )
    condition1_col = widgets.Dropdown(
        options=intensity_cols,
        value=intensity_cols[0],
        description='Map to Column:'
    )
    condition2_col = widgets.Dropdown(
        options=intensity_cols,
        value=intensity_cols[1],
        description='Map to Column:'
    )
    confirm_button = widgets.Button(description="Confirm Conditions")
    output_confirm = widgets.Output()
    display(widgets.VBox([condition1_text, condition1_col, condition2_text, condition2_col, confirm_button, output_confirm]))

    def on_confirm_clicked(b):
        """Store both condition names/columns and move on to protein selection."""
        global condition1_name, condition2_name, condition1_intensity_col, condition2_intensity_col
        with output_confirm:
            clear_output(wait=True)
            condition1_name = condition1_text.value.strip()
            condition2_name = condition2_text.value.strip()
            condition1_intensity_col = condition1_col.value
            condition2_intensity_col = condition2_col.value
            if condition1_intensity_col == condition2_intensity_col:
                # Report instead of raising: this runs inside a widget callback, and
                # the user can simply fix the dropdowns and confirm again.
                print("Intensity columns must be different.")
                return
            print(f"{condition1_name}: {condition1_intensity_col}, {condition2_name}: {condition2_intensity_col}")
            # Proceed to display protein selection UI
            start_protein_selection()

    confirm_button.on_click(on_confirm_clicked)

# ---------------------------
# Function to start protein selection UI
# ---------------------------
def start_protein_selection():
    """Create the protein-selection controls and render them inside a new output area.

    Reads the global ``df`` for the available 'Protein.Group' values and
    publishes the created widgets (``protein_dropdown``, ``combine_dropdown``,
    ``overlap_dropdown``, ``process_button``, ``result_output``) as globals.
    Clicking "Process Protein" calls ``process_and_visualize`` with the current
    dropdown values.
    """
    global protein_dropdown, combine_dropdown, overlap_dropdown, process_button, result_output

    available_proteins = sorted(df['Protein.Group'].unique())

    result_output = widgets.Output()
    protein_dropdown = widgets.Dropdown(
        options=available_proteins,
        description='Select Protein:',
    )
    combine_dropdown = widgets.Dropdown(
        options=['yes', 'no'],
        value='no',
        description='Combine Isoforms:',
    )
    overlap_dropdown = widgets.Dropdown(
        options=['none', 'merge', 'highest', 'last'],
        value='merge',
        description='Overlap Strategy:',
    )
    process_button = widgets.Button(description="Process Protein")

    def _on_process_clicked(_button):
        # Values are read at click time so the latest selections are used.
        process_and_visualize(
            protein_dropdown.value, combine_dropdown.value, overlap_dropdown.value
        )

    process_button.on_click(_on_process_clicked)

    display(result_output)
    with result_output:
        clear_output(wait=True)
        controls = widgets.VBox(
            [protein_dropdown, combine_dropdown, overlap_dropdown, process_button]
        )
        display(controls)

# ---------------------------
# Function to process and visualize
# ---------------------------
def process_and_visualize(protein_of_interest, combine_isoforms, overlap_strategy):
    """Look up the protein's sequence and isoforms, then hand over to ``process_protein``.

    Args:
        protein_of_interest: Protein ID selected in the UI.
        combine_isoforms: 'yes' to use every matching isoform directly; 'no' to
            let the user pick isoforms when more than one is found.
        overlap_strategy: Passed through unchanged to ``process_protein``.

    Errors are not raised to the caller: they are printed as
    "Error during processing: ..." into ``result_output`` (or into the isoform
    widget's output when they happen after "Confirm Isoforms" is clicked).

    Reads the globals ``df``, ``seq_records`` and ``result_output``.
    """
    import re  # local import: only needed to escape the protein ID for the regex below

    global py3d_output, linear_output, download_output, download_dropdown

    # Close widgets left over from a previous run so repeated processing does not
    # stack stale viewers; 'color_legend_output' is closed too if such a global exists.
    for name in ('py3d_output', 'linear_output', 'color_legend_output',
                 'download_output', 'download_dropdown'):
        stale_widget = globals().get(name)
        if stale_widget is not None:
            stale_widget.close()

    result_output.clear_output(wait=True)
    with result_output:
        print(f"Processing {protein_of_interest}...")

    try:
        # Find the matching sequence in FASTA. Prefer a record whose '|'-separated id
        # contains the protein ID as a whole field; otherwise fall back to the first
        # record whose id merely contains it as a substring (the previous behaviour).
        # NOTE(review): assumes UniProt-style ids such as "sp|P41250|GARS_HUMAN" - confirm.
        matched_record = None
        substring_record = None
        for record in seq_records:
            if protein_of_interest in record.id.split('|'):
                matched_record = record
                break
            if substring_record is None and protein_of_interest in record.id:
                substring_record = record
        if matched_record is None:
            matched_record = substring_record
        if matched_record is None:
            raise ValueError(f"No sequence found for {protein_of_interest} in FASTA.")

        sequence = str(matched_record.seq)
        with result_output:
            print(f"Found matching sequence for {matched_record.id}")

        seq_len = len(sequence)

        # Find isoforms: groups ending in the ID, optionally followed by "-<number>".
        # The ID is escaped so characters like '.', '|' or '(' are matched literally.
        isoform_pattern = re.escape(protein_of_interest) + r'(?:-\d+)?$'
        isoforms = df[df['Protein.Group'].str.contains(isoform_pattern)]['Protein.Group'].unique()
        with result_output:
            print(f"Found isoforms: {list(isoforms)}")

        if len(isoforms) > 1 and combine_isoforms == 'no':
            with result_output:
                print("Select isoforms (comma-separated, e.g., P41250,P41250-2): ")
            # Use a widget for isoform selection
            isoform_text = widgets.Text(
                value=', '.join(isoforms),
                description='Select Isoforms:',
                layout={'width': '500px'}
            )
            isoform_button = widgets.Button(description="Confirm Isoforms")
            isoform_output = widgets.Output()
            display(widgets.VBox([isoform_text, isoform_button, isoform_output]))

            known_isoforms = set(isoforms)

            def on_isoform_confirm(b):
                """Validate the typed isoform list and start processing."""
                with isoform_output:
                    clear_output(wait=True)
                    selected_groups = [s.strip() for s in isoform_text.value.split(',') if s.strip()]
                    invalid = [s for s in selected_groups if s not in known_isoforms]
                    if not selected_groups or invalid:
                        # Report instead of raising: this runs inside a widget callback.
                        print(f"Invalid isoform(s): {', '.join(invalid) if invalid else 'none selected'}")
                        return
                    try:
                        process_protein(protein_of_interest, selected_groups, sequence, seq_len, overlap_strategy)
                    except Exception as e:
                        # The outer try/except has already finished when this callback runs.
                        print(f"Error during processing: {str(e)}")

            isoform_button.on_click(on_isoform_confirm)
        else:
            selected_groups = isoforms
            process_protein(protein_of_interest, selected_groups, sequence, seq_len, overlap_strategy)

    except Exception as e:
        with result_output:
            print(f"Error during processing: {str(e)}")

# ---------------------------
# Function to process protein data
# ---------------------------
def process_protein(protein_of_interest, selected_groups, sequence, seq_len, overlap_strategy):
    """Map peptide intensities of two conditions onto a protein and display them.

    For each condition the mean intensity per ``Stripped.Sequence`` is
    log10(x + 1)-transformed and z-scaled, every peptide is located in
    ``sequence`` (first occurrence only), and the z-scores are projected onto
    the residues it covers. The result is shown as two py3Dmol viewers, two
    linear coverage tracks, a colorbar, and an optional zip download
    (PDB, peptide CSVs, PyMOL scripts, JPEG tracks).

    Reads the module-level names ``df``, ``condition1_name``,
    ``condition2_name``, ``condition1_intensity_col``,
    ``condition2_intensity_col`` and ``result_output``; (re)binds the
    module-level widgets ``py3d_output``, ``linear_output``,
    ``color_legend_output``, ``download_output`` and ``download_dropdown``.

    Args:
        protein_of_interest: Accession used for the AlphaFold lookup (the part
            before the first ``-``) and as prefix of the exported file names.
        selected_groups: ``Protein.Group`` values whose rows are included.
        sequence: Amino-acid sequence the peptides are searched in.
        seq_len: Number of residues to track (the caller passes ``len(sequence)``).
        overlap_strategy: How several z-scores on one residue are combined:
            ``'merge'`` (mean), ``'highest'`` (max); ``'last'`` and any other
            value keep the score of the last peptide that covered the residue.

    Raises:
        ValueError: If no peptide with a usable intensity maps onto the
            sequence for a condition, or the AlphaFold download does not
            return HTTP 200.
        requests.exceptions.RequestException: If the AlphaFold request fails
            at the network level or times out.
    """
    # Declared up front so every later assignment clearly targets the module scope.
    global py3d_output, linear_output, color_legend_output, download_output, download_dropdown

    selected_df = df[df['Protein.Group'].isin(selected_groups)]

    # Process intensities for both conditions
    conditions = {
        condition1_name: condition1_intensity_col,
        condition2_name: condition2_intensity_col
    }
    peptide_data = {}
    residue_log_int = {}
    min_max_logs = {}

    def collapse_overlaps(scores):
        """Reduce the z-scores stacked on one residue to a single value."""
        if overlap_strategy == 'merge':
            return sum(scores) / len(scores)
        if overlap_strategy == 'highest':
            return max(scores)
        # 'last' and any unrecognised strategy: keep the most recently mapped peptide.
        return scores[-1]

    for condition, intensity_col in conditions.items():
        # Group peptides and compute mean intensities
        peptides = selected_df.groupby('Stripped.Sequence')[intensity_col].mean().reset_index()

        # Z-scale intensities
        intensities = peptides[intensity_col]
        log_intensities = np.log10(intensities + 1)
        mean_log = np.mean(log_intensities)
        std_log = np.std(log_intensities)
        if std_log == 0:
            z_scores = np.zeros_like(log_intensities)  # Handle zero variance
        else:
            z_scores = (log_intensities - mean_log) / std_log
        # Plain float array so peptides and scores can be paired positionally.
        z_values = np.asarray(z_scores, dtype=float)

        scores_per_residue = [[] for _ in range(seq_len)]
        mapped_count = 0
        unquantified_count = 0
        for pep, z_score in zip(peptides['Stripped.Sequence'], z_values):
            start = sequence.find(pep)
            if start == -1:
                with result_output:
                    print(f"Warning: {pep} not found in sequence for {condition}.")
                continue
            if not np.isfinite(z_score):
                # A missing/invalid intensity would otherwise inject NaN into the
                # per-residue merge and into the min/max used for colour scaling.
                unquantified_count += 1
                continue
            mapped_count += 1
            for i in range(start, start + len(pep)):
                scores_per_residue[i].append(float(z_score))

        if unquantified_count:
            with result_output:
                print(f"Warning: skipped {unquantified_count} peptide(s) without a usable intensity for {condition}.")

        # Map z-scores to residues (None marks an uncovered residue)
        residue_log_int[condition] = [
            collapse_overlaps(scores) if scores else None
            for scores in scores_per_residue
        ]
        covered_logs = [v for v in residue_log_int[condition] if v is not None]
        if mapped_count == 0 or not covered_logs:
            raise ValueError(f"No peptides mapped to sequence for {condition}.")
        min_max_logs[condition] = (min(covered_logs), max(covered_logs))

        peptide_data[condition] = peptides

    # Fetch AlphaFold structure
    base_id = protein_of_interest.split('-')[0]
    pdb_url = f"https://alphafold.ebi.ac.uk/files/AF-{base_id}-F1-model_v4.pdb"
    # A timeout keeps an unresponsive server from hanging the notebook kernel.
    pdb_response = requests.get(pdb_url, timeout=30)
    if pdb_response.status_code != 200:
        raise ValueError(f"Failed to fetch AlphaFold structure for {base_id}")
    pdb_str = pdb_response.text
    with result_output:
        print(f"Successfully fetched PDB for {base_id}")

    # Shared colormap (using matplotlib.colormaps to avoid deprecation warning)
    cmap = matplotlib.colormaps.get_cmap('autumn')

    def colors_for(condition):
        """Return one RGB tuple per residue (None where the residue is uncovered)."""
        min_log, max_log = min_max_logs[condition]
        rgb_values = []
        for score in residue_log_int[condition]:
            if score is None:
                rgb_values.append(None)
                continue
            # Each condition is scaled to its own range; a flat range maps to mid-scale.
            norm = (score - min_log) / (max_log - min_log) if max_log > min_log else 0.5
            rgb_values.append(cmap(norm)[:3])
        return rgb_values

    # Computed once and shared by the 3D views, linear tracks and exports.
    residue_rgb = {condition: colors_for(condition) for condition in conditions}

    def draw_linear_track(ax, condition):
        """Draw the coloured per-residue coverage bar for one condition on ``ax``."""
        ax.add_patch(patches.Rectangle((0, 0), seq_len, 1, facecolor='lightgray', edgecolor='none'))
        for i, rgb in enumerate(residue_rgb[condition]):
            if rgb is not None:
                ax.add_patch(patches.Rectangle((i, 0), 1, 1, facecolor=rgb, edgecolor='none'))
        ax.set_xlim(0, seq_len)
        ax.set_ylim(0, 1)
        ax.set_yticks([])
        ax.set_xlabel(f'Amino Acid Position ({condition})')
        ax.set_xticks(range(0, seq_len + 1, max(1, seq_len // 10)))

    # ---------------------------
    # 3Dmol viewers (side by side with basic rendering)
    # ---------------------------
    py3d_output = widgets.Output()
    display(py3d_output)

    bg_dropdown = widgets.Dropdown(
        options=['white', 'black', 'darkgrey'],
        value='black',
        description='Background:'
    )

    def update_views(change=None):
        """Rebuild both viewers; also used as observer for the background dropdown."""
        with py3d_output:
            clear_output(wait=True)
            # Create two viewers
            view_condition1 = py3Dmol.view(width=900, height=500)
            view_condition2 = py3Dmol.view(width=900, height=500)
            for view in (view_condition1, view_condition2):
                view.addModel(pdb_str, 'pdb')
                view.setBackgroundColor(bg_dropdown.value)
                view.setStyle({}, {'cartoon': {'color': 'lightgray'}})

            # Color residues based on z-scaled intensities
            for condition in conditions:
                view = view_condition1 if condition == condition1_name else view_condition2
                for i, rgb in enumerate(residue_rgb[condition]):
                    if rgb is not None:
                        # PDB residue numbering is 1-based.
                        view.setStyle({'resi': str(i + 1)}, {'cartoon': {'color': colors.rgb2hex(rgb)}})

            view_condition1.zoomTo()
            view_condition2.zoomTo()

            # Display viewers side by side
            display(HTML(f"""
            <div style="display: flex; justify-content: space-between;">
                <div style="width: 48%; text-align: center;"><b>{condition1_name} (Z-Scaled)</b><br>{view_condition1._make_html()}</div>
                <div style="width: 48%; text-align: center;"><b>{condition2_name} (Z-Scaled)</b><br>{view_condition2._make_html()}</div>
            </div>
            """))

    bg_dropdown.observe(update_views, names='value')
    update_views()
    display(bg_dropdown)

    # ---------------------------
    # Linear Representations (side by side)
    # ---------------------------
    linear_output = widgets.Output()
    display(linear_output)
    with linear_output:
        clear_output(wait=True)
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 1), sharey=True)
        draw_linear_track(ax1, condition1_name)
        draw_linear_track(ax2, condition2_name)
        plt.tight_layout()
        plt.show()

        # ---------------------------
        # Separate Color Legend
        # ---------------------------
        color_legend_output = widgets.Output()
        display(color_legend_output)
        with color_legend_output:
            clear_output(wait=True)
            fig, ax = plt.subplots(figsize=(4, 2))  # Smaller figure for the legend
            # NOTE(review): the colorbar spans the combined range of both conditions,
            # while residues are coloured relative to each condition's own range, so
            # tick values are only exact when both ranges coincide - confirm intent.
            legend_min = min(min_max_logs[condition1_name][0], min_max_logs[condition2_name][0])
            legend_max = max(min_max_logs[condition1_name][1], min_max_logs[condition2_name][1])
            sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=legend_min, vmax=legend_max))
            sm.set_array([])
            cbar = plt.colorbar(sm, ax=ax, orientation='horizontal', pad=0.1)
            cbar.set_label('Z-Score Intensity')
            ax.remove()  # Remove the axis since we only need the colorbar
            plt.show()

    # ---------------------------
    # Download Option
    # ---------------------------
    download_output = widgets.Output()
    display(download_output)

    download_dropdown = widgets.Dropdown(
        options=['no', 'yes'],
        value='no',
        description='Download Files?:'
    )
    display(download_dropdown)

    def on_download_change(change):
        """Write all export files, zip them and start the browser download."""
        if change['new'] != 'yes':
            return
        with download_output:
            clear_output(wait=True)
            print("Preparing download...")

            temp_dir = f"/content/{protein_of_interest}_files"
            os.makedirs(temp_dir, exist_ok=True)

            # Save PDB
            pdb_filename = f"{temp_dir}/{protein_of_interest}_protein.pdb"
            with open(pdb_filename, 'w') as pdb_file:
                pdb_file.write(pdb_str)

            for condition in conditions:
                # Save peptide list
                peptide_csv_filename = f"{temp_dir}/{protein_of_interest}_{condition}_peptides.csv"
                peptide_data[condition].to_csv(peptide_csv_filename, index=False)

                # PyMOL script
                pml_filename = f"{temp_dir}/{protein_of_interest}_{condition}_pymol_script.pml"
                with open(pml_filename, 'w') as pml_file:
                    pml_file.write(f"load {protein_of_interest}_protein.pdb\n")
                    pml_file.write("hide everything\nshow cartoon\ncolor gray90, all\nzoom\n")
                    for i, rgb in enumerate(residue_rgb[condition]):
                        if rgb is not None:
                            # '#' starts a comment in .pml files, so '#rrggbb' would be
                            # dropped together with the selection; PyMOL's hex colour
                            # syntax is 0xRRGGBB.
                            pymol_color = "0x" + colors.rgb2hex(rgb).lstrip('#')
                            pml_file.write(f"color {pymol_color}, resi {i+1}\n")

                # Linear plot as JPEG
                track_fig, track_ax = plt.subplots(figsize=(12, 1), dpi=600)
                draw_linear_track(track_ax, condition)
                linear_filename = f"{temp_dir}/{protein_of_interest}_{condition}_linear.jpeg"
                track_fig.savefig(linear_filename, format='jpeg', dpi=600, bbox_inches='tight')
                plt.close(track_fig)  # avoid leaking figures across repeated downloads

            # Zip everything
            zip_filename = f"{protein_of_interest}_files.zip"
            with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for file_path in glob.glob(f"{temp_dir}/*"):
                    zipf.write(file_path, os.path.basename(file_path))

            # Download
            files.download(zip_filename)
            print(f"✅ Download ready: {zip_filename}")

    download_dropdown.observe(on_download_change, names='value')

# ---------------------------
# Run UI
# ---------------------------
# Entry point of this cell: load_data() is called with its default arguments.
# NOTE(review): presumably this also builds the widget UI (the cell output shows
# a VBox after the load messages) - confirm against the load_data definition.
load_data()


Using existing CSV file: /content/Peptide_Demo_File_CD.csv
Using existing FASTA file: /content/uniprotkb_Human.fasta
Loaded 42509 sequences from FASTA.


VBox(children=(Text(value='Condition 1', description='Name for Condition 1:', layout=Layout(width='300px')), D…

Output()

Dropdown(description='Background:', index=1, options=('white', 'black', 'darkgrey'), value='black')

Output()

Output()

Dropdown(description='Download Files?:', options=('no', 'yes'), value='no')

Output()

Dropdown(description='Background:', index=1, options=('white', 'black', 'darkgrey'), value='black')

Output()

Output()

Dropdown(description='Download Files?:', options=('no', 'yes'), value='no')

Output()

Dropdown(description='Background:', index=1, options=('white', 'black', 'darkgrey'), value='black')

Output()

Output()

Dropdown(description='Download Files?:', options=('no', 'yes'), value='no')