In [None]:
from bokeh.models import ColumnDataSource, LabelSet, HoverTool, Select, CustomJS, Div, Toggle
from bokeh.plotting import figure
from bokeh.layouts import column, row
from bokeh.io import curdoc
from pathlib import Path
from bokeh.transform import factor_cmap, factor_mark
import pandas as pd
from bokeh.palettes import Paired12
from bokeh.models import CategoricalColorMapper


In [None]:
# Locate the two annotation workbooks relative to the current working directory.
cwd = Path.cwd()
annotation_folder = cwd / "annotation"
annotation_file = annotation_folder / "annotation_report_plus.xlsx"
annotation_100 = annotation_folder / "annotation_report_100%_plus.xlsx"

# Both workbooks are read unconditionally below, so fail early (and name the
# culprit) if *either* one is missing, not only when both are.
missing_files = [path for path in (annotation_file, annotation_100) if not path.exists()]
if missing_files:
    raise FileNotFoundError(
        "Excel file(s) not found: " + ", ".join(str(path) for path in missing_files)
    )
df = pd.read_excel(annotation_file)
df_100 = pd.read_excel(annotation_100)



In [None]:
# Stack the two annotation reports row-wise into the single frame that feeds
# the plot. (An explicit "ID" column was prototyped here but is disabled.)
bokeh_df = pd.concat(objs=[df, df_100], axis=0)

In [None]:
def color_code_sequence(seq):
    """Wrap every character of ``seq`` in a coloured HTML ``<span>``.

    A, T, C and G each get their own colour; any other character is rendered
    in black (``#000``). Non-string input yields an empty string.
    """
    if not isinstance(seq, str):
        return ''

    palette = {
        'A': '#CC0000',
        'T': '#007F00',
        'C': '#0000CC',
        'G': '#FFB300',
    }
    spans = []
    for base in seq:
        shade = palette.get(base, "#000")
        spans.append(f'<span style="color: {shade}">{base}</span>')
    return ''.join(spans)
# RSS sequence columns that get an HTML-coloured companion column
# (named "<column>_colored") for display in the hover tooltip.
sequence_columns = [
    '23_heptamer','23_nonamer', 
    '12_heptamer', '12_nonamer'
    ]
for col in sequence_columns:
    # A plain astype(str) turns missing cells into the literal text "nan",
    # which would then be coloured and shown in the tooltip. Remember which
    # cells were missing and blank them after the cast instead.
    missing = bokeh_df[col].isna()
    bokeh_df[col] = bokeh_df[col].astype(str).mask(missing, '')
    bokeh_df[col + '_colored'] = bokeh_df[col].apply(color_code_sequence)

In [None]:
# Composite "<Region>-<Haplotype>" key, plus a 1-based integer code per
# distinct key (used as the y coordinate of the scatter plot).
region_haplotype = bokeh_df['Region'] + '-' + bokeh_df['Haplotype']
bokeh_df['Region_Haplotype'] = region_haplotype
haplotype_codes = pd.factorize(region_haplotype)[0]
bokeh_df['Region_Haplotype_Num'] = haplotype_codes + 1

# "<Segment>:<Function>:<Status>" category that drives marker, colour and legend.
status_text = bokeh_df['Status'].astype(str)
bokeh_df['Segment_Function'] = bokeh_df['Segment'] + ':' + bokeh_df['Function'] + ':' + status_text

# Order rows by start coordinate within each region/haplotype, then number
# them 1..n per group; that running number is the x coordinate.
haplotype_keys = ['Region', 'Haplotype']
bokeh_df = bokeh_df.sort_values(by=haplotype_keys + ['Start coord'])
bokeh_df['X_axis'] = bokeh_df.groupby(haplotype_keys).cumcount() + 1




In [None]:
# The plot's data source starts out holding only the rows whose Status is 'Novel'.
source = ColumnDataSource(bokeh_df.loc[bokeh_df['Status'].eq('Novel')])

In [None]:
plot_width = 1500
p = figure(title="Region-Haplotype Plot: All",
            x_axis_label='Position', width=plot_width, height=800)
p.title.text_font_size = "20pt"

# Categories expected in the 'Segment_Function' column, in the order used to
# pair them with markers and palette colours below.
COMBINATIONS = [
    'V:F/ORF:Novel', 'V:F/ORF:Known', 'V:P:Novel', 'V:P:Known', 
    'D:PF/ORF:Novel', 'D:PF/ORF:Known', 'J:PF/ORF:Novel', 'J:PF/ORF:Known',
    ]
# One marker per entry in COMBINATIONS, matched by position. The list used to
# hold only seven markers for eight categories, leaving 'J:PF/ORF:Known'
# without an explicit marker; 'star' fills that last slot.
MARKERS = [
    'hex', 'circle_x', 'triangle', 'square', 
    'inverted_triangle', 'diamond', 'cross', 'star',
    ]
# NOTE(review): COLORS is not referenced by the scatter call below, which
# colours by Paired12 instead. Kept in case something else relies on it.
COLORS = [
    'red', 'blue', 'green', 'yellow', 
    'orange', 'purple', 'pink', 'brown'
    ]

# One glyph per row of `source`; marker shape and colour both follow the
# row's 'Segment_Function' category, which also populates the legend.
p.scatter("X_axis", "Region_Haplotype_Num", source=source,
            legend_field='Segment_Function', fill_alpha=0.8, size=12,
            marker=factor_mark('Segment_Function', MARKERS, COMBINATIONS),
            color=factor_cmap('Segment_Function', Paired12, COMBINATIONS))

# NOTE(review): no `text=` is passed, so the label text comes from whatever
# LabelSet's default text field is -- confirm that `source` actually carries
# the column intended for the labels.
labels = LabelSet(x='X_axis', y='Region_Haplotype_Num', level='glyph', source=source, text_font_size="8pt", text_align='center', y_offset=15)
p.add_layout(labels)

In [None]:
# Hover tool with a custom HTML tooltip. Each @name / @{name with spaces}
# placeholder is filled from the matching column of the hovered row. The
# *_colored columns already contain HTML spans, hence the {safe} format.
# Fix: the "Details" heading style used the misspelt property `colior`,
# so its intended #444 colour was never applied.
hover = HoverTool()
hover.tooltips = """
    <div style="background: rgba(255, 255, 255, 0.95); padding: 12px; border-radius: 5px; box-shadow: 0 0 8px rgba(0,0,0,0.5);">
        <div style="font-size: 25px; margin-bottom: 7px; font-weight: bold; color: #444; text-decoration: 1px solid black;">Details</div>
        <div style="color: #333; font-size: 20px;">
            <span style="font-weight: bold; color: #017B63;">Sample:</span> @Sample<br> 
            <span style="font-weight: bold; color: #017B63;">Name Segment:</span> @{Old name-like}<br>
            <span style="font-weight: bold; color: #017B63;">Reference:</span> @Reference<br>
            <span style="font-weight: bold; color: #017B63;">Short name:</span> @{Short name}<br>
            <span style="font-weight: bold; color: #017B63;">Status:</span> @{Status}<br>
            <span style="font-weight: bold; color: #017B63;">Segment Type:</span> @Segment<br>
            <span style="font-weight: bold; color: #017B63;">Segment Region:</span> @Region<br>
            <span style="font-weight: bold; color: #017B63;">Haplotype:</span> @Haplotype<br>
            <span style="font-weight: bold; color: #017B63;">Function:</span> @{Function}<br>
            <span style="font-weight: bold; color: #017B63;">Start Coord:</span> @{Start coord}<br>
            <span style="font-weight: bold; color: #017B63;">End Coord:</span> @{End coord}<br>
            <br>  
            <div style="font-size: 25px; margin-bottom: 7px; font-weight: bold; color: #444;">RSS</div> 
            <span style="font-weight: bold; color: #017B63;">23 heptamer:</span> <span style="font-weight: bold; color: #017B63;">@{23_heptamer_colored}{safe}</span><br>
            <span style="font-weight: bold; color: #017B63;">23 nonamer:</span> <span style="font-weight: bold; color: #017B63;">@{23_nonamer_colored}{safe}</span><br>
            <span style="font-weight: bold; color: #017B63;">12 heptamer:</span> <span style="font-weight: bold; color: #017B63;">@{12_heptamer_colored}{safe}</span><br>
            <span style="font-weight: bold; color: #017B63;">12 nonamer:</span> <span style="font-weight: bold; color: #017B63;">@{12_nonamer_colored}{safe}</span><br>
        </div>
    </div>
    """

p.add_tools(hover)

# The y values are only integer codes for region/haplotype groups, so the
# axis itself is hidden.
p.yaxis.visible = False

# Style the legend
p.legend.location = "top_right"
p.legend.title = 'Segment Type'
p.legend.title_text_font_style = "bold"
p.legend.background_fill_alpha = 0.7
p.legend.border_line_color = None
p.legend.label_text_font_size = '15pt'
p.legend.label_height = 30
p.legend.label_width = 50 
p.legend.glyph_height = 30
p.legend.glyph_width = 30



In [None]:
def create_custom_title(title_text, font_size="18px"):
    """Return a ``Div`` showing ``title_text`` in bold at ``font_size``.

    Args:
        title_text: Text to display.
        font_size: CSS font-size value placed in the inline style.
    """
    markup = (
        "\n"
        f"    <div style='font-size: {font_size};'><strong>{title_text}</strong></div>\n"
        "    "
    )
    return Div(text=markup)


In [None]:
# Function to filter data based on Region_Haplotype and Pseudogene
def update_data():
    """Re-filter ``bokeh_df`` from the widget states and refresh the plot.

    Reads the region/haplotype dropdown and the two toggles, pushes the
    matching rows into ``source``, rewrites both toggle labels and sets the
    plot title to the selected region/haplotype.
    """
    chosen_region_haplotype = region_haplotype_select.value
    pseudogenes_included = pseudogene_toggle.active
    novel_only = new_segments_toggle.active

    # Narrow a copy of the full frame one criterion at a time.
    visible_rows = bokeh_df.copy()
    if chosen_region_haplotype != "All":
        visible_rows = visible_rows[visible_rows['Region_Haplotype'] == chosen_region_haplotype]
    if not pseudogenes_included:
        visible_rows = visible_rows[visible_rows['Function'] != 'P']
    if novel_only:
        visible_rows = visible_rows[visible_rows['Status'] == 'Novel']

    # Each label describes what the *next* click on that toggle will do.
    pseudogene_toggle.label = (
        "Functional Only" if pseudogenes_included else "Include Pseudogenes"
    )
    new_segments_toggle.label = (
        "Show Both New and Reference Segments" if novel_only else "Show Novel Segments Only"
    )

    source.data = ColumnDataSource.from_df(visible_rows)
    p.title.text = f"Region-Haplotype Plot: {chosen_region_haplotype}"



# Dropdown for Region_Haplotype: "All" plus every distinct value, sorted.
region_haplotype_options = ["All"] + sorted(bokeh_df['Region_Haplotype'].unique().tolist())
region_haplotype_select = Select(value="All", options=region_haplotype_options, width=200)

# Active means pseudogenes are included (update_data drops Function == 'P'
# rows only while this toggle is inactive).
pseudogene_toggle = Toggle(label="Functional Only", button_type="success", active=True, width=200)

# Active means "novel segments only" in update_data. The data source is
# initially populated with Status == 'Novel' rows only, so the toggle must
# start active; starting inactive made the first click a visible no-op.
new_segments_toggle = Toggle(label="Show reference segments", button_type="success", active=True, width=200)

# Any widget change re-runs the same filtering routine.
region_haplotype_select.on_change('value', lambda attr, old, new: update_data())
pseudogene_toggle.on_click(lambda active: update_data())
new_segments_toggle.on_click(lambda active: update_data())


In [None]:
# Bold 18px headings shown above each of the three filter controls.
region_text, pseudo_text, annotation_text = (
    create_custom_title(caption, font_size="18px")
    for caption in ("Region-Haplotype:", "Function:", "New:")
)

In [None]:
# Pair every heading with its control in a vertical stack, then place the
# three stacks side by side.
region_button, toggle_button, toggle_100_button = (
    column(heading, control)
    for heading, control in (
        (region_text, region_haplotype_select),
        (pseudo_text, pseudogene_toggle),
        (annotation_text, new_segments_toggle),
    )
)
buttons = row(region_button, toggle_button, toggle_100_button)

In [None]:
# Put the plot above the row of filter controls and register the combined
# layout as a root of the current Bokeh document.
layout = column(p, buttons)
curdoc().add_root(layout)