In [1]:
# Install required libraries
!pip install dash plotly pandas dna_features_viewer
import pandas as pd
from dash import Dash, dcc, html, dash_table
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
from google.colab import files
import numpy as np
from dna_features_viewer import BiopythonTranslator

Collecting dash
  Downloading dash-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting dna_features_viewer
  Downloading dna_features_viewer-3.1.5-py3-none-any.whl.metadata (2.1 kB)
Collecting retrying (from dash)
  Downloading retrying-1.4.0-py3-none-any.whl.metadata (7.5 kB)
Collecting Biopython (from dna_features_viewer)
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading dash-3.1.1-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m48.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dna_features_viewer-3.1.5-py3-none-any.whl (31 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m56.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading retrying-1.4.0-py3-none-any.whl (11 kB)
Installing collected packages: retrying, Biopython, dna_features_

In [2]:
# Load the promoter dataset from a CSV chosen through the Colab upload widget.
# `files.upload()` returns a mapping keyed by uploaded file name; if several
# files are selected, only the first one is read.
uploaded = files.upload()
if not uploaded:
    # Cancelling the picker leaves the mapping empty; fail with a clear
    # message instead of a bare StopIteration from next(iter(...)).
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")
file_name = next(iter(uploaded))
df = pd.read_csv(file_name)

# Later cells index these columns directly, so verify them up front and fail
# close to the cause when the wrong file is uploaded.
required_columns = {'sequence', 'seq_length', 'label'}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{file_name} is missing required column(s): {sorted(missing_columns)}"
    )

Saving cleaned_promoter_data.csv to cleaned_promoter_data.csv


In [3]:
# --- Initialize Dash App ---
# Create the Dash application object, passing this module's `__name__` as the
# app name.
# NOTE(review): the layout cell further down assigns `app = Dash(__name__)`
# again, so this instance is replaced before any layout or callback is
# attached to it.
app = Dash(__name__)


In [4]:
# --- Precompute Statistics ---
def _as_two_column_frame(series, column_names):
    """Turn a Series into a DataFrame (index + values) with the given column names."""
    frame = series.reset_index()
    frame.columns = column_names
    return frame


# Number of rows per class label.
label_counts = _as_two_column_frame(df['label'].value_counts(), ['Label', 'Count'])
# Summary statistics produced by `describe()` for the sequence lengths.
seq_length_stats = _as_two_column_frame(df['seq_length'].describe(), ['Metric', 'Value'])

In [5]:
# --- Initialize Dash App ---
# NOTE: callbacks are registered on the `app` object (`@app.callback`), so the
# callback cell must be re-run whenever this cell is re-run.
app = Dash(__name__)

# --- Precompute Statistics ---
label_counts = df['label'].value_counts().reset_index()
label_counts.columns = ['Label', 'Count']
seq_length_stats = df['seq_length'].describe().reset_index()
seq_length_stats.columns = ['Metric', 'Value']

# Count each class by its label VALUE. `value_counts()` orders rows by
# frequency, so positional access (`iloc[0]` / `iloc[1]`) can attach the
# counts to the wrong label and raises when only one label is present.
# Assumes numeric 0/1 labels, as the sequence callback's `label == 1` does.
non_promoter_count = int((df['label'] == 0).sum())
promoter_count = int((df['label'] == 1).sum())

# The colour map below is keyed by strings, so give the pie chart string
# labels; string keys would not match numeric label values.
pie_data = label_counts.copy()
pie_data['Label'] = pie_data['Label'].astype(str)

# Offer at most the first 10 rows in the selector, never more than exist.
selectable_rows = min(10, len(df))

# --- Define Layout ---
app.layout = html.Div([
    html.H1("Promoter Sequence Analysis Dashboard", style={'textAlign': 'center', 'color': '#2c3e50'}),

    # Row 1: Summary Cards
    html.Div([
        html.Div([
            html.H3("Total Sequences"),
            html.P(f"{len(df):,}")
        ], className='card'),

        html.Div([
            html.H3("Unique Sequences"),
            html.P(f"{df['sequence'].nunique():,}")
        ], className='card'),

        html.Div([
            html.H3("Sequence Length Range"),
            html.P(f"{df['seq_length'].min()}-{df['seq_length'].max()} bp")
        ], className='card'),

        html.Div([
            html.H3("Label Balance"),
            html.P(f"0: {non_promoter_count:,} | 1: {promoter_count:,}")
        ], className='card')
    ], className='card-container'),

    # Row 2: Distribution Plots
    html.Div([
        dcc.Graph(
            figure=px.pie(pie_data, values='Count', names='Label',
                        title='Promoter/Non-Promoter Distribution',
                        color='Label',
                        color_discrete_map={'0':'#e74c3c','1':'#2ecc71'})
        ),
        dcc.Graph(
            figure=px.histogram(df, x='seq_length', nbins=50,
                              title='Sequence Length Distribution',
                              color_discrete_sequence=['#3498db'])
        )
    ], style={'columnCount': 2}),

    # Row 3: Sequence Analysis
    html.Div([
        dcc.Dropdown(
            id='sequence-selector',
            options=[{'label': f"Sequence {i+1}", 'value': i} for i in range(selectable_rows)],
            # No default selection when the frame is empty.
            value=0 if selectable_rows else None,
            style={'width': '100%'}
        ),
        dcc.Graph(id='sequence-plot'),
        html.Div(id='sequence-info')
    ]),

    # Data Table (first 100 rows only, paged 10 at a time)
    dash_table.DataTable(
        id='data-table',
        columns=[{'name': col, 'id': col} for col in df.columns],
        data=df.head(100).to_dict('records'),
        page_size=10,
        style_table={'overflowX': 'auto', 'height': '300px'},
        style_cell={
            'textAlign': 'left',
            'padding': '8px',
            'whiteSpace': 'normal',
            'height': 'auto',
            'maxWidth': '200px'
        }
    )
], style={'fontFamily': 'Arial'})

In [6]:
# --- Callback for Sequence Visualization ---
@app.callback(
    [Output('sequence-plot', 'figure'),
     Output('sequence-info', 'children')],
    [Input('sequence-selector', 'value')]
)
def update_sequence_plot(selected_idx):
    """Build the per-sequence figure and metadata panel for the selected row.

    Args:
        selected_idx: Positional row index into `df` coming from the
            'sequence-selector' dropdown, or None when nothing is selected.

    Returns:
        A `(figure, children)` pair for the 'sequence-plot' graph and the
        'sequence-info' container. When no valid row is selected, an empty
        figure and a short hint are returned instead of raising.
    """
    # The dropdown value is None once the selection is cleared, and an index
    # past the end of `df` would make `iloc` raise; handle both gracefully.
    if selected_idx is None or not 0 <= selected_idx < len(df):
        empty_fig = go.Figure()
        empty_fig.update_layout(
            title="No sequence selected",
            xaxis_title="Position",
            yaxis_title="Feature Score",
            showlegend=False
        )
        return empty_fig, html.P("Select a sequence to view its details.")

    # Fetch the row once instead of re-indexing the frame per column.
    row = df.iloc[selected_idx]
    sequence = row['sequence']
    # Cast to a plain int so `range()` and the RNG accept it even if the CSV
    # column was parsed as float or a NumPy integer.
    seq_length = int(row['seq_length'])
    is_promoter = row['label'] == 1

    # Placeholder for sequence features: random values, seeded by the row
    # index so that re-selecting a sequence redraws the same trace.
    placeholder_scores = np.random.default_rng(selected_idx).random(seq_length)

    # Create a simple sequence visualization
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=list(range(seq_length)),
        y=placeholder_scores,
        mode='lines',
        line=dict(color='green' if is_promoter else 'red')
    ))
    fig.update_layout(
        title=f"Sequence {selected_idx+1} Visualization (Length: {seq_length} bp)",
        xaxis_title="Position",
        yaxis_title="Feature Score",
        showlegend=False
    )

    # Only mark the preview as truncated when it actually is.
    preview = sequence[:100] + ("..." if len(sequence) > 100 else "")

    # Sequence info
    info = html.Div([
        html.H4("Sequence Metadata:"),
        html.P(f"Label: {'Promoter (1)' if is_promoter else 'Non-Promoter (0)'}"),
        html.P(f"Length: {seq_length} bp"),
        html.Details([
            html.Summary("Show Sequence (First 100bp)"),
            html.P(preview)
        ])
    ])

    return fig, info


In [8]:
# --- Run App ---
# Render the app inline in the notebook output. `Dash.run` selects the notebook
# display mode through `jupyter_mode`; `mode=` was the keyword of the older,
# separate JupyterDash package and is not a parameter of `Dash.run`.
app.run(jupyter_mode='inline')

<IPython.core.display.Javascript object>