In [1]:
# Install required libraries
!pip install dash plotly pandas biopython
import pandas as pd
from dash import Dash, dcc, html, dash_table, Input, Output, State
import plotly.express as px
import plotly.graph_objects as go
import ast  # For converting string to dict
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from google.colab import files


Collecting dash
  Downloading dash-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting retrying (from dash)
  Downloading retrying-1.4.0-py3-none-any.whl.metadata (7.5 kB)
Downloading dash-3.1.1-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m65.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading retrying-1.4.0-py3-none-any.whl (11 kB)
Installing collected packages: retrying, biopython, dash
Successfully installed biopython-1.85 dash-3.1.1 retrying-1.4.0


In [2]:
# Load Dataset
# Open the Colab upload dialog and read the first uploaded file as CSV.
uploaded = files.upload()
if not uploaded:
    # An empty result would otherwise surface as a bare StopIteration below.
    raise ValueError("No file was uploaded; upload the protein CSV to continue.")
file_name = next(iter(uploaded))
df = pd.read_csv(file_name)

# read_csv returns the composition column as text, but the callbacks further
# down call dict methods on each cell. Parse it once here; cells that are not
# strings (already parsed, or missing) are left untouched, so re-running this
# cell is harmless.
if 'Amino_Acid_Composition' in df.columns:
    df['Amino_Acid_Composition'] = df['Amino_Acid_Composition'].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
    )


Saving Pseudomonas aeruginosa Protein.csv to Pseudomonas aeruginosa Protein.csv


In [5]:
# Initialize Dash App
# Single application object for the notebook: the layout is assigned to it,
# the callbacks are registered on it, and the final cell runs it.
app = Dash(__name__)

In [6]:
# Define Layout
# The page is assembled from small helpers so that the repeated pieces
# (summary cards, metric boxes) are written once.


def _summary_card(title, value, background):
    """Return one headline card showing `value` under `title` on a coloured background."""
    return html.Div(
        [html.H3(title), html.P(value)],
        className='card',
        style={'background': background},
    )


def _metric_box(label, value_id):
    """Return a labelled box whose value element (`value_id`) is filled in by a callback."""
    return html.Div(
        [html.P(label), html.P(id=value_id, className='metric-value')],
        className='metric-box',
    )


def _table_records(frame):
    """Return `frame` as a list of row dicts with container cells rendered as text.

    DataTable only accepts strings, numbers and booleans as cell values, so
    dict/list/tuple/set cells are converted with str(); every other cell is
    passed through unchanged.
    """
    def _as_cell(value):
        return str(value) if isinstance(value, (dict, list, tuple, set)) else value

    return [
        {column: _as_cell(value) for column, value in record.items()}
        for record in frame.to_dict('records')
    ]


# Dropdown values are row positions because the callback resolves the
# selection with df.iloc; labels show the protein ID and its length.
protein_options = [
    {'label': f"{protein_id} ({seq_len}aa)", 'value': position}
    for position, (protein_id, seq_len) in enumerate(zip(df['ID'], df['Seq_Length']))
]

# Row 1: Summary Cards
pi_low = df['Isoelectric_Point'].quantile(0.25)
pi_high = df['Isoelectric_Point'].quantile(0.75)
summary_row = html.Div([
    _summary_card("Total Proteins", f"{len(df):,}", '#3498db'),
    _summary_card("Avg Length", f"{df['Seq_Length'].mean():.1f} aa", '#2ecc71'),
    _summary_card("Avg MW (kDa)", f"{df['Molecular_Weight'].mean()/1000:.1f}", '#e74c3c'),
    _summary_card("Common pI Range", f"{pi_low:.1f}-{pi_high:.1f}", '#f39c12'),
], className='card-container')

# Row 2: Protein Explorer
protein_explorer = html.Div([
    dcc.Dropdown(
        id='protein-selector',
        options=protein_options,
        # Pre-select the first protein when there is one.
        value=0 if protein_options else None,
        style={'width': '100%'}
    ),
    html.Div([
        html.H4(id='protein-name'),
        html.P(id='protein-id'),
        html.Div([
            _metric_box("Molecular Weight:", 'protein-mw'),
            _metric_box("Isoelectric Point:", 'protein-pi'),
            _metric_box("Hydrophobicity:", 'protein-hyd'),
        ], style={'display': 'flex', 'gap': '20px', 'margin': '10px 0'}),
        dcc.Graph(id='aa-composition')
    ], style={'border': '1px solid #ddd', 'padding': '20px', 'borderRadius': '5px'})
])

# Row 3: Distribution Plots
distribution_row = html.Div([
    dcc.Graph(id='mw-distribution'),
    dcc.Graph(id='pi-distribution')
], style={'columnCount': 2})

# Row 4: Advanced Analysis
analysis_row = html.Div([
    dcc.Dropdown(
        id='analysis-type',
        options=[
            {'label': 'MW vs pI', 'value': 'mw_pi'},
            {'label': 'Length vs Hydrophobicity', 'value': 'len_hyd'},
            {'label': 'Amino Acid Correlation', 'value': 'aa_corr'}
        ],
        value='mw_pi',
        style={'width': '50%'}
    ),
    dcc.Graph(id='analysis-plot')
])

# Data Table
data_table = dash_table.DataTable(
    id='data-table',
    columns=[{'name': col, 'id': col} for col in df.columns],
    data=_table_records(df),
    page_size=10,
    style_table={'overflowX': 'auto', 'height': '300px'},
    style_cell={
        'textAlign': 'left',
        'padding': '8px',
        'whiteSpace': 'normal',
        'maxWidth': '200px'
    },
    filter_action='native'
)

app.layout = html.Div([
    html.H1("Pseudomonas aeruginosa Protein Analysis Dashboard",
            style={'textAlign': 'center', 'color': '#2c3e50', 'marginBottom': '30px'}),
    summary_row,
    protein_explorer,
    distribution_row,
    analysis_row,
    data_table,
], style={'fontFamily': 'Arial', 'padding': '20px'})



In [7]:
# Callbacks
@app.callback(
    [Output('protein-name', 'children'),
     Output('protein-id', 'children'),
     Output('protein-mw', 'children'),
     Output('protein-pi', 'children'),
     Output('protein-hyd', 'children'),
     Output('aa-composition', 'figure')],
    [Input('protein-selector', 'value')]
)
def update_protein_view(selected_idx):
    """Fill the protein explorer panel for the protein chosen in the dropdown.

    Args:
        selected_idx: Row position in `df` of the selected protein, or None
            when the dropdown has been cleared.

    Returns:
        A 6-tuple matching the callback outputs: name, "UniProt ID: ..." text,
        molecular weight in kDa, isoelectric point, hydrophobicity, and a bar
        chart of the amino acid composition.

    Raises:
        PreventUpdate: when nothing is selected, so the panel keeps its
            current content instead of the callback failing on df.iloc[None].
    """
    # Imported here so this callback does not depend on the notebook's import cell.
    from dash.exceptions import PreventUpdate

    # dcc.Dropdown can be cleared by the user, which sends None.
    if selected_idx is None:
        raise PreventUpdate

    protein = df.iloc[selected_idx]

    # AA Composition plot
    # Assumes the cell holds a dict of amino acid -> count (verify against the
    # loading step). Building the frame from items() keeps the two-column
    # shape even when the dict is empty.
    composition = protein['Amino_Acid_Composition']
    aa_df = pd.DataFrame(list(composition.items()), columns=['Amino Acid', 'Count'])
    aa_fig = px.bar(aa_df, x='Amino Acid', y='Count',
                    title='Amino Acid Composition',
                    color='Amino Acid',
                    color_discrete_sequence=px.colors.qualitative.Plotly)

    return (
        protein['Name'],
        f"UniProt ID: {protein['ID']}",
        f"{protein['Molecular_Weight']/1000:.2f} kDa",
        f"{protein['Isoelectric_Point']:.2f}",
        f"{protein['Hydrophobicity']:.3f}",
        aa_fig
    )



In [8]:
@app.callback(
    [Output('mw-distribution', 'figure'),
     Output('pi-distribution', 'figure')],
    [Input('protein-selector', 'value')]
)
def update_distributions(_):
    """Build the two dataset-wide histograms (molecular weight, isoelectric point).

    The dropdown value only triggers the callback; it is not used. Returns the
    molecular weight figure followed by the isoelectric point figure.
    """
    # (column, bin count, title, bar colour) for each histogram, in output order.
    histogram_specs = (
        ('Molecular_Weight', 50, 'Molecular Weight Distribution (Da)', '#3498db'),
        ('Isoelectric_Point', 30, 'Isoelectric Point Distribution', '#e74c3c'),
    )

    mw_fig, pi_fig = (
        px.histogram(df, x=column, nbins=bins, title=title,
                     color_discrete_sequence=[colour])
        for column, bins, title, colour in histogram_specs
    )
    return mw_fig, pi_fig

@app.callback(
    Output('analysis-plot', 'figure'),
    [Input('analysis-type', 'value')]
)
def update_analysis_plot(analysis_type):
    """Return the figure for the analysis chosen in the 'analysis-type' dropdown.

    Args:
        analysis_type: 'mw_pi' or 'len_hyd' for the two scatter plots; any
            other value (including 'aa_corr') produces the amino acid
            scatter matrix.

    Returns:
        A Plotly figure for the 'analysis-plot' graph.
    """
    if analysis_type == 'mw_pi':
        return px.scatter(df, x='Molecular_Weight', y='Isoelectric_Point',
                          color='Seq_Length',
                          title='Molecular Weight vs Isoelectric Point',
                          hover_data=['ID'],
                          color_continuous_scale='Viridis')

    if analysis_type == 'len_hyd':
        return px.scatter(df, x='Seq_Length', y='Hydrophobicity',
                          color='Molecular_Weight',
                          title='Protein Length vs Hydrophobicity',
                          hover_data=['ID'],
                          color_continuous_scale='Plasma')

    # aa_corr: scatter matrix of the 5 amino acids whose counts vary most.
    # Assumes each composition cell is a dict of amino acid -> count (verify
    # against the loading step). The counts are tabulated once and reused for
    # both the ranking and the plot.
    amino_acids = list('ACDEFGHIKLMNPQRSTVWY')
    counts = pd.DataFrame(
        [[comp.get(aa, 0) for aa in amino_acids] for comp in df['Amino_Acid_Composition']],
        index=df.index,
        columns=amino_acids,
    )

    # ddof=0 gives the population standard deviation; nlargest keeps the
    # first-listed amino acid when two spreads tie.
    top_aas = counts.std(ddof=0).nlargest(5).index.tolist()

    # assign() returns a new frame, so df itself is never modified.
    plot_data = df.assign(**{aa: counts[aa] for aa in top_aas})

    return px.scatter_matrix(plot_data, dimensions=top_aas,
                             title='Amino Acid Composition Correlation',
                             color='Seq_Length')



In [10]:
# Run App
# Dash's own run() takes the notebook display mode as `jupyter_mode`; the bare
# `mode` keyword belonged to the older JupyterDash API and is not a run()
# parameter here. 'inline' renders the app in this cell's output.
app.run(jupyter_mode='inline')

<IPython.core.display.Javascript object>