In [2]:
import pandas as pd
import numpy as np
import plotly

In [3]:
# Read the two input tables from ../data: the abundance profiles and the sample metadata
metagenomics = pd.read_csv("../data/metagenomics.csv")
metadata_unfiltered = pd.read_csv("../data/metadata_filtered.csv")

In [4]:
# Keep only the metadata rows whose data_type is "metagenomics"
metadata = metadata_unfiltered.loc[metadata_unfiltered["data_type"].eq("metagenomics")]

In [5]:
# Keep only the rows that have a value in the "species" column
species_df = metagenomics.loc[metagenomics["species"].notna()]

In [6]:
# Column mask over species_df. The first 9 columns start out excluded; every
# later column (one per sample) is kept only if its values total ~100.
sum_100_mask = np.zeros(species_df.shape[1], dtype=bool)
sum_100_mask[9:] = np.isclose(species_df.iloc[:, 9:].sum(axis=0), 100)
# Of the leading columns, re-enable only column 7 (the species label)
sum_100_mask[7] = True
clean_species_df = species_df.iloc[:, sum_100_mask]


In [7]:
# Re-orient the table: one row per sample, one column per species.
# The label column (the first column kept by the mask above) is promoted to
# the index *before* transposing. Transposing while that string column is
# still part of the data would force every abundance value into a generic
# object-dtype cell; this way the transposed values stay numeric.
cleaner_species_df = clean_species_df.set_index(clean_species_df.columns[0]).T

In [8]:
# Sample IDs: the row labels with everything from "_profile" onwards removed
sample_list = np.array([name.split("_profile")[0] for name in cleaner_species_df.index])

# Row position of every sample inside `metadata`. The lookup table is built in
# a single pass and keeps the FIRST position of each External.ID (the same
# answer list.index() gives), instead of rebuilding and rescanning the full ID
# list once per sample. A sample missing from the metadata raises KeyError.
first_row_by_id = {}
for row_number, external_id in enumerate(metadata["External.ID"]):
    first_row_by_id.setdefault(external_id, row_number)
sample_in_meta_idx = np.array([first_row_by_id[sample] for sample in sample_list])

In [9]:
# Put the two dataframes side by side: metadata rows (re-ordered to match the
# samples) on the left, species abundances on the right.
# Both indexes are discarded with drop=True so the frames align purely by
# position; this avoids creating two identically named "index" columns that
# would then have to be removed by label.
combined_df = pd.concat(
    (
        metadata.iloc[sample_in_meta_idx, :].reset_index(drop=True),
        cleaner_species_df.reset_index(drop=True),
    ),
    axis=1,
)
# "Unnamed: 0" is dropped as in the original cell
combined_df = combined_df.drop(columns=["Unnamed: 0"])

In [10]:
# UMAP input matrix: every column of combined_df from position 15 onwards
features = combined_df.iloc[:, 15:].to_numpy()

In [11]:
# compute umaps
from umap import UMAP

# Two independent embeddings of the same feature matrix, each with random
# initialisation and a fixed seed.

# 2-D embedding
umap_2d = UMAP(random_state=0, init='random', n_components=2)
proj_2d = umap_2d.fit_transform(features)

# 3-D embedding
umap_3d = UMAP(random_state=0, init='random', n_components=3)
proj_3d = umap_3d.fit_transform(features)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


In [12]:
# Append the 2-D and 3-D embedding coordinates as five extra columns
umap_df = pd.concat(
    [
        combined_df,
        pd.DataFrame(
            np.hstack((proj_2d, proj_3d)),
            columns=["umap_2d_x", "umap_2d_y", "umap_3d_x", "umap_3d_y", "umap_3d_z"],
        ),
    ],
    axis=1,
)

In [13]:
# umap_df

In [14]:
# Positions of the species-abundance columns: everything in combined_df from
# column 15 onwards, i.e. exactly the slice that was fed to UMAP. Taking the
# upper bound from combined_df (rather than a hard-coded number) keeps this in
# step with the data; columns appended to umap_df afterwards are not included.
microbe_column_indices = np.arange(15, combined_df.shape[1])
species_names = umap_df.columns[microbe_column_indices]

In [15]:
# Per-sample diversity: 1 minus the sum of squared proportions, where the
# proportions are the species columns divided by 100
umap_df['gini_simpson'] = 1 - (umap_df.iloc[:, microbe_column_indices] / 100).pow(2).sum(axis=1)

In [16]:
import plotly.graph_objects as go
import plotly.express as px

In [17]:
import matplotlib.pyplot as plt
def assign_colors(categories):
    """Give every entry of ``categories`` a colour, one colour per distinct value.

    The distinct values, in the order ``np.unique`` returns them, are spread
    evenly over the 'jet' colormap: value number ``k`` of ``n`` is looked up at
    position ``k / n``.

    Returns a list aligned with ``categories`` holding whatever the colormap
    returns for each entry's position.
    """
    labels = np.unique(categories)
    n_labels = len(labels)
    palette = plt.get_cmap('jet')  # any other colormap name can be substituted here
    lookup = {}
    for position, label in enumerate(labels):
        lookup[label] = palette(position / n_labels)
    return [lookup[entry] for entry in categories]

In [18]:
from dash import Dash, html, dcc, Input, Output, callback
import pandas as pd
import plotly.express as px

# Dash application: one dropdown that picks the colouring column, a 3-D sample
# scatter on the left and a per-sample species plot on the right.
# (The tutorial dataset download that used to sit here was never used and has
# been removed, so building the app no longer needs network access for data.)
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    # Colouring control
    html.Div([
        html.Label('Coloring scheme:', style={'color': 'white'}),
        html.Div(
            [
                dcc.Dropdown(
                    umap_df.columns,
                    'gini_simpson',
                    id='crossfilter-xaxis-column',
                )
            ],
            style={'width': '49%', 'display': 'inline-block'},
        ),
    ], style={'padding': '10px 5px'}),

    # Left panel: 3-D scatter. hoverData is pre-seeded so the right-hand plot
    # has a sample to show before the user hovers anything; only the first
    # customdata entry (an External.ID) is read by the species callback.
    html.Div([
        dcc.Graph(
            id='crossfilter-indicator-scatter',
            hoverData={'points': [{'customdata': ['CSM5FZ3N_P', 'foo']}]}
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    # Right panel: species abundances of the hovered sample
    html.Div([
        dcc.Graph(id='x-time-series')
    ], style={'display': 'inline-block', 'width': '49%'}),
])


def _marker_style(color_by, opacity, categorical=('site_name', 'diagnosis', 'sex')):
    """Build the Plotly ``marker`` dict used to colour the sample scatter.

    Parameters
    ----------
    color_by : str or None
        Column of ``umap_df`` to colour by; ``None`` (dropdown cleared) leaves
        the colour to Plotly's default.
    opacity : float
        Marker opacity.
    categorical : tuple of str
        Columns that are always coloured per category via ``assign_colors``.

    Any other column that contains values which cannot be read as numbers is
    also coloured per category (after conversion to strings), because text
    values are not valid input for a colourscale. Everything else is mapped
    onto the Viridis colourscale with a colourbar titled after the column.
    """
    marker = dict(opacity=opacity)
    if color_by is None:
        return marker

    column = umap_df[color_by]
    if color_by in categorical:
        marker['color'] = assign_colors(column)
        return marker

    as_numbers = pd.to_numeric(column, errors='coerce')
    if as_numbers.notna().sum() < column.notna().sum():
        # Some present values are not numeric: treat the column as labels.
        marker['color'] = assign_colors(column.astype(str))
        return marker

    marker.update(
        color=column,
        colorscale='Viridis',
        colorbar=dict(thickness=20, x=-0.05, title=color_by),
    )
    return marker


@callback(
    Output('crossfilter-indicator-scatter', 'figure'),
    Input('crossfilter-xaxis-column', 'value')
)
def update_graph(color_by):
    """Redraw the 3-D UMAP scatter, coloured by the column chosen in the dropdown.

    Each point carries its External.ID and Participant.ID as ``customdata``;
    these are shown on hover, and the External.ID is what the species plot
    callback reads from ``hoverData``.
    """
    fig_3d = go.Figure()
    fig_3d.add_trace(go.Scatter3d(
        x=umap_df["umap_3d_x"],
        y=umap_df["umap_3d_y"],
        z=umap_df["umap_3d_z"],
        mode='markers',
        marker=_marker_style(color_by, 0.8),
        customdata=umap_df[['External.ID', 'Participant.ID']],
        hovertemplate=(
            "External.ID: %{customdata[0]}<br>"
            "Participant.ID: %{customdata[1]}<extra></extra>"
        )
    ))

    fig_3d.update_layout(
        scene=dict(
            xaxis_title='UMAP 1',
            yaxis_title='UMAP 2',
            zaxis_title='UMAP 3'
        ),
        margin={'l': 40, 'b': 40, 't': 10, 'r': 0},
        hovermode='closest',
    )

    return fig_3d


@callback(
    Output('x-time-series', 'figure'),
    Input('crossfilter-indicator-scatter', 'hoverData'))
def species_abd(hoverData):
    """Plot the species abundances of the sample currently hovered in the scatter.

    The sample is identified by the first ``customdata`` entry of the hovered
    point (an External.ID). If there is no hover information, or the ID does
    not occur in ``umap_df``, an empty plot with the same axes is returned
    instead of raising.
    """
    points = (hoverData or {}).get('points') or []
    sample_id = points[0]['customdata'][0] if points else None

    # Filter once; the same selection supplies both the values and the names.
    abundances = umap_df.loc[umap_df['External.ID'] == sample_id].iloc[:, microbe_column_indices]
    species = abundances.columns
    values = abundances.values[0] if len(abundances) else []

    fig_2d = go.Figure()
    fig_2d.add_trace(go.Scatter(
        y=values,
        # one x position per species column, whatever their number
        x=np.arange(len(species)),
        mode='markers',
        marker=dict(size=8, line=dict(width=0, color='DarkSlateGrey')),
        customdata=species,
        hovertemplate=(
            "Species: %{customdata}<extra></extra>"
        )
    ))

    fig_2d.update_layout(
        title="Species Abundances",
        yaxis_title="Relative Abundance",
        xaxis_title="Species",
        yaxis=dict(range=[0, 100]))

    return fig_2d

# Serve the app on 127.0.0.1:5243 with Dash's debug mode switched on
if __name__ == '__main__':
    app.run(host="127.0.0.1", port=5243, debug=True)


In [19]:
from dash import Dash, html, dcc, Input, Output, callback
import pandas as pd
import plotly.express as px

# Dash application: colouring dropdown, focal-participant dropdown, a 3-D
# sample scatter on the left and a per-sample species plot on the right.
# (The tutorial dataset download that used to sit here was never used and has
# been removed, so building the app no longer needs network access for data.)
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets=external_stylesheets)


def _labelled_control(label_text, control, width):
    """Wrap ``control`` in a padded row with a white caption in front of it."""
    return html.Div([
        html.Label(label_text, style={'color': 'white'}),
        html.Div([control], style={'width': width, 'display': 'inline-block'}),
    ], style={'padding': '10px 5px'})


app.layout = html.Div([
    _labelled_control(
        'Color by:',
        dcc.Dropdown(
            umap_df.columns,
            'gini_simpson',
            id='crossfilter-xaxis-column',
        ),
        '49%',
    ),
    _labelled_control(
        'Focal participant:',
        dcc.Dropdown(
            sorted(umap_df['Participant.ID'].unique()),
            'C3001',
            id='participant_dropdown',
        ),
        '49%',
    ),

    # Left panel: 3-D scatter. hoverData is pre-seeded so the right-hand plot
    # has a sample to show before the user hovers anything; only the first
    # customdata entry (an External.ID) is read by the species callback.
    html.Div([
        dcc.Graph(
            id='crossfilter-indicator-scatter',
            hoverData={'points': [{'customdata': ['CSM5FZ3N_P', 'Participant.ID', 'foo', 'diagnosis','site_name', 'sex', 'Age.at.diagnosis']}]}
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    # Right panel: species abundances of the hovered sample
    html.Div([
        dcc.Graph(id='x-time-series')
    ], style={'display': 'inline-block', 'width': '49%'}),
])


def _marker_style(color_by, opacity, categorical=('site_name', 'diagnosis', 'sex')):
    """Build the Plotly ``marker`` dict used to colour the sample scatter.

    Parameters
    ----------
    color_by : str or None
        Column of ``umap_df`` to colour by; ``None`` (dropdown cleared) leaves
        the colour to Plotly's default.
    opacity : float
        Marker opacity.
    categorical : tuple of str
        Columns that are always coloured per category via ``assign_colors``.

    Any other column that contains values which cannot be read as numbers is
    also coloured per category (after conversion to strings), because text
    values are not valid input for a colourscale. Everything else is mapped
    onto the Viridis colourscale with a colourbar titled after the column.
    """
    marker = dict(opacity=opacity)
    if color_by is None:
        return marker

    column = umap_df[color_by]
    if color_by in categorical:
        marker['color'] = assign_colors(column)
        return marker

    as_numbers = pd.to_numeric(column, errors='coerce')
    if as_numbers.notna().sum() < column.notna().sum():
        # Some present values are not numeric: treat the column as labels.
        marker['color'] = assign_colors(column.astype(str))
        return marker

    marker.update(
        color=column,
        colorscale='Viridis',
        colorbar=dict(thickness=20, x=-0.05, title=color_by),
    )
    return marker


@callback(
    Output('crossfilter-indicator-scatter', 'figure'),
    [Input('crossfilter-xaxis-column', 'value'),
     Input('participant_dropdown', 'value')]
)
def update_graph(color_by, focal_participant):
    """Redraw the 3-D UMAP scatter.

    All samples are drawn faintly (opacity 0.1), coloured by ``color_by``;
    the samples of ``focal_participant`` are drawn again on top in opaque
    red. Axis ranges are pinned to the extent of the full data set, so they do
    not depend on which participant is highlighted.
    """
    hover_columns = ['External.ID', 'Participant.ID', 'diagnosis', 'week_num',
                     'site_name', 'sex', 'Age.at.diagnosis']
    # customdata[i] refers to hover_columns[i]
    hover_template = (
        "External ID: %{customdata[0]}<br>"
        "Participant ID: %{customdata[1]}<br>"
        "Diagnosis: %{customdata[2]}<br>"
        "Sample Week: %{customdata[3]}<br>"
        "Sample Site: %{customdata[4]}<br>"
        "Sex: %{customdata[5]}<br>"
        "Age of Diagnosis: %{customdata[6]}<extra></extra>"
    )

    # Rows of the highlighted participant (none if the dropdown was cleared)
    focal_df = umap_df.loc[umap_df['Participant.ID'] == focal_participant]

    fig_3d = go.Figure()
    fig_3d.add_trace(go.Scatter3d(
        x=umap_df["umap_3d_x"],
        y=umap_df["umap_3d_y"],
        z=umap_df["umap_3d_z"],
        mode='markers',
        marker=_marker_style(color_by, 0.1),
        customdata=umap_df[hover_columns],
        hovertemplate=hover_template,
        name='All participants'
    ))
    fig_3d.add_trace(go.Scatter3d(
        x=focal_df["umap_3d_x"],
        y=focal_df["umap_3d_y"],
        z=focal_df["umap_3d_z"],
        mode='markers',
        marker=dict(color='red', opacity=1),
        customdata=focal_df[hover_columns],
        hovertemplate=hover_template,
        name='Focal participant'
    ))

    # One axis spec per dimension: title, fixed range over all samples, 6 ticks
    scene = {"aspectratio": dict(x=1, y=1, z=1)}
    for number, axis in enumerate("xyz", start=1):
        coords = umap_df[f"umap_3d_{axis}"]
        scene[f"{axis}axis"] = dict(
            title=dict(text=f"UMAP {number}"),
            range=(coords.min(), coords.max()),
            nticks=6,
        )

    fig_3d.update_layout(
        scene=scene,
        margin={'l': 40, 'b': 40, 't': 10, 'r': 0},
        hovermode='closest',
    )

    return fig_3d


@callback(
    Output('x-time-series', 'figure'),
    Input('crossfilter-indicator-scatter', 'hoverData'))
def species_abd(hoverData):
    """Plot the species abundances of the sample currently hovered in the scatter.

    The sample is identified by the first ``customdata`` entry of the hovered
    point (an External.ID). If there is no hover information, or the ID does
    not occur in ``umap_df``, an empty plot with the same axes is returned
    instead of raising.
    """
    points = (hoverData or {}).get('points') or []
    sample_id = points[0]['customdata'][0] if points else None

    # Filter once; the same selection supplies both the values and the names.
    abundances = umap_df.loc[umap_df['External.ID'] == sample_id].iloc[:, microbe_column_indices]
    species = abundances.columns
    values = abundances.values[0] if len(abundances) else []

    fig_2d = go.Figure()
    fig_2d.add_trace(go.Scatter(
        y=values,
        # one x position per species column, whatever their number
        x=np.arange(len(species)),
        mode='markers',
        marker=dict(size=8, line=dict(width=0, color='DarkSlateGrey')),
        customdata=species,
        hovertemplate=(
            "Species: %{customdata}<extra></extra>"
        )
    ))

    fig_2d.update_layout(
        title="Species Abundances",
        yaxis_title="Relative Abundance",
        xaxis_title="Species",
        yaxis=dict(range=[0, 100]))

    return fig_2d

# Serve the app on 127.0.0.1:5243 with Dash's debug mode switched on
if __name__ == '__main__':
    app.run(host="127.0.0.1", port=5243, debug=True)


In [20]:
from dash import Dash, html, dcc, Input, Output, callback
import pandas as pd
import plotly.express as px

# Dash application: colouring dropdown, focal-participant dropdown, opacity
# slider, a 3-D sample scatter on the left and a per-sample species plot on
# the right.
# (The tutorial dataset download that used to sit here was never used and has
# been removed, so building the app no longer needs network access for data.)
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets=external_stylesheets)


def _labelled_control(label_text, control, width):
    """Wrap ``control`` in a padded row with a white caption in front of it."""
    return html.Div([
        html.Label(label_text, style={'color': 'white'}),
        html.Div([control], style={'width': width, 'display': 'inline-block'}),
    ], style={'padding': '10px 5px'})


app.layout = html.Div([
    _labelled_control(
        'Color by:',
        dcc.Dropdown(
            umap_df.columns,
            'gini_simpson',
            id='crossfilter-xaxis-column',
        ),
        '30%',
    ),
    _labelled_control(
        'Focal participant:',
        dcc.Dropdown(
            sorted(umap_df['Participant.ID'].unique()),
            'C3001',
            id='participant_dropdown',
        ),
        '30%',
    ),
    _labelled_control(
        'Opacity:',
        dcc.Slider(
            id='opacity_slider',
            min=0,
            max=1,
            value=0.1,
            # eleven marks, 0.0 to 1.0; step=None restricts the slider to them
            marks={f"{level:.1f}": f"{level:.1f}" for level in np.linspace(0, 1, 11)},
            step=None
        ),
        '30%',
    ),

    # Left panel: 3-D scatter. hoverData is pre-seeded so the right-hand plot
    # has a sample to show before the user hovers anything; only the first
    # customdata entry (an External.ID) is read by the species callback.
    html.Div([
        dcc.Graph(
            id='crossfilter-indicator-scatter',
            hoverData={'points': [{'customdata': ['CSM5FZ3N_P', 'Participant.ID', 'foo', 'diagnosis','site_name', 'sex', 'Age.at.diagnosis']}]}
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    # Right panel: species abundances of the hovered sample
    html.Div([
        dcc.Graph(id='x-time-series')
    ], style={'display': 'inline-block', 'width': '49%'}),
])


def _marker_style(color_by, opacity, categorical=('site_name', 'diagnosis', 'sex')):
    """Build the Plotly ``marker`` dict used to colour the sample scatter.

    Parameters
    ----------
    color_by : str or None
        Column of ``umap_df`` to colour by; ``None`` (dropdown cleared) leaves
        the colour to Plotly's default.
    opacity : float
        Marker opacity.
    categorical : tuple of str
        Columns that are always coloured per category via ``assign_colors``.

    Any other column that contains values which cannot be read as numbers is
    also coloured per category (after conversion to strings), because text
    values are not valid input for a colourscale. Everything else is mapped
    onto the Viridis colourscale with a colourbar titled after the column.
    """
    marker = dict(opacity=opacity)
    if color_by is None:
        return marker

    column = umap_df[color_by]
    if color_by in categorical:
        marker['color'] = assign_colors(column)
        return marker

    as_numbers = pd.to_numeric(column, errors='coerce')
    if as_numbers.notna().sum() < column.notna().sum():
        # Some present values are not numeric: treat the column as labels.
        marker['color'] = assign_colors(column.astype(str))
        return marker

    marker.update(
        color=column,
        colorscale='Viridis',
        colorbar=dict(thickness=20, x=-0.05, title=color_by),
    )
    return marker


@callback(
    Output('crossfilter-indicator-scatter', 'figure'),
    [Input('crossfilter-xaxis-column', 'value'),
     Input('participant_dropdown', 'value'),
     Input('opacity_slider', 'value')]
)
def update_graph(color_by, focal_participant, opacity_value=0.5):
    """Redraw the 3-D UMAP scatter.

    All samples are drawn with opacity ``opacity_value``, coloured by
    ``color_by``; the samples of ``focal_participant`` are drawn again on top
    in opaque red. Axis ranges are pinned to the extent of the full data set,
    so they do not depend on which participant is highlighted.
    """
    hover_columns = ['External.ID', 'Participant.ID', 'diagnosis', 'week_num',
                     'site_name', 'sex', 'Age.at.diagnosis']
    # customdata[i] refers to hover_columns[i]
    hover_template = (
        "External ID: %{customdata[0]}<br>"
        "Participant ID: %{customdata[1]}<br>"
        "Diagnosis: %{customdata[2]}<br>"
        "Sample Week: %{customdata[3]}<br>"
        "Sample Site: %{customdata[4]}<br>"
        "Sex: %{customdata[5]}<br>"
        "Age of Diagnosis: %{customdata[6]}<extra></extra>"
    )

    # Rows of the highlighted participant (none if the dropdown was cleared)
    focal_df = umap_df.loc[umap_df['Participant.ID'] == focal_participant]

    fig_3d = go.Figure()
    fig_3d.add_trace(go.Scatter3d(
        x=umap_df["umap_3d_x"],
        y=umap_df["umap_3d_y"],
        z=umap_df["umap_3d_z"],
        mode='markers',
        marker=_marker_style(color_by, opacity_value),
        customdata=umap_df[hover_columns],
        hovertemplate=hover_template,
        name='All participants'
    ))
    fig_3d.add_trace(go.Scatter3d(
        x=focal_df["umap_3d_x"],
        y=focal_df["umap_3d_y"],
        z=focal_df["umap_3d_z"],
        mode='markers',
        marker=dict(color='red', opacity=1),
        customdata=focal_df[hover_columns],
        hovertemplate=hover_template,
        name='Focal participant'
    ))

    # One axis spec per dimension: title, fixed range over all samples, 6 ticks
    scene = {"aspectratio": dict(x=1, y=1, z=1)}
    for number, axis in enumerate("xyz", start=1):
        coords = umap_df[f"umap_3d_{axis}"]
        scene[f"{axis}axis"] = dict(
            title=dict(text=f"UMAP {number}"),
            range=(coords.min(), coords.max()),
            nticks=6,
        )

    fig_3d.update_layout(
        scene=scene,
        margin={'l': 40, 'b': 40, 't': 10, 'r': 0},
        hovermode='closest',
    )

    return fig_3d


@callback(
    Output('x-time-series', 'figure'),
    Input('crossfilter-indicator-scatter', 'hoverData'))
def species_abd(hoverData):
    """Plot the species abundances of the sample currently hovered in the scatter.

    The sample is identified by the first ``customdata`` entry of the hovered
    point (an External.ID). If there is no hover information, or the ID does
    not occur in ``umap_df``, an empty plot with the same axes is returned
    instead of raising.
    """
    points = (hoverData or {}).get('points') or []
    sample_id = points[0]['customdata'][0] if points else None

    # Filter once; the same selection supplies both the values and the names.
    abundances = umap_df.loc[umap_df['External.ID'] == sample_id].iloc[:, microbe_column_indices]
    species = abundances.columns
    values = abundances.values[0] if len(abundances) else []

    fig_2d = go.Figure()
    fig_2d.add_trace(go.Scatter(
        y=values,
        # one x position per species column, whatever their number
        x=np.arange(len(species)),
        mode='markers',
        marker=dict(size=8, line=dict(width=0, color='DarkSlateGrey')),
        customdata=species,
        hovertemplate=(
            "Species: %{customdata}<extra></extra>"
        )
    ))

    fig_2d.update_layout(
        title="Species Abundances",
        yaxis_title="Relative Abundance",
        xaxis_title="Species",
        yaxis=dict(range=[0, 100]))

    return fig_2d

# Serve the app on 127.0.0.1:5243 with Dash's debug mode switched on
if __name__ == '__main__':
    app.run(host="127.0.0.1", port=5243, debug=True)
