In [7]:
import pandas as pd
import altair as alt

## Lifting Altair's row limit so the complete dataset is embedded in the chart spec
alt.data_transformers.enable('default', max_rows=None)

## Input locations (.csv files)
otu_data = "Tara_OTUtableTax_Full.csv"
meta_data = "Tara_SampleMeta.csv"

otu_df = pd.read_csv(otu_data)
meta_df = pd.read_csv(meta_data)

## Normalising the join key on the metadata side: string dtype, no surrounding whitespace
meta_df['SampleID'] = meta_df['SampleID'].astype(str).str.strip()

## Identifier columns kept as-is by the melt; every other column becomes a SampleID value
taxonomy_cols = ['Domain', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'OTU_rep']

## Wide -> long: one row per (identifier columns, sample column) pair
otu_long = otu_df.melt(id_vars=taxonomy_cols, var_name='SampleID', value_name='Abundance')

## Applying the same key normalisation on the OTU side so both sides of the merge match
otu_long = otu_long.assign(SampleID=otu_long['SampleID'].astype(str).str.strip())

## Attaching sample metadata, then keeping only complete rows with a positive abundance
## NOTE(review): dropna() is called without a subset, so a missing value in ANY column
## (taxonomy or metadata) removes the row -- confirm this is intended
otu_long = (
    otu_long
    .merge(meta_df, on='SampleID', how='left')
    .dropna()
    .loc[lambda frame: frame['Abundance'] > 0]
)

## Named color scheme shared by the charts
color_scheme = 'category20b'

## Standard chart size
default_chart_props = {'width': 500, 'height': 400}

## Same height as the standard size, with extra width for specific plots
wide_chart_props = {**default_chart_props, 'width': 600}

## Fields shown in the tooltip of the charts that reuse this list
tooltip_fields = ['Phylum', 'Abundance', 'SampleID']

## PLOT 1: Abundance Distribution
## Bar chart of the number of rows per phylum, restricted to abundances of at least 0.1
distrib_filtered = otu_long[otu_long['Abundance'] >= 0.1]

abundance_distrib = alt.Chart(distrib_filtered).mark_bar().encode(
    x=alt.X('Phylum:N', title='Microbes Distribution (Phylum)', sort='-y', axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('count():Q', title='Count'),
    color=alt.Color('Phylum:N', scale=alt.Scale(scheme=color_scheme), legend=alt.Legend(title='Phylum', orient='right')),
    ## The tooltip only lists the grouping field and the aggregate. In an aggregated
    ## chart every non-aggregated encoding field (tooltip included) becomes a group-by
    ## key, so raw 'Abundance'/'SampleID' here would split each bar into one segment
    ## per distinct (Phylum, Abundance, SampleID) combination and hide the real count.
    tooltip=[
        alt.Tooltip('Phylum:N', title='Phylum'),
        alt.Tooltip('count():Q', title='Count'),
    ]
).properties(
    title='Abundance Distribution of Microbes',
    **default_chart_props
).interactive()

## PLOT 2: Relative Taxonomic Distribution
## One bar per layer of origin; the summed abundance is stacked by phylum and
## normalised, so the segments show each phylum's share within its layer
stacked_bar = (
    alt.Chart(otu_long)
    .mark_bar()
    .encode(
        alt.X('LayerOfOrigin:N', title='Layer of Origin'),
        alt.Y('sum(Abundance):Q', title='Total Abundance', stack='normalize'),
        alt.Color('Phylum:N', scale=alt.Scale(scheme=color_scheme)),
        tooltip=['Phylum', 'sum(Abundance)'],
    )
    .properties(
        title='Relative Abundance of Microbial Phyla Across Layers',
        **default_chart_props,
    )
    .interactive()
)

## PLOT 3: Microbial Co-Occurrence (Wider chart)
## Summed abundance for every (phylum, layer) pair, computed up front in pandas
co_occurrence = otu_long.groupby(['Phylum', 'LayerOfOrigin'], as_index=False)['Abundance'].sum()

## One circle per (phylum, layer) pair; circle area encodes the summed abundance
network_chart = (
    alt.Chart(co_occurrence)
    .mark_circle()
    .encode(
        alt.X('Phylum:N', title='Phylum'),
        alt.Y('LayerOfOrigin:N', title='Layer of Origin'),
        alt.Size('Abundance:Q', scale=alt.Scale(range=[10, 500]), title='Abundance'),
        alt.Color('Phylum:N', scale=alt.Scale(scheme=color_scheme)),
        tooltip=['Phylum', 'LayerOfOrigin', 'Abundance'],
    )
    .properties(
        title='Microbial Co-Occurrence Network',
        **wide_chart_props,
    )
    .interactive()
)

## PLOT 4: Taxonomic Flow Visualization (Wider chart)
## Summed abundance per phylum, coloured by layer of origin, phyla ordered by
## descending total.
## NOTE(review): the title says "Sankey Diagram" but the mark is a bar -- confirm the wording
taxonomic_flow = (
    alt.Chart(otu_long)
    .mark_bar()
    .encode(
        alt.X('Phylum:N', title='Phylum', sort='-y'),
        alt.Y('sum(Abundance):Q', title='Total Abundance'),
        alt.Color('LayerOfOrigin:N', scale=alt.Scale(scheme=color_scheme)),
        tooltip=['Phylum', 'LayerOfOrigin', 'sum(Abundance)'],
    )
    .properties(
        title='Sankey Diagram of Taxonomic Flow',
        **wide_chart_props,
    )
    .interactive()
)

## PLOT 5: Enhanced Abundance Variation (Wider chart)
## Only rows with an abundance of at least 0.1 are plotted
var_filtered = otu_long.loc[otu_long['Abundance'] >= 0.1]

## Legend-bound selection on Phylum; it drives the opacity condition below
phylum_selection = alt.selection_multi(fields=['Phylum'], bind='legend')

abundance_var = (
    alt.Chart(var_filtered)
    .mark_circle(opacity=0.7)
    .encode(
        # Hide individual labels for clarity
        alt.X('SampleID:N', title='Sample ID', axis=alt.Axis(labels=False, tickSize=0)),
        alt.Y('Abundance:Q', scale=alt.Scale(type='log'), title='Abundance (log scale)'),
        alt.Color('Phylum:N', scale=alt.Scale(scheme=color_scheme)),
        alt.Size('Abundance:Q', scale=alt.Scale(range=[10, 100]), legend=None),
        tooltip=tooltip_fields,
        ## Selected phyla are drawn at 0.8 opacity, everything else at 0.2
        opacity=alt.condition(phylum_selection, alt.value(0.8), alt.value(0.2)),
    )
    .properties(
        title='Abundance Variation Across Samples',
        **wide_chart_props,
    )
    .add_selection(phylum_selection)
)

## PLOT 6: Abundance Scatter Plot
## Restricting the data to the three layers compared in this plot
scatter_layers = ['DCM', 'MES', 'SRF']
scatter_data = otu_long[otu_long['LayerOfOrigin'].isin(scatter_layers)]

## One semi-transparent circle per row, abundance on a log axis, grouped by layer
abundance_scatter = (
    alt.Chart(scatter_data)
    .mark_circle(size=50, opacity=0.6)
    .encode(
        alt.X('LayerOfOrigin:N'),
        alt.Y('Abundance:Q', scale=alt.Scale(type='log')),
        alt.Color('LayerOfOrigin:N', scale=alt.Scale(scheme=color_scheme)),
        tooltip=['LayerOfOrigin', 'Abundance', 'SampleID'],
    )
    .properties(
        title='Abundance Scatter Plot Across Sample Classes',
        **default_chart_props,
    )
    .interactive()
)

## ARRANGING ALL THE CHARTS IN ROWS
## Each row pairs two charts side by side and gives every chart its own color scale
row_1, row_2, row_3 = (
    alt.hconcat(left_chart, right_chart).resolve_scale(color='independent')
    for left_chart, right_chart in (
        (abundance_distrib, stacked_bar),
        (network_chart, taxonomic_flow),
        (abundance_var, abundance_scatter),
    )
)

## FINAL DASHBOARD
## The three rows stacked vertically, again with independent color scales
final_dashboard = alt.vconcat(row_1, row_2, row_3)
final_dashboard = final_dashboard.resolve_scale(color='independent')

## Saving as HTML (rendered as SVG by the embedded viewer)
final_dashboard.save(
    "ocean_microbe_dashboard.html",
    embed_options={'renderer': 'svg'},
)

print("Visualization saved as ocean_microbe_dashboard.html")


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

Visualization saved as ocean_microbe_dashboard.html
