diff --git a/.github/workflows/cdci.yml b/.github/workflows/cdci.yml index 1fa82e6..1928d26 100644 --- a/.github/workflows/cdci.yml +++ b/.github/workflows/cdci.yml @@ -7,7 +7,7 @@ on: branches: [main] release: types: [published] - + jobs: test: name: Unittests+streamlit @@ -17,14 +17,16 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - - uses: psf/black@stable + - uses: psf/black@stable + # with: + # jupyter: true - uses: isort/isort-action@v1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' # caching pip dependencies - cache-dependency-path: '**/pyproject.toml' + cache: "pip" # caching pip dependencies + cache-dependency-path: "**/pyproject.toml" - name: Install dependencies run: | python -m pip install --upgrade pip @@ -37,7 +39,7 @@ jobs: cd docs vuegen --directory example_data/Earth_microbiome_vuegen_demo_notebook vuegen --config example_data/Earth_microbiome_vuegen_demo_notebook/Earth_microbiome_vuegen_demo_notebook_config.yaml - + other-reports: name: Integration tests runs-on: ubuntu-latest @@ -50,8 +52,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' # caching pip dependencies - cache-dependency-path: '**/pyproject.toml' + cache: "pip" # caching pip dependencies + cache-dependency-path: "**/pyproject.toml" - name: Install dependencies run: | pip install --upgrade pip @@ -110,7 +112,7 @@ jobs: steps: - uses: actions/checkout@v4 - + - uses: actions/setup-python@v5 with: python-version: "3.11" diff --git a/docs/vuegen_basic_case_study.ipynb b/docs/vuegen_basic_case_study.ipynb index 1268256..be4ade0 100644 --- a/docs/vuegen_basic_case_study.ipynb +++ b/docs/vuegen_basic_case_study.ipynb @@ -67,7 +67,7 @@ }, "outputs": [], "source": [ - "# Vuegen library \n", + "# Vuegen library\n", "%pip install vuegen" ] }, @@ -78,6 +78,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "IN_COLAB = \"COLAB_GPU\" in os.environ" ] }, @@ -164,7 +165,9 @@ "source": [ "# Generate the report\n", "report_type = \"streamlit\"\n", - "report_generator.get_report(dir_path = base_output_dir, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " dir_path=base_output_dir, report_type=report_type, logger=None\n", + ")" ] }, { @@ -185,14 +188,20 @@ "# run_streamlit = True # uncomment line to run the streamlit report\n", "# Launch the Streamlit report depneding on the platform\n", "if not IN_COLAB and run_streamlit:\n", - " !streamlit run streamlit_report/sections/report_manager.py\n", + " !streamlit run streamlit_report/sections/report_manager.py\n", "elif run_streamlit:\n", - " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", - " print(\"Password/Enpoint IP for localtunnel is:\",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip(\"\\n\"))\n", - " # Run the Streamlit app in the background\n", - " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", - " # Expose the Streamlit app on port 8501\n", - " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", + " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", + " print(\n", + " \"Password/Enpoint IP for localtunnel is:\",\n", + " urllib.request.urlopen(\"https://ipv4.icanhazip.com\")\n", + " .read()\n", + " .decode(\"utf8\")\n", + " .strip(\"\\n\"),\n", + " )\n", + " # Run the Streamlit app in the background\n", + " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", + " # Expose the Streamlit app on port 8501\n", + " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", "else:\n", " print(\"Streamlit report not executed, set run_streamlit to True to run the report\")" ] @@ -212,7 +221,9 @@ "source": [ "# Generate the report\n", "report_type = \"html\"\n", - "report_generator.get_report(dir_path = base_output_dir, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " dir_path=base_output_dir, report_type=report_type, logger=None\n", + ")" ] }, { @@ -239,11 +250,15 @@ "vuegen_logo_path = \"https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_logo.svg\"\n", "\n", "# Load the YAML file\n", - "config_path = os.path.join(base_output_dir, \"Basic_example_vuegen_demo_notebook_config.yaml\")\n", + "config_path = os.path.join(\n", + " base_output_dir, \"Basic_example_vuegen_demo_notebook_config.yaml\"\n", + ")\n", "config = load_yaml_config(config_path)\n", "\n", "# Update the logo and graphical abstract with the URL\n", - "config[\"report\"].update({\"logo\": vuegen_logo_path, \"graphical_abstract\": vuegen_logo_path})" + "config[\"report\"].update(\n", + " {\"logo\": vuegen_logo_path, \"graphical_abstract\": vuegen_logo_path}\n", + ")" ] }, { @@ -261,7 +276,7 @@ "source": [ "# Update the description for the EDA section\n", "for section in config[\"sections\"]:\n", - " if section[\"title\"] == \"Plots\": \n", + " if section[\"title\"] == \"Plots\":\n", " section[\"description\"] = \"This section contains example plots\"\n", "\n", "# Update the description for the alpha diversity subsection from the Metagenomics section\n", @@ -269,7 +284,9 @@ " if section[\"title\"] == \"Dataframes\":\n", " for subsection in section[\"subsections\"]:\n", " if subsection[\"title\"] == \"All Formats\":\n", - " subsection[\"description\"] = \"This subsection contains example dataframes.\"\n" + " subsection[\"description\"] = (\n", + " \"This subsection contains example dataframes.\"\n", + " )" ] }, { @@ -288,11 +305,11 @@ "# Define new plot with a URL as the file path\n", "vuegen_abst_fig = {\n", " \"title\": \"Graphical overview of VueGen’s workflow and components\",\n", - " \"file_path\": \"https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_graph_abstract.png\", \n", + " \"file_path\": \"https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuegen/main/docs/images/vuegen_graph_abstract.png\",\n", " \"description\": \"\",\n", " \"caption\": \"The diagram illustrates the processing pipeline of VueGen, starting from either a directory or a YAML configuration file. Reports consist of hierarchical sections and subsections, each containing various components such as plots, dataframes, Markdown, HTML, and data retrieved via API calls.\",\n", " \"component_type\": \"plot\",\n", - " \"plot_type\": \"static\"\n", + " \"plot_type\": \"static\",\n", "}\n", "\n", "# Add the plot to the Sample Provenance subsection in the EDA section\n", @@ -324,7 +341,9 @@ "source": [ "# Test the changes by generarating the report from the modified YAML file\n", "report_type = \"streamlit\"\n", - "report_generator.get_report(config_path = config_path, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " config_path=config_path, report_type=report_type, logger=None\n", + ")" ] }, { @@ -337,14 +356,20 @@ "# run_streamlit = True # uncomment line to run the streamlit report\n", "# Launch the Streamlit report depneding on the platform\n", "if not IN_COLAB and run_streamlit:\n", - " !streamlit run streamlit_report/sections/report_manager.py\n", + " !streamlit run streamlit_report/sections/report_manager.py\n", "elif run_streamlit:\n", - " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", - " print(\"Password/Enpoint IP for localtunnel is:\",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip(\"\\n\"))\n", - " # Run the Streamlit app in the background\n", - " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", - " # Expose the Streamlit app on port 8501\n", - " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", + " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", + " print(\n", + " \"Password/Enpoint IP for localtunnel is:\",\n", + " urllib.request.urlopen(\"https://ipv4.icanhazip.com\")\n", + " .read()\n", + " .decode(\"utf8\")\n", + " .strip(\"\\n\"),\n", + " )\n", + " # Run the Streamlit app in the background\n", + " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", + " # Expose the Streamlit app on port 8501\n", + " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", "else:\n", " print(\"Streamlit report not executed, set run_streamlit to True to run the report\")" ] @@ -364,7 +389,9 @@ "source": [ "# Test the changes by generarating the report from the modified YAML file\n", "report_type = \"html\"\n", - "report_generator.get_report(config_path = config_path, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " config_path=config_path, report_type=report_type, logger=None\n", + ")" ] } ], diff --git a/docs/vuegen_case_study_earth_microbiome.ipynb b/docs/vuegen_case_study_earth_microbiome.ipynb index 1dff5eb..d82be94 100644 --- a/docs/vuegen_case_study_earth_microbiome.ipynb +++ b/docs/vuegen_case_study_earth_microbiome.ipynb @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Vuegen library \n", + "# Vuegen library\n", "%pip install vuegen" ] }, @@ -91,6 +91,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "IN_COLAB = \"COLAB_GPU\" in os.environ" ] }, @@ -216,42 +217,43 @@ " input:\n", " empocat- empo category name (string)\n", " returndict- the user needs the dictionary mapping category to color (boolean)\n", - " \n", + "\n", " output: either a color for passed empocat or the dictionay if returndict=True\"\"\"\n", - " \n", + "\n", " # hex codes for matplotlib colors are described here:\n", " # https://github.com/matplotlib/matplotlib/blob/cf83cd5642506ef808853648b9eb409f8dbd6ff3/lib/matplotlib/_color_data.py\n", "\n", - " empo_cat_color={'EMP sample': '#929591', # 'grey'\n", - " 'Host-associated': '#fb9a99',\n", - " 'Free-living': '#e31a1c',\n", - " 'Animal': '#b2df8a',\n", - " 'Plant': '#33a02c',\n", - " 'Non-saline': '#a6cee3',\n", - " 'Saline': '#1f78b4',\n", - " 'Aerosol (non-saline)': '#d3d3d3', # 'lightgrey'\n", - " 'Animal corpus': '#ffff00', # 'yellow'\n", - " 'Animal distal gut': '#8b4513', # 'saddlebrown'\n", - " 'Animal proximal gut': '#d2b48c', # 'tan'\n", - " 'Animal secretion': '#f4a460', # 'sandybrown'\n", - " 'Animal surface': '#b8860b', # 'darkgoldenrod'\n", - " 'Hypersaline (saline)': '#87cefa', # 'lightskyblue'\n", - " 'Intertidal (saline)': '#afeeee', # 'paleturquoise'\n", - " 'Mock community': '#ff00ff', # 'fuchsia'\n", - " 'Plant corpus': '#7cfc00', # 'lawngreen'\n", - " 'Plant rhizosphere': '#006400', # 'darkgreen'\n", - " 'Plant surface': '#00fa9a', # 'mediumspringgreen'\n", - " 'Sediment (non-saline)': '#ffa07a', # 'lightsalmon'\n", - " 'Sediment (saline)': '#ff6347', # 'tomato'\n", - " 'Soil (non-saline)': '#ff0000', # 'red'\n", - " 'Sterile water blank': '#ee82ee', # 'violet'\n", - " 'Surface (non-saline)': '#000000', # 'black'\n", - " 'Surface (saline)': '#696969', # 'dimgrey'\n", - " 'Water (non-saline)': '#000080', # 'navy'\n", - " 'Water (saline)': '#4169e1' # 'royalblue'\n", - " }\n", - " \n", - " if returndict==True:\n", + " empo_cat_color = {\n", + " \"EMP sample\": \"#929591\", # 'grey'\n", + " \"Host-associated\": \"#fb9a99\",\n", + " \"Free-living\": \"#e31a1c\",\n", + " \"Animal\": \"#b2df8a\",\n", + " \"Plant\": \"#33a02c\",\n", + " \"Non-saline\": \"#a6cee3\",\n", + " \"Saline\": \"#1f78b4\",\n", + " \"Aerosol (non-saline)\": \"#d3d3d3\", # 'lightgrey'\n", + " \"Animal corpus\": \"#ffff00\", # 'yellow'\n", + " \"Animal distal gut\": \"#8b4513\", # 'saddlebrown'\n", + " \"Animal proximal gut\": \"#d2b48c\", # 'tan'\n", + " \"Animal secretion\": \"#f4a460\", # 'sandybrown'\n", + " \"Animal surface\": \"#b8860b\", # 'darkgoldenrod'\n", + " \"Hypersaline (saline)\": \"#87cefa\", # 'lightskyblue'\n", + " \"Intertidal (saline)\": \"#afeeee\", # 'paleturquoise'\n", + " \"Mock community\": \"#ff00ff\", # 'fuchsia'\n", + " \"Plant corpus\": \"#7cfc00\", # 'lawngreen'\n", + " \"Plant rhizosphere\": \"#006400\", # 'darkgreen'\n", + " \"Plant surface\": \"#00fa9a\", # 'mediumspringgreen'\n", + " \"Sediment (non-saline)\": \"#ffa07a\", # 'lightsalmon'\n", + " \"Sediment (saline)\": \"#ff6347\", # 'tomato'\n", + " \"Soil (non-saline)\": \"#ff0000\", # 'red'\n", + " \"Sterile water blank\": \"#ee82ee\", # 'violet'\n", + " \"Surface (non-saline)\": \"#000000\", # 'black'\n", + " \"Surface (saline)\": \"#696969\", # 'dimgrey'\n", + " \"Water (non-saline)\": \"#000080\", # 'navy'\n", + " \"Water (saline)\": \"#4169e1\", # 'royalblue'\n", + " }\n", + "\n", + " if returndict == True:\n", " return empo_cat_color\n", " else:\n", " return empo_cat_color[empocat]" @@ -286,13 +288,15 @@ "outputs": [], "source": [ "# Create the output directory for the EDA section and sample provenance subsection\n", - "sample_prov_output_dir = os.path.join(base_output_dir, \"1_Exploratory_data_analysis/1_sample_exploration/\")\n", + "sample_prov_output_dir = os.path.join(\n", + " base_output_dir, \"1_Exploratory_data_analysis/1_sample_exploration/\"\n", + ")\n", "os.makedirs(sample_prov_output_dir, exist_ok=True)\n", "\n", "# Load data and filter out control samples\n", - "metadata_mapping = 'https://raw.githubusercontent.com//biocore/emp/master/data/mapping-files/emp_qiime_mapping_release1.tsv'\n", + "metadata_mapping = \"https://raw.githubusercontent.com//biocore/emp/master/data/mapping-files/emp_qiime_mapping_release1.tsv\"\n", "metadata_mapping_df = pd.read_table(metadata_mapping, index_col=0)\n", - "metadata_mapping_df = metadata_mapping_df[metadata_mapping_df['empo_1'] != 'Control']" + "metadata_mapping_df = metadata_mapping_df[metadata_mapping_df[\"empo_1\"] != \"Control\"]" ] }, { @@ -312,7 +316,9 @@ "sample_metadata_mapping_df = metadata_mapping_df.sample(100, random_state=42)\n", "\n", "# Export the sample df as a CSV file\n", - "sample_metadata_mapping_df.to_csv(f'{sample_prov_output_dir}/1_metadata_random_subset.csv')" + "sample_metadata_mapping_df.to_csv(\n", + " f\"{sample_prov_output_dir}/1_metadata_random_subset.csv\"\n", + ")" ] }, { @@ -335,12 +341,14 @@ "animal_empo3 = animal_df[\"empo_3\"].unique()\n", "\n", "# Create a figure with Cartopy map projection\n", - "fig, ax = plt.subplots(figsize=(12, 8), dpi=300, subplot_kw={'projection': ccrs.PlateCarree()})\n", + "fig, ax = plt.subplots(\n", + " figsize=(12, 8), dpi=300, subplot_kw={\"projection\": ccrs.PlateCarree()}\n", + ")\n", "\n", "# Add features to the map\n", - "ax.add_feature(cfeature.BORDERS, edgecolor='white', linewidth=0.5)\n", - "ax.add_feature(cfeature.LAND, edgecolor='white', facecolor='lightgray', linewidth=0.5)\n", - "ax.add_feature(cfeature.COASTLINE, edgecolor='white', linewidth=0.5)\n", + "ax.add_feature(cfeature.BORDERS, edgecolor=\"white\", linewidth=0.5)\n", + "ax.add_feature(cfeature.LAND, edgecolor=\"white\", facecolor=\"lightgray\", linewidth=0.5)\n", + "ax.add_feature(cfeature.COASTLINE, edgecolor=\"white\", linewidth=0.5)\n", "\n", "# Set extent (global map)\n", "ax.set_extent([-180, 180, -90, 90])\n", @@ -349,13 +357,21 @@ "for empo3 in animal_empo3:\n", " subset = animal_df[animal_df[\"empo_3\"] == empo3]\n", " color = get_empo_cat_color(empo3) # Get color for category\n", - " ax.scatter(subset[\"longitude_deg\"], subset[\"latitude_deg\"], \n", - " color='none', edgecolors=color, linewidth=1.5, label=empo3, s=40, \n", - " transform=ccrs.PlateCarree(), zorder=2)\n", + " ax.scatter(\n", + " subset[\"longitude_deg\"],\n", + " subset[\"latitude_deg\"],\n", + " color=\"none\",\n", + " edgecolors=color,\n", + " linewidth=1.5,\n", + " label=empo3,\n", + " s=40,\n", + " transform=ccrs.PlateCarree(),\n", + " zorder=2,\n", + " )\n", "\n", "# Add legend with updated labels\n", "handles, labels = ax.get_legend_handles_labels()\n", - "ax.legend(handles, labels, loc='lower center', ncol=2, fontsize=10)\n", + "ax.legend(handles, labels, loc=\"lower center\", ncol=2, fontsize=10)\n", "\n", "# Save the figure\n", "animal_map_out_path = os.path.join(sample_prov_output_dir, \"2_animal_samples_map.png\")\n", @@ -376,7 +392,7 @@ "outputs": [], "source": [ "# Extract Plant dataset\n", - "plant_df = metadata_mapping_df[metadata_mapping_df['empo_2'] == 'Plant']\n", + "plant_df = metadata_mapping_df[metadata_mapping_df[\"empo_2\"] == \"Plant\"]\n", "\n", "# Unique subcategories in empo_3\n", "plant_empo3 = plant_df[\"empo_3\"].unique()\n", @@ -389,18 +405,22 @@ " subset = plant_df[plant_df[\"empo_3\"] == empo3]\n", " color = get_empo_cat_color(empo3) # Get color for category\n", "\n", - " fig.add_trace(go.Scattergeo(\n", - " lon=subset[\"longitude_deg\"],\n", - " lat=subset[\"latitude_deg\"],\n", - " mode=\"markers\",\n", - " marker=dict(\n", - " symbol=\"circle-open\", # Unfilled circle\n", - " color=color,\n", - " size=6, # Marker size\n", - " line=dict(width=1.5, color=color) # Border color matches category color\n", - " ),\n", - " name=empo3\n", - " ))\n", + " fig.add_trace(\n", + " go.Scattergeo(\n", + " lon=subset[\"longitude_deg\"],\n", + " lat=subset[\"latitude_deg\"],\n", + " mode=\"markers\",\n", + " marker=dict(\n", + " symbol=\"circle-open\", # Unfilled circle\n", + " color=color,\n", + " size=6, # Marker size\n", + " line=dict(\n", + " width=1.5, color=color\n", + " ), # Border color matches category color\n", + " ),\n", + " name=empo3,\n", + " )\n", + " )\n", "\n", "# Update map layout (fixes horizontal blank space)\n", "fig.update_layout(\n", @@ -412,7 +432,7 @@ " coastlinecolor=\"white\",\n", " fitbounds=\"locations\", # Focuses only on data points\n", " lataxis=dict(range=[-60, 85], showgrid=False), # Custom latitude range\n", - " lonaxis=dict(range=[-180, 180], showgrid=False) # Custom longitude range\n", + " lonaxis=dict(range=[-180, 180], showgrid=False), # Custom longitude range\n", " ),\n", " autosize=False,\n", " width=800, # Adjust width to remove blank space\n", @@ -424,8 +444,8 @@ " x=0.5, # Center legend horizontally\n", " xanchor=\"center\",\n", " yanchor=\"top\",\n", - " orientation=\"h\" # Horizontal legend layout\n", - " )\n", + " orientation=\"h\", # Horizontal legend layout\n", + " ),\n", ")\n", "\n", "# Save the figure as SVG and JSON\n", @@ -457,50 +477,62 @@ "\n", "# Create a dictionary for simplified category names for the legend\n", "simplified_category_names = {\n", - " 'Water (saline)': 'Water',\n", - " 'Sediment (saline)': 'Sediment',\n", - " 'Surface (saline)': 'Surface',\n", - " 'Hypersaline (saline)': 'Hypersaline'\n", + " \"Water (saline)\": \"Water\",\n", + " \"Sediment (saline)\": \"Sediment\",\n", + " \"Surface (saline)\": \"Surface\",\n", + " \"Hypersaline (saline)\": \"Hypersaline\",\n", "}\n", "\n", "# Simplify the empo_3 names in the DataFrame for legend\n", - "saline_df['simplified_empo_3'] = saline_df['empo_3'].apply(lambda x: simplified_category_names.get(x, x))\n", + "saline_df[\"simplified_empo_3\"] = saline_df[\"empo_3\"].apply(\n", + " lambda x: simplified_category_names.get(x, x)\n", + ")\n", "\n", "# Apply the get_empo_cat_color function to generate the color column\n", - "saline_df['color'] = saline_df['empo_3'].apply(get_empo_cat_color)\n", + "saline_df[\"color\"] = saline_df[\"empo_3\"].apply(get_empo_cat_color)\n", "\n", "# Create the base world map (use the CDN URL or a different base map if you prefer)\n", - "countries = alt.topo_feature('https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json', 'countries')\n", + "countries = alt.topo_feature(\n", + " \"https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json\", \"countries\"\n", + ")\n", "\n", "# Create a background map of countries\n", - "background_map = alt.Chart(countries).mark_geoshape(\n", - " fill='lightgray',\n", - " stroke='white'\n", - ").project('equirectangular').properties(\n", - " width=800,\n", - " height=400\n", + "background_map = (\n", + " alt.Chart(countries)\n", + " .mark_geoshape(fill=\"lightgray\", stroke=\"white\")\n", + " .project(\"equirectangular\")\n", + " .properties(width=800, height=400)\n", ")\n", "\n", "# Create the points for saline samples with custom colors\n", - "saline_points = alt.Chart(saline_df).mark_point(size=50, shape='circle', filled=False,).encode(\n", - " longitude='longitude_deg:Q',\n", - " latitude='latitude_deg:Q',\n", - " color=alt.Color('simplified_empo_3:N', \n", - " scale=alt.Scale(domain=list(saline_df['simplified_empo_3'].unique()), \n", - " range=[get_empo_cat_color(cat) for cat in saline_df['empo_3'].unique()]),\n", - " legend=alt.Legend(\n", - " title='',\n", - " orient='bottom', \n", - " symbolSize=120,\n", - " labelFontSize=16 \n", - " )),\n", - " tooltip=['latitude_deg', 'longitude_deg', 'empo_3']\n", + "saline_points = (\n", + " alt.Chart(saline_df)\n", + " .mark_point(\n", + " size=50,\n", + " shape=\"circle\",\n", + " filled=False,\n", + " )\n", + " .encode(\n", + " longitude=\"longitude_deg:Q\",\n", + " latitude=\"latitude_deg:Q\",\n", + " color=alt.Color(\n", + " \"simplified_empo_3:N\",\n", + " scale=alt.Scale(\n", + " domain=list(saline_df[\"simplified_empo_3\"].unique()),\n", + " range=[get_empo_cat_color(cat) for cat in saline_df[\"empo_3\"].unique()],\n", + " ),\n", + " legend=alt.Legend(\n", + " title=\"\", orient=\"bottom\", symbolSize=120, labelFontSize=16\n", + " ),\n", + " ),\n", + " tooltip=[\"latitude_deg\", \"longitude_deg\", \"empo_3\"],\n", + " )\n", ")\n", "\n", "# Overlay the points on the world map\n", "map_with_points = background_map + saline_points\n", "\n", - "# Save as JSON and \n", + "# Save as JSON and\n", "saline_json_path = os.path.join(sample_prov_output_dir, \"4_saline_samples_map.json\")\n", "with open(saline_json_path, \"w\") as f:\n", " f.write(map_with_points.to_json())\n", @@ -544,9 +576,11 @@ "alpha_div_output_dir = os.path.join(base_output_dir, \"2_Metagenomics/1_alpha_diversity\")\n", "os.makedirs(alpha_div_output_dir, exist_ok=True)\n", "\n", - "# Load data \n", - "mapping_qc_filt = 'https://raw.githubusercontent.com//biocore/emp/master/data/mapping-files/emp_qiime_mapping_qc_filtered.tsv'\n", - "mapping_qc_filt_df = pd.read_csv(mapping_qc_filt, sep='\\t', index_col=0, header=0).sort_index()" + "# Load data\n", + "mapping_qc_filt = \"https://raw.githubusercontent.com//biocore/emp/master/data/mapping-files/emp_qiime_mapping_qc_filtered.tsv\"\n", + "mapping_qc_filt_df = pd.read_csv(\n", + " mapping_qc_filt, sep=\"\\t\", index_col=0, header=0\n", + ").sort_index()" ] }, { @@ -555,28 +589,28 @@ "metadata": {}, "outputs": [], "source": [ - "# Define colors of host associated and free living categories \n", + "# Define colors of host associated and free living categories\n", "colorsHA = {\n", - " 'Animal corpus': get_empo_cat_color('Animal corpus'),\n", - " 'Plant corpus': get_empo_cat_color('Plant corpus'),\n", - " 'Animal secretion': get_empo_cat_color('Animal secretion'),\n", - " 'Plant surface': get_empo_cat_color('Plant surface'),\n", - " 'Animal proximal gut': get_empo_cat_color('Animal proximal gut'),\n", - " 'Animal surface': get_empo_cat_color('Animal surface'),\n", - " 'Animal distal gut': get_empo_cat_color('Animal distal gut'),\n", - " 'Plant rhizosphere': get_empo_cat_color('Plant rhizosphere'),\n", + " \"Animal corpus\": get_empo_cat_color(\"Animal corpus\"),\n", + " \"Plant corpus\": get_empo_cat_color(\"Plant corpus\"),\n", + " \"Animal secretion\": get_empo_cat_color(\"Animal secretion\"),\n", + " \"Plant surface\": get_empo_cat_color(\"Plant surface\"),\n", + " \"Animal proximal gut\": get_empo_cat_color(\"Animal proximal gut\"),\n", + " \"Animal surface\": get_empo_cat_color(\"Animal surface\"),\n", + " \"Animal distal gut\": get_empo_cat_color(\"Animal distal gut\"),\n", + " \"Plant rhizosphere\": get_empo_cat_color(\"Plant rhizosphere\"),\n", "}\n", "\n", "colorsFL = {\n", - " 'Water (saline)': get_empo_cat_color('Water (saline)'), \n", - " 'Aerosol (non-saline)': get_empo_cat_color('Aerosol (non-saline)'), \n", - " 'Hypersaline (saline)': get_empo_cat_color('Hypersaline (saline)'),\n", - " 'Surface (non-saline)': get_empo_cat_color('Surface (non-saline)'), \n", - " 'Surface (saline)': get_empo_cat_color('Surface (saline)'), \n", - " 'Water (non-saline)': get_empo_cat_color('Water (non-saline)'), \n", - " 'Sediment (saline)': get_empo_cat_color('Sediment (saline)'), \n", - " 'Soil (non-saline)': get_empo_cat_color('Soil (non-saline)'), \n", - " 'Sediment (non-saline)': get_empo_cat_color('Sediment (non-saline)')\n", + " \"Water (saline)\": get_empo_cat_color(\"Water (saline)\"),\n", + " \"Aerosol (non-saline)\": get_empo_cat_color(\"Aerosol (non-saline)\"),\n", + " \"Hypersaline (saline)\": get_empo_cat_color(\"Hypersaline (saline)\"),\n", + " \"Surface (non-saline)\": get_empo_cat_color(\"Surface (non-saline)\"),\n", + " \"Surface (saline)\": get_empo_cat_color(\"Surface (saline)\"),\n", + " \"Water (non-saline)\": get_empo_cat_color(\"Water (non-saline)\"),\n", + " \"Sediment (saline)\": get_empo_cat_color(\"Sediment (saline)\"),\n", + " \"Soil (non-saline)\": get_empo_cat_color(\"Soil (non-saline)\"),\n", + " \"Sediment (non-saline)\": get_empo_cat_color(\"Sediment (non-saline)\"),\n", "}" ] }, @@ -594,20 +628,22 @@ "outputs": [], "source": [ "# Ensure y variable is numeric to avoid aggregation errors\n", - "mapping_qc_filt_df['adiv_observed_otus'] = pd.to_numeric(mapping_qc_filt_df['adiv_observed_otus'], errors='coerce')\n", + "mapping_qc_filt_df[\"adiv_observed_otus\"] = pd.to_numeric(\n", + " mapping_qc_filt_df[\"adiv_observed_otus\"], errors=\"coerce\"\n", + ")\n", "\n", "# Get valid categories (only ones in colorsHA)\n", "valid_categories_HA = set(colorsHA.keys())\n", "\n", "# Filter dataset to include only valid categories\n", "filtered_data_HA = mapping_qc_filt_df[\n", - " (mapping_qc_filt_df['empo_0'] == 'EMP sample') &\n", - " (mapping_qc_filt_df['empo_3'].isin(valid_categories_HA)) \n", + " (mapping_qc_filt_df[\"empo_0\"] == \"EMP sample\")\n", + " & (mapping_qc_filt_df[\"empo_3\"].isin(valid_categories_HA))\n", "]\n", "\n", "# Compute sorted order (only for valid categories)\n", "sorted_order = (\n", - " filtered_data_HA.groupby(['empo_3'])['adiv_observed_otus']\n", + " filtered_data_HA.groupby([\"empo_3\"])[\"adiv_observed_otus\"]\n", " .mean()\n", " .dropna()\n", " .sort_values()\n", @@ -621,21 +657,36 @@ "fig = plt.figure(figsize=(16, 8))\n", "\n", "# Plot the boxplot and jitter plot\n", - "sns.boxplot(fliersize=0, x='empo_3', y='adiv_observed_otus', hue='empo_3', linewidth=1, data=filtered_data_HA, \n", - " order=sorted_order, palette=palette_dict)\n", - "sns.stripplot(jitter=True, x='empo_3', y='adiv_observed_otus', data=filtered_data_HA, order=sorted_order, \n", - " color='black', size=1)\n", + "sns.boxplot(\n", + " fliersize=0,\n", + " x=\"empo_3\",\n", + " y=\"adiv_observed_otus\",\n", + " hue=\"empo_3\",\n", + " linewidth=1,\n", + " data=filtered_data_HA,\n", + " order=sorted_order,\n", + " palette=palette_dict,\n", + ")\n", + "sns.stripplot(\n", + " jitter=True,\n", + " x=\"empo_3\",\n", + " y=\"adiv_observed_otus\",\n", + " data=filtered_data_HA,\n", + " order=sorted_order,\n", + " color=\"black\",\n", + " size=1,\n", + ")\n", "\n", "# Customize the plot\n", - "plt.xticks(rotation=45, ha='right', fontsize=16)\n", + "plt.xticks(rotation=45, ha=\"right\", fontsize=16)\n", "plt.yticks(fontsize=16)\n", - "plt.xlabel('')\n", + "plt.xlabel(\"\")\n", "plt.ylim(0, 3000)\n", - "plt.ylabel('Observed tag sequences', fontsize=16)\n", + "plt.ylabel(\"Observed tag sequences\", fontsize=16)\n", "\n", "# Add median line\n", - "median = filtered_data_HA['adiv_observed_otus'].median()\n", - "plt.axhline(y=median, xmin=0, xmax=1, color='y')\n", + "median = filtered_data_HA[\"adiv_observed_otus\"].median()\n", + "plt.axhline(y=median, xmin=0, xmax=1, color=\"y\")\n", "\n", "# Adjust layout and save the figure\n", "plt.tight_layout()\n", @@ -645,7 +696,9 @@ "os.makedirs(alpha_div_output_dir, exist_ok=True)\n", "\n", "# Save figure\n", - "alpha_div_box_plot_host_ass = os.path.join(alpha_div_output_dir, \"1_alpha_diversity_host_associated_samples.png\")\n", + "alpha_div_box_plot_host_ass = os.path.join(\n", + " alpha_div_output_dir, \"1_alpha_diversity_host_associated_samples.png\"\n", + ")\n", "plt.savefig(alpha_div_box_plot_host_ass, dpi=300, bbox_inches=\"tight\")" ] }, @@ -663,20 +716,22 @@ "outputs": [], "source": [ "# Ensure y variable is numeric to avoid aggregation errors\n", - "mapping_qc_filt_df['adiv_observed_otus'] = pd.to_numeric(mapping_qc_filt_df['adiv_observed_otus'], errors='coerce')\n", + "mapping_qc_filt_df[\"adiv_observed_otus\"] = pd.to_numeric(\n", + " mapping_qc_filt_df[\"adiv_observed_otus\"], errors=\"coerce\"\n", + ")\n", "\n", "# Get valid free-living categories (only ones in colorsFL)\n", "valid_categories_FL = list(colorsFL.keys())\n", "\n", "# Filter dataset to include only valid free-living categories\n", "filtered_data_FL = mapping_qc_filt_df[\n", - " (mapping_qc_filt_df['empo_0'] == 'EMP sample') &\n", - " (mapping_qc_filt_df['empo_3'].isin(valid_categories_FL)) \n", + " (mapping_qc_filt_df[\"empo_0\"] == \"EMP sample\")\n", + " & (mapping_qc_filt_df[\"empo_3\"].isin(valid_categories_FL))\n", "]\n", "\n", "# Compute sorted order (only for valid categories)\n", "sorted_order_FL = (\n", - " filtered_data_FL.groupby(['empo_3'])['adiv_observed_otus']\n", + " filtered_data_FL.groupby([\"empo_3\"])[\"adiv_observed_otus\"]\n", " .mean()\n", " .dropna()\n", " .sort_values()\n", @@ -686,51 +741,44 @@ "# Create the Plotly figure using boxplot and stripplot (jittered points)\n", "fig = px.box(\n", " filtered_data_FL,\n", - " x='empo_3',\n", - " y='adiv_observed_otus',\n", - " color='empo_3',\n", - " category_orders={'empo_3': sorted_order_FL},\n", + " x=\"empo_3\",\n", + " y=\"adiv_observed_otus\",\n", + " color=\"empo_3\",\n", + " category_orders={\"empo_3\": sorted_order_FL},\n", " color_discrete_map=colorsFL,\n", - " labels={'adiv_observed_otus': 'Observed tag sequences'},\n", - " points=False\n", + " labels={\"adiv_observed_otus\": \"Observed tag sequences\"},\n", + " points=False,\n", ")\n", "\n", "# Add jittered points (strip plot)\n", "fig.add_trace(\n", " px.strip(\n", - " filtered_data_FL,\n", - " x='empo_3',\n", - " y='adiv_observed_otus',\n", - " stripmode='overlay'\n", + " filtered_data_FL, x=\"empo_3\", y=\"adiv_observed_otus\", stripmode=\"overlay\"\n", " ).data[0]\n", ")\n", "\n", "# Modify the dot color and size directly inside the add_trace()\n", - "fig.data[-1].update(\n", - " marker=dict(\n", - " color='black', \n", - " size=1, \n", - " opacity=0.7 \n", - " )\n", - ")\n", + "fig.data[-1].update(marker=dict(color=\"black\", size=1, opacity=0.7))\n", "\n", "# Add median line\n", - "median = filtered_data_FL['adiv_observed_otus'].median()\n", - "fig.add_hline(y=median, line=dict(color='yellow'))\n", + "median = filtered_data_FL[\"adiv_observed_otus\"].median()\n", + "fig.add_hline(y=median, line=dict(color=\"yellow\"))\n", "\n", "# Customize the plot\n", "fig.update_layout(\n", - " xaxis_title='',\n", - " yaxis_title='Observed tag sequences',\n", + " xaxis_title=\"\",\n", + " yaxis_title=\"Observed tag sequences\",\n", " xaxis_tickangle=-45,\n", - " plot_bgcolor='rgba(0,0,0,0)',\n", + " plot_bgcolor=\"rgba(0,0,0,0)\",\n", " showlegend=False,\n", " height=600,\n", " font=dict(size=14),\n", ")\n", "\n", "# Save figure as JSON\n", - "alpha_div_box_plot_free_living_json = os.path.join(alpha_div_output_dir, \"2_alpha_diversity_free_living_samples.json\")\n", + "alpha_div_box_plot_free_living_json = os.path.join(\n", + " alpha_div_output_dir, \"2_alpha_diversity_free_living_samples.json\"\n", + ")\n", "fig.write_json(alpha_div_box_plot_free_living_json)\n", "\n", "# Save figure as PNG\n", @@ -764,14 +812,20 @@ "outputs": [], "source": [ "# Create the output directory for the metagenomics section and average copy number subsection\n", - "avg_copy_numb_dir = os.path.join(base_output_dir, \"2_Metagenomics/2_average_copy_number\")\n", + "avg_copy_numb_dir = os.path.join(\n", + " base_output_dir, \"2_Metagenomics/2_average_copy_number\"\n", + ")\n", "os.makedirs(avg_copy_numb_dir, exist_ok=True)\n", "\n", - "# Load data \n", + "# Load data\n", "emp_gg_otus_sampsum = \"https://raw.githubusercontent.com//biocore/emp/master/data/predicted-rrna-copy-number/emp_cr_gg_13_8.qc_filtered_filt_summary_samplesum.txt\"\n", - "emp_gg_otus_sampsum_df = pd.read_csv(emp_gg_otus_sampsum, sep='\\t', index_col=0, header=None).sort_index()\n", + "emp_gg_otus_sampsum_df = pd.read_csv(\n", + " emp_gg_otus_sampsum, sep=\"\\t\", index_col=0, header=None\n", + ").sort_index()\n", "emp_gg_otus_norm_sampsum = \"https://raw.githubusercontent.com//biocore/emp/master/data/predicted-rrna-copy-number/emp_cr_gg_13_8.normalized_qcfilt_summary_samplesum.txt\"\n", - "emp_gg_otus_norm_sampsum_df = pd.read_csv(emp_gg_otus_norm_sampsum, sep='\\t', index_col=0, header=None).sort_index()" + "emp_gg_otus_norm_sampsum_df = pd.read_csv(\n", + " emp_gg_otus_norm_sampsum, sep=\"\\t\", index_col=0, header=None\n", + ").sort_index()" ] }, { @@ -784,10 +838,14 @@ "mapping_qc_filt_merged_df = mapping_qc_filt_df.copy()\n", "\n", "# Merge new mapping df with emp_gg_otus_sampsum and emp_gg_otus_norm_sampsum\n", - "mapping_qc_filt_merged_df['sampsum'] = emp_gg_otus_sampsum_df[1]\n", - "mapping_qc_filt_merged_df['normsampsum'] = emp_gg_otus_norm_sampsum_df[1]\n", - "mapping_qc_filt_merged_df['copynumberdepletion'] = np.divide(emp_gg_otus_norm_sampsum_df[1], emp_gg_otus_sampsum_df[1])\n", - "mapping_qc_filt_merged_df['averagecopy'] = np.divide(1,np.divide(emp_gg_otus_norm_sampsum_df[1],emp_gg_otus_sampsum_df[1]))" + "mapping_qc_filt_merged_df[\"sampsum\"] = emp_gg_otus_sampsum_df[1]\n", + "mapping_qc_filt_merged_df[\"normsampsum\"] = emp_gg_otus_norm_sampsum_df[1]\n", + "mapping_qc_filt_merged_df[\"copynumberdepletion\"] = np.divide(\n", + " emp_gg_otus_norm_sampsum_df[1], emp_gg_otus_sampsum_df[1]\n", + ")\n", + "mapping_qc_filt_merged_df[\"averagecopy\"] = np.divide(\n", + " 1, np.divide(emp_gg_otus_norm_sampsum_df[1], emp_gg_otus_sampsum_df[1])\n", + ")" ] }, { @@ -805,25 +863,35 @@ "source": [ "plt.figure(figsize=(10, 6))\n", "\n", - "for i in ['Animal', 'Non-saline', 'Plant', 'Saline']:\n", - " plt.hist(mapping_qc_filt_merged_df[mapping_qc_filt_merged_df.empo_2 == i]['averagecopy'].dropna(), label=i,\n", - " bins=200, linewidth=0, color=get_empo_cat_color(i), alpha=0.8)\n", + "for i in [\"Animal\", \"Non-saline\", \"Plant\", \"Saline\"]:\n", + " plt.hist(\n", + " mapping_qc_filt_merged_df[mapping_qc_filt_merged_df.empo_2 == i][\n", + " \"averagecopy\"\n", + " ].dropna(),\n", + " label=i,\n", + " bins=200,\n", + " linewidth=0,\n", + " color=get_empo_cat_color(i),\n", + " alpha=0.8,\n", + " )\n", "\n", "# Customize axes: remove top and right borders\n", - "plt.gca().spines['top'].set_visible(False)\n", - "plt.gca().spines['right'].set_visible(False)\n", + "plt.gca().spines[\"top\"].set_visible(False)\n", + "plt.gca().spines[\"right\"].set_visible(False)\n", "\n", "# Titles and labels\n", - "plt.legend(loc=1, prop={'size':9}, frameon=False)\n", - "plt.xlabel('Predicted average community 16S copy number', fontsize=12)\n", - "plt.ylabel('Number of samples', fontsize=12)\n", + "plt.legend(loc=1, prop={\"size\": 9}, frameon=False)\n", + "plt.xlabel(\"Predicted average community 16S copy number\", fontsize=12)\n", + "plt.ylabel(\"Number of samples\", fontsize=12)\n", "plt.xticks(fontsize=10)\n", "plt.yticks(fontsize=10)\n", - "plt.xlim([0,8])\n", + "plt.xlim([0, 8])\n", "plt.tight_layout()\n", "\n", "# Save the figure\n", - "avg_copy_numb_empo2 = os.path.join(avg_copy_numb_dir, \"1_average_copy_number_emp_ontology_level2.png\")\n", + "avg_copy_numb_empo2 = os.path.join(\n", + " avg_copy_numb_dir, \"1_average_copy_number_emp_ontology_level2.png\"\n", + ")\n", "plt.savefig(avg_copy_numb_empo2, dpi=300, bbox_inches=\"tight\")" ] }, @@ -847,11 +915,13 @@ "for i in mapping_qc_filt_merged_df.empo_3.dropna().unique():\n", " hist_traces.append(\n", " go.Histogram(\n", - " x=mapping_qc_filt_merged_df[mapping_qc_filt_merged_df.empo_3 == i]['averagecopy'].dropna(),\n", + " x=mapping_qc_filt_merged_df[mapping_qc_filt_merged_df.empo_3 == i][\n", + " \"averagecopy\"\n", + " ].dropna(),\n", " name=i, # Legend name\n", " marker=dict(color=get_empo_cat_color(i)), # Assign color\n", " opacity=0.5,\n", - " nbinsx=200 # Number of bins\n", + " nbinsx=200, # Number of bins\n", " )\n", " )\n", "\n", @@ -863,31 +933,33 @@ " xaxis_title=\"Predicted average community 16S copy number\",\n", " yaxis_title=\"Number of samples\",\n", " xaxis=dict(\n", - " range=[0, 8], \n", + " range=[0, 8],\n", " tickfont=dict(size=10),\n", - " showline=True, \n", - " linewidth=1, \n", + " showline=True,\n", + " linewidth=1,\n", " linecolor=\"black\",\n", - " mirror=False, \n", - " showgrid=False, \n", - " zeroline=False \n", + " mirror=False,\n", + " showgrid=False,\n", + " zeroline=False,\n", " ),\n", " yaxis=dict(\n", " tickfont=dict(size=10),\n", - " showline=True, \n", - " linewidth=1, \n", + " showline=True,\n", + " linewidth=1,\n", " linecolor=\"black\",\n", - " mirror=False, \n", - " showgrid=False, \n", - " zeroline=False \n", + " mirror=False,\n", + " showgrid=False,\n", + " zeroline=False,\n", " ),\n", - " barmode=\"overlay\", \n", + " barmode=\"overlay\",\n", " showlegend=True,\n", - " legend=dict(font=dict(size=11), borderwidth=0), \n", - " plot_bgcolor=\"white\" \n", + " legend=dict(font=dict(size=11), borderwidth=0),\n", + " plot_bgcolor=\"white\",\n", ")\n", "# Save the figure as JSON\n", - "avg_copy_numb_empo3_json = os.path.join(avg_copy_numb_dir, \"2_average_copy_number_emp_ontology_level3.json\")\n", + "avg_copy_numb_empo3_json = os.path.join(\n", + " avg_copy_numb_dir, \"2_average_copy_number_emp_ontology_level3.json\"\n", + ")\n", "fig.write_json(avg_copy_numb_empo3_json)\n", "\n", "# Save the figure as PNG\n", @@ -930,8 +1002,7 @@ "nest_phylum_plantsamples = \"https://raw.githubusercontent.com//biocore/emp/master/data/nestedness/nest_phylum_Plant.csv\"\n", "nest_phylum_plantsamples_df = pd.read_csv(nest_phylum_plantsamples)\n", "nest_phylum_nonsalinesamples = \"https://raw.githubusercontent.com//biocore/emp/master/data/nestedness/nest_phylum_Non-saline.csv\"\n", - "nest_phylum_nonsalinesamples_df = pd.read_csv(nest_phylum_nonsalinesamples)\n", - " " + "nest_phylum_nonsalinesamples_df = pd.read_csv(nest_phylum_nonsalinesamples)" ] }, { @@ -948,10 +1019,14 @@ "outputs": [], "source": [ "# Obtain a randome sample of the nestedness df for all samples\n", - "sample_nest_phylum_allsamples_df = nest_phylum_allsamples_df.sample(100, random_state=42)\n", + "sample_nest_phylum_allsamples_df = nest_phylum_allsamples_df.sample(\n", + " 100, random_state=42\n", + ")\n", "\n", "# Export the sample df as a CSV file\n", - "sample_nest_phylum_allsamples_df.to_csv(f'{nestedness_dir}/1_nestedness_random_subset.csv')" + "sample_nest_phylum_allsamples_df.to_csv(\n", + " f\"{nestedness_dir}/1_nestedness_random_subset.csv\"\n", + ")" ] }, { @@ -972,7 +1047,9 @@ "ymax = nest_phylum_allsamples_df.OBSERVATION_RANK.max()\n", "\n", "# Get colors for each empo_3 category\n", - "nest_phylum_allsamples_df['color'] = nest_phylum_allsamples_df['empo_3'].apply(get_empo_cat_color)\n", + "nest_phylum_allsamples_df[\"color\"] = nest_phylum_allsamples_df[\"empo_3\"].apply(\n", + " get_empo_cat_color\n", + ")\n", "\n", "# Create the scatter plot\n", "fig = px.scatter(\n", @@ -980,9 +1057,15 @@ " x=\"SAMPLE_RANK\",\n", " y=\"OBSERVATION_RANK\",\n", " color=\"empo_3\",\n", - " color_discrete_map={empo: get_empo_cat_color(empo) for empo in nest_phylum_allsamples_df['empo_3'].unique()},\n", - " labels={\"SAMPLE_RANK\": \"All samples (sorted by richness)\", \"OBSERVATION_RANK\": \"Phyla (sorted by prevalence)\"},\n", - " template=\"plotly_white\"\n", + " color_discrete_map={\n", + " empo: get_empo_cat_color(empo)\n", + " for empo in nest_phylum_allsamples_df[\"empo_3\"].unique()\n", + " },\n", + " labels={\n", + " \"SAMPLE_RANK\": \"All samples (sorted by richness)\",\n", + " \"OBSERVATION_RANK\": \"Phyla (sorted by prevalence)\",\n", + " },\n", + " template=\"plotly_white\",\n", ")\n", "\n", "# Customize layout\n", @@ -990,9 +1073,9 @@ "fig.update_layout(\n", " width=1200, # Increase width for a wider plot\n", " height=600, # Adjust height if needed\n", - " xaxis=dict(range=[0, xmax+1], title_font=dict(size=20), tickfont=dict(size=18)),\n", - " yaxis=dict(range=[0, ymax+0.8], title_font=dict(size=20), tickfont=dict(size=18)),\n", - " showlegend=False\n", + " xaxis=dict(range=[0, xmax + 1], title_font=dict(size=20), tickfont=dict(size=18)),\n", + " yaxis=dict(range=[0, ymax + 0.8], title_font=dict(size=20), tickfont=dict(size=18)),\n", + " showlegend=False,\n", ")\n", "\n", "# Save the figure as an interactive HTML file\n", @@ -1015,7 +1098,9 @@ "ymax = nest_phylum_plantsamples_df.OBSERVATION_RANK.max()\n", "\n", "# Get colors for each empo_3 category\n", - "nest_phylum_plantsamples_df['color'] = nest_phylum_plantsamples_df['empo_3'].apply(get_empo_cat_color)\n", + "nest_phylum_plantsamples_df[\"color\"] = nest_phylum_plantsamples_df[\"empo_3\"].apply(\n", + " get_empo_cat_color\n", + ")\n", "\n", "# Create the scatter plot\n", "fig = px.scatter(\n", @@ -1023,9 +1108,15 @@ " x=\"SAMPLE_RANK\",\n", " y=\"OBSERVATION_RANK\",\n", " color=\"empo_3\",\n", - " color_discrete_map={empo: get_empo_cat_color(empo) for empo in nest_phylum_plantsamples_df['empo_3'].unique()},\n", - " labels={\"SAMPLE_RANK\": \"Plant samples (sorted by richness)\", \"OBSERVATION_RANK\": \"Phyla (sorted by prevalence)\"},\n", - " template=\"plotly_white\"\n", + " color_discrete_map={\n", + " empo: get_empo_cat_color(empo)\n", + " for empo in nest_phylum_plantsamples_df[\"empo_3\"].unique()\n", + " },\n", + " labels={\n", + " \"SAMPLE_RANK\": \"Plant samples (sorted by richness)\",\n", + " \"OBSERVATION_RANK\": \"Phyla (sorted by prevalence)\",\n", + " },\n", + " template=\"plotly_white\",\n", ")\n", "\n", "# Customize layout\n", @@ -1033,15 +1124,17 @@ "fig.update_layout(\n", " width=1200, # Increase width for a wider plot\n", " height=600, # Adjust height if needed\n", - " xaxis=dict(range=[0, xmax+1], title_font=dict(size=20), tickfont=dict(size=18)),\n", - " yaxis=dict(range=[0, ymax+0.8], title_font=dict(size=20), tickfont=dict(size=18)),\n", + " xaxis=dict(range=[0, xmax + 1], title_font=dict(size=20), tickfont=dict(size=18)),\n", + " yaxis=dict(range=[0, ymax + 0.8], title_font=dict(size=20), tickfont=dict(size=18)),\n", " legend=dict(\n", " orientation=\"h\", # Horizontal legend\n", - " yanchor=\"top\", y=-0.2, # Moves the legend below the x-axis\n", - " xanchor=\"center\", x=0.5, # Centers the legend\n", - " font=dict(size=16)\n", + " yanchor=\"top\",\n", + " y=-0.2, # Moves the legend below the x-axis\n", + " xanchor=\"center\",\n", + " x=0.5, # Centers the legend\n", + " font=dict(size=16),\n", " ),\n", - " legend_title_text=\"\"\n", + " legend_title_text=\"\",\n", ")\n", "\n", "# Save the figure as an interactive HTML file\n", @@ -1071,7 +1164,7 @@ "ymax = nest_phylum_animalsamples_df.OBSERVATION_RANK.max()\n", "\n", "# Create the figure and axis\n", - "fig, ax = plt.subplots(figsize=(500/30, 80/12.7)) # Adjust size as needed\n", + "fig, ax = plt.subplots(figsize=(500 / 30, 80 / 12.7)) # Adjust size as needed\n", "\n", "# Store legend handles and labels\n", "legend_handles = []\n", @@ -1081,9 +1174,16 @@ "for empo3 in np.sort(nest_phylum_animalsamples_df.empo_3.unique()):\n", " color = get_empo_cat_color(empo3)\n", " scatter = ax.scatter(\n", - " nest_phylum_animalsamples_df[nest_phylum_animalsamples_df.empo_3 == empo3].SAMPLE_RANK, \n", - " nest_phylum_animalsamples_df[nest_phylum_animalsamples_df.empo_3 == empo3].OBSERVATION_RANK, \n", - " marker='|', linewidths=2, label=empo3, color=color\n", + " nest_phylum_animalsamples_df[\n", + " nest_phylum_animalsamples_df.empo_3 == empo3\n", + " ].SAMPLE_RANK,\n", + " nest_phylum_animalsamples_df[\n", + " nest_phylum_animalsamples_df.empo_3 == empo3\n", + " ].OBSERVATION_RANK,\n", + " marker=\"|\",\n", + " linewidths=2,\n", + " label=empo3,\n", + " color=color,\n", " )\n", " legend_handles.append(scatter)\n", " legend_labels.append(empo3)\n", @@ -1091,20 +1191,26 @@ "# Customize labels and appearance\n", "ax.set_xlabel(\"Animal samples (sorted by richness)\", fontsize=20)\n", "ax.set_ylabel(\"Phyla (sorted by prevalence)\", fontsize=20)\n", - "ax.tick_params(axis='both', which='major', labelsize=18)\n", + "ax.tick_params(axis=\"both\", which=\"major\", labelsize=18)\n", "\n", "# Add legend\n", "ax.legend(\n", - " handles=legend_handles, labels=legend_labels,\n", - " loc='upper center', bbox_to_anchor=(0.5, -0.2), # Moves legend below x-axis\n", - " ncol=3, fontsize=16, frameon=False, scatterpoints=1, handletextpad=0.5\n", + " handles=legend_handles,\n", + " labels=legend_labels,\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.5, -0.2), # Moves legend below x-axis\n", + " ncol=3,\n", + " fontsize=16,\n", + " frameon=False,\n", + " scatterpoints=1,\n", + " handletextpad=0.5,\n", ")\n", "\n", "# Increase space at the bottom so the legend is not cut off\n", "plt.subplots_adjust(bottom=0.25)\n", "\n", - "ax.set_xlim([0, xmax+1])\n", - "ax.set_ylim([0, ymax+0.8])\n", + "ax.set_xlim([0, xmax + 1])\n", + "ax.set_ylim([0, ymax + 0.8])\n", "\n", "plt.tight_layout()\n", "fig.patch.set_alpha(0.0)\n", @@ -1126,7 +1232,7 @@ "ymax = nest_phylum_nonsalinesamples_df.OBSERVATION_RANK.max()\n", "\n", "# Create the figure and axis\n", - "fig, ax = plt.subplots(figsize=(500/30, 80/12.7)) # Adjust size as needed\n", + "fig, ax = plt.subplots(figsize=(500 / 30, 80 / 12.7)) # Adjust size as needed\n", "\n", "# Store legend handles and labels\n", "legend_handles = []\n", @@ -1136,9 +1242,16 @@ "for empo3 in np.sort(nest_phylum_nonsalinesamples_df.empo_3.unique()):\n", " color = get_empo_cat_color(empo3)\n", " scatter = ax.scatter(\n", - " nest_phylum_nonsalinesamples_df[nest_phylum_nonsalinesamples_df.empo_3 == empo3].SAMPLE_RANK, \n", - " nest_phylum_nonsalinesamples_df[nest_phylum_nonsalinesamples_df.empo_3 == empo3].OBSERVATION_RANK, \n", - " marker='|', linewidths=2, label=empo3, color=color\n", + " nest_phylum_nonsalinesamples_df[\n", + " nest_phylum_nonsalinesamples_df.empo_3 == empo3\n", + " ].SAMPLE_RANK,\n", + " nest_phylum_nonsalinesamples_df[\n", + " nest_phylum_nonsalinesamples_df.empo_3 == empo3\n", + " ].OBSERVATION_RANK,\n", + " marker=\"|\",\n", + " linewidths=2,\n", + " label=empo3,\n", + " color=color,\n", " )\n", " legend_handles.append(scatter)\n", " legend_labels.append(empo3)\n", @@ -1146,27 +1259,35 @@ "# Customize labels and appearance\n", "ax.set_xlabel(\"Non saline samples (sorted by richness)\", fontsize=20)\n", "ax.set_ylabel(\"Phyla (sorted by prevalence)\", fontsize=20)\n", - "ax.tick_params(axis='both', which='major', labelsize=18)\n", + "ax.tick_params(axis=\"both\", which=\"major\", labelsize=18)\n", "\n", "# Add legend\n", "ax.legend(\n", - " handles=legend_handles, labels=legend_labels,\n", - " loc='upper center', bbox_to_anchor=(0.5, -0.2), # Moves legend below x-axis\n", - " ncol=3, fontsize=16, frameon=False, scatterpoints=1, handletextpad=0.5\n", + " handles=legend_handles,\n", + " labels=legend_labels,\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.5, -0.2), # Moves legend below x-axis\n", + " ncol=3,\n", + " fontsize=16,\n", + " frameon=False,\n", + " scatterpoints=1,\n", + " handletextpad=0.5,\n", ")\n", "\n", "# Increase space at the bottom so the legend is not cut off\n", "plt.subplots_adjust(bottom=0.25)\n", "\n", - "ax.set_xlim([0, xmax+1])\n", - "ax.set_ylim([0, ymax+0.8])\n", + "ax.set_xlim([0, xmax + 1])\n", + "ax.set_ylim([0, ymax + 0.8])\n", "\n", "plt.tight_layout()\n", "fig.patch.set_alpha(0.0)\n", "\n", "# Save the figure\n", "os.makedirs(nestedness_dir, exist_ok=True)\n", - "nest_phylum_nonsalinesamples_path = os.path.join(nestedness_dir, \"5_non_saline_samples.png\")\n", + "nest_phylum_nonsalinesamples_path = os.path.join(\n", + " nestedness_dir, \"5_non_saline_samples.png\"\n", + ")\n", "plt.savefig(nest_phylum_nonsalinesamples_path, dpi=300, bbox_inches=\"tight\")" ] }, @@ -1198,7 +1319,9 @@ "outputs": [], "source": [ "# Create the output directory for the network analysis section and microbial networks subsection\n", - "network_dir = os.path.join(base_output_dir, \"3_Network_analysis/1_phyla_association_networks\")\n", + "network_dir = os.path.join(\n", + " base_output_dir, \"3_Network_analysis/1_phyla_association_networks\"\n", + ")\n", "os.makedirs(network_dir, exist_ok=True)\n", "\n", "# Load OTU counts table\n", @@ -1206,11 +1329,16 @@ "\n", "# Download the file and save it as a binary file\n", "response = requests.get(otu_counts)\n", - "with open(\"example_data/Earth_microbiome_vuegen_demo_notebook/emp_deblur_100bp.subset_2k.rare_5000.biom\", 'wb') as f:\n", + "with open(\n", + " \"example_data/Earth_microbiome_vuegen_demo_notebook/emp_deblur_100bp.subset_2k.rare_5000.biom\",\n", + " \"wb\",\n", + ") as f:\n", " f.write(response.content)\n", "\n", "# Load the BIOM file and convert it to a DataFrame\n", - "otu_counts_table = biom.load_table(\"example_data/Earth_microbiome_vuegen_demo_notebook/emp_deblur_100bp.subset_2k.rare_5000.biom\")" + "otu_counts_table = biom.load_table(\n", + " \"example_data/Earth_microbiome_vuegen_demo_notebook/emp_deblur_100bp.subset_2k.rare_5000.biom\"\n", + ")" ] }, { @@ -1221,8 +1349,8 @@ "source": [ "# Collapse the table to the phylum level\n", "phylum_idx = 1\n", - "collapse_f = lambda id_, md: '; '.join(md['taxonomy'][:phylum_idx + 1])\n", - "phyla_table = otu_counts_table.collapse(collapse_f, axis='observation')\n", + "collapse_f = lambda id_, md: \"; \".join(md[\"taxonomy\"][: phylum_idx + 1])\n", + "phyla_table = otu_counts_table.collapse(collapse_f, axis=\"observation\")\n", "\n", "# Convert the collapsed table to a DataFrame\n", "phyla_counts_df = phyla_table.to_dataframe()" @@ -1242,17 +1370,23 @@ "outputs": [], "source": [ "# Clean the index (which contains Phylum names) by removing unnecessary parts\n", - "phyla_counts_df.index = phyla_counts_df.index.str.split(';').str[-1].str.replace('p__', '', regex=False)\n", + "phyla_counts_df.index = (\n", + " phyla_counts_df.index.str.split(\";\").str[-1].str.replace(\"p__\", \"\", regex=False)\n", + ")\n", "\n", "# Remove special characters like [] and unnecessary spaces\n", - "phyla_counts_df.index = phyla_counts_df.index.str.replace('[', '', regex=False).str.replace(']', '', regex=False).str.strip()\n", + "phyla_counts_df.index = (\n", + " phyla_counts_df.index.str.replace(\"[\", \"\", regex=False)\n", + " .str.replace(\"]\", \"\", regex=False)\n", + " .str.strip()\n", + ")\n", "\n", "# Remove rows where the index only has 'k__' and 'Unclassified'\n", - "phyla_counts_df = phyla_counts_df[~(phyla_counts_df.index == 'Unclassified')]\n", - "phyla_counts_df = phyla_counts_df[~phyla_counts_df.index.str.contains('k__')]\n", + "phyla_counts_df = phyla_counts_df[~(phyla_counts_df.index == \"Unclassified\")]\n", + "phyla_counts_df = phyla_counts_df[~phyla_counts_df.index.str.contains(\"k__\")]\n", "\n", "# Remove duplicaye rows\n", - "phyla_counts_df = phyla_counts_df[~phyla_counts_df.index.duplicated(keep='first')]\n" + "phyla_counts_df = phyla_counts_df[~phyla_counts_df.index.duplicated(keep=\"first\")]" ] }, { @@ -1265,7 +1399,7 @@ "sample_phyla_counts_df = phyla_counts_df.sample(50, axis=1)\n", "\n", "# Export the sample df as a CSV file\n", - "sample_phyla_counts_df.to_csv(f'{network_dir}/1_phyla_counts_subset.csv')" + "sample_phyla_counts_df.to_csv(f\"{network_dir}/1_phyla_counts_subset.csv\")" ] }, { @@ -1314,10 +1448,15 @@ "# Remove singleton nodes (nodes with no edges)\n", "G.remove_nodes_from(list(nx.isolates(G)))\n", "\n", - "# Export network as an edge list in CSV format, the \"edge_list\" word should be in the file name to be \n", + "# Export network as an edge list in CSV format, the \"edge_list\" word should be in the file name to be\n", "# recognized as an edge list file\n", "edge_list = nx.to_pandas_edgelist(G)\n", - "edge_list.to_csv(os.path.join(network_dir, \"2_phyla_correlation_network_with_0.5_threshold_edgelist.csv\"), index=False)" + "edge_list.to_csv(\n", + " os.path.join(\n", + " network_dir, \"2_phyla_correlation_network_with_0.5_threshold_edgelist.csv\"\n", + " ),\n", + " index=False,\n", + ")" ] }, { @@ -1333,19 +1472,24 @@ "metadata": {}, "outputs": [], "source": [ - "# Draw the network \n", + "# Draw the network\n", "plt.figure(figsize=(8, 6))\n", "pos = nx.kamada_kawai_layout(G) # Layout for better visualization\n", - "nx.draw(G, pos, \n", - " with_labels=True, \n", - " node_size=500, \n", - " node_color=\"lightblue\", \n", - " edgecolors=\"black\",\n", - " linewidths=0.3, \n", - " font_size=10)\n", + "nx.draw(\n", + " G,\n", + " pos,\n", + " with_labels=True,\n", + " node_size=500,\n", + " node_color=\"lightblue\",\n", + " edgecolors=\"black\",\n", + " linewidths=0.3,\n", + " font_size=10,\n", + ")\n", "\n", "# Export the figure as a PNG file\n", - "network_path = os.path.join(network_dir, \"3_phyla_correlation_network_with_0.5_threshold.png\")\n", + "network_path = os.path.join(\n", + " network_dir, \"3_phyla_correlation_network_with_0.5_threshold.png\"\n", + ")\n", "plt.savefig(network_path, dpi=300, bbox_inches=\"tight\")" ] }, @@ -1382,7 +1526,9 @@ "source": [ "# Generate the report\n", "report_type = \"streamlit\"\n", - "report_generator.get_report(dir_path = base_output_dir, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " dir_path=base_output_dir, report_type=report_type, logger=None\n", + ")" ] }, { @@ -1395,14 +1541,20 @@ "# run_streamlit = True # uncomment line to run the streamlit report\n", "# Launch the Streamlit report depneding on the platform\n", "if not IN_COLAB and run_streamlit:\n", - " !streamlit run streamlit_report/sections/report_manager.py\n", + " !streamlit run streamlit_report/sections/report_manager.py\n", "elif run_streamlit:\n", - " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", - " print(\"Password/Enpoint IP for localtunnel is:\",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip(\"\\n\"))\n", - " # Run the Streamlit app in the background\n", - " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", - " # Expose the Streamlit app on port 8501\n", - " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", + " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", + " print(\n", + " \"Password/Enpoint IP for localtunnel is:\",\n", + " urllib.request.urlopen(\"https://ipv4.icanhazip.com\")\n", + " .read()\n", + " .decode(\"utf8\")\n", + " .strip(\"\\n\"),\n", + " )\n", + " # Run the Streamlit app in the background\n", + " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", + " # Expose the Streamlit app on port 8501\n", + " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", "else:\n", " print(\"Streamlit report not executed, set run_streamlit to True to run the report\")" ] @@ -1422,7 +1574,9 @@ "source": [ "# Generate the report\n", "report_type = \"html\"\n", - "report_generator.get_report(dir_path = base_output_dir, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " dir_path=base_output_dir, report_type=report_type, logger=None\n", + ")" ] }, { @@ -1446,10 +1600,14 @@ "metadata": {}, "outputs": [], "source": [ - "empo_logo_path = \"https://raw.githubusercontent.com/ElDeveloper/cogs220/master/emp-logo.svg\"\n", + "empo_logo_path = (\n", + " \"https://raw.githubusercontent.com/ElDeveloper/cogs220/master/emp-logo.svg\"\n", + ")\n", "\n", "# Load the YAML file\n", - "config_path = os.path.join(base_output_dir, \"Earth_microbiome_vuegen_demo_notebook_config.yaml\")\n", + "config_path = os.path.join(\n", + " base_output_dir, \"Earth_microbiome_vuegen_demo_notebook_config.yaml\"\n", + ")\n", "config = load_yaml_config(config_path)\n", "\n", "# Update the logo and graphical abstract with the URL\n", @@ -1471,15 +1629,19 @@ "source": [ "# Update the description for the EDA section\n", "for section in config[\"sections\"]:\n", - " if section[\"title\"] == \"Exploratory Data Analysis\": \n", - " section[\"description\"] = \"This section contains the exploratory data analysis of the Earth Microbiome Project (EMP) dataset.\"\n", + " if section[\"title\"] == \"Exploratory Data Analysis\":\n", + " section[\"description\"] = (\n", + " \"This section contains the exploratory data analysis of the Earth Microbiome Project (EMP) dataset.\"\n", + " )\n", "\n", "# Update the description for the alpha diversity subsection from the Metagenomics section\n", "for section in config[\"sections\"]:\n", " if section[\"title\"] == \"Metagenomics\":\n", " for subsection in section[\"subsections\"]:\n", " if subsection[\"title\"] == \"Alpha Diversity\":\n", - " subsection[\"description\"] = \"This subsection contains the alpha diversity analysis of the EMP dataset.\"\n" + " subsection[\"description\"] = (\n", + " \"This subsection contains the alpha diversity analysis of the EMP dataset.\"\n", + " )" ] }, { @@ -1498,11 +1660,11 @@ "# Define new plot with a URL as the file path\n", "chem_prop_plot = {\n", " \"title\": \"Physicochemical properties of the EMP samples\",\n", - " \"file_path\": \"https://raw.githubusercontent.com/biocore/emp/master/methods/images/figureED1_physicochemical.png\", \n", + " \"file_path\": \"https://raw.githubusercontent.com/biocore/emp/master/methods/images/figureED1_physicochemical.png\",\n", " \"description\": \"\",\n", " \"caption\": \"Pairwise scatter plots of available physicochemical metadat are shown for temperature, salinity, oxygen, and pH, and for phosphate, nitrate, and ammonium\",\n", " \"component_type\": \"plot\",\n", - " \"plot_type\": \"static\"\n", + " \"plot_type\": \"static\",\n", "}\n", "\n", "# Add the plot to the Sample Provenance subsection in the EDA section\n", @@ -1529,23 +1691,23 @@ "# Define new plot with a URL as the file path\n", "specif_seq_plot = {\n", " \"title\": \"Specificity of sequences and higher taxonomic groups for environment\",\n", - " \"file_path\": \"https://raw.githubusercontent.com/biocore/emp/master/methods/images/figure4_entropy.png\", \n", + " \"file_path\": \"https://raw.githubusercontent.com/biocore/emp/master/methods/images/figure4_entropy.png\",\n", " \"description\": \"\",\n", " \"caption\": \"a) Environment distribution in all genera and 400 randomly chosen tag sequence. b) and c) Shannon entropy within each taxonomic group.\",\n", " \"component_type\": \"plot\",\n", - " \"plot_type\": \"static\"\n", + " \"plot_type\": \"static\",\n", "}\n", "\n", "# Define the new subsection for the Shannon entropy analysis\n", "entropy_subsection = {\n", - " \"title\": \"Shanon entropy analysis\", \n", + " \"title\": \"Shanon entropy analysis\",\n", " \"description\": \"This subsection contains the Shannon entropy analysis of the EMP dataset.\",\n", - " \"components\": [specif_seq_plot] \n", + " \"components\": [specif_seq_plot],\n", "}\n", "\n", "# Add the new subsection to the Metagenomics section\n", "for section in config[\"sections\"]:\n", - " if section[\"title\"] == \"Metagenomics\": \n", + " if section[\"title\"] == \"Metagenomics\":\n", " section[\"subsections\"].append(entropy_subsection)\n", "\n", "# Save the modified YAML file\n", @@ -1570,7 +1732,9 @@ "source": [ "# Test the changes by generarating the report from the modified YAML file\n", "report_type = \"streamlit\"\n", - "report_generator.get_report(config_path = config_path, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " config_path=config_path, report_type=report_type, logger=None\n", + ")" ] }, { @@ -1583,14 +1747,20 @@ "# run_streamlit = True # uncomment line to run the streamlit report\n", "# Launch the Streamlit report depneding on the platform\n", "if not IN_COLAB and run_streamlit:\n", - " !streamlit run streamlit_report/sections/report_manager.py\n", + " !streamlit run streamlit_report/sections/report_manager.py\n", "elif run_streamlit:\n", - " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", - " print(\"Password/Enpoint IP for localtunnel is:\",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip(\"\\n\"))\n", - " # Run the Streamlit app in the background\n", - " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", - " # Expose the Streamlit app on port 8501\n", - " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", + " # see: https://discuss.streamlit.io/t/how-to-launch-streamlit-app-from-google-colab-notebook/42399\n", + " print(\n", + " \"Password/Enpoint IP for localtunnel is:\",\n", + " urllib.request.urlopen(\"https://ipv4.icanhazip.com\")\n", + " .read()\n", + " .decode(\"utf8\")\n", + " .strip(\"\\n\"),\n", + " )\n", + " # Run the Streamlit app in the background\n", + " !streamlit run streamlit_report/sections/report_manager.py --server.address=localhost &>/content/logs.txt &\n", + " # Expose the Streamlit app on port 8501\n", + " !npx localtunnel --port 8501 --subdomain vuegen-demo\n", "else:\n", " print(\"Streamlit report not executed, set run_streamlit to True to run the report\")" ] @@ -1610,7 +1780,9 @@ "source": [ "# Test the changes by generarating the report from the modified YAML file\n", "report_type = \"html\"\n", - "report_generator.get_report(config_path = config_path, report_type = report_type, logger = None)" + "report_generator.get_report(\n", + " config_path=config_path, report_type=report_type, logger=None\n", + ")" ] } ],