# Introduction

To easily find the parameters in this Jupyter notebook, select View -> Cell Toolbar -> Tags from the menu bar. All the parameter cells have the tag "parameter".

# Download GNPS Data

In [None]:
# importing necessary modules
import networkx as nx
from gnpsdata import taskresult
import os
import csv
import pandas as pd
import pandas as pd
import numpy as np
import os
import itertools
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
from sklearn.decomposition import PCA
import scipy.stats as stats
import pingouin as pg
import skbio # Don't import on Windows!!
from ipyfilechooser import FileChooser
from ipywidgets import interact
from pynmranalysis.normalization import PQN_normalization
import warnings

from qiime2 import Visualization

from gnpsdata import workflow_fbmn

In [None]:
# GNPS1 task id
# Identifier of the GNPS1 job; it is passed to the workflow_fbmn download calls in the next cell.
task = "cf6e14abf5604f47b28b467a513d3532"

In [None]:
# Make sure the output folder exists before any file is written into it
os.makedirs("./QIIME2/output_QIIME2_Notebook", exist_ok=True)

# GNPS1 feature quantification table for `task`
workflow_fbmn.download_quantification(
    task,
    "./QIIME2/output_QIIME2_Notebook/quant.csv",
    gnps2=False,
)

# GNPS1 sample metadata for `task`
workflow_fbmn.download_metadata(
    task,
    "./QIIME2/output_QIIME2_Notebook/unprocessed_metadata.tsv",
    gnps2=False,
)

# QIIME 2 artifact for `task`, downloaded ready-made as a .qza file
workflow_fbmn.download_qiime2(
    task,
    "./QIIME2/output_QIIME2_Notebook/qiime_table.qza",
)

In [None]:
# GNPS2 task id
# NOTE: this reassigns `task`, replacing the GNPS1 id set earlier in the notebook.
# The GNPS2 download cell below writes to the same quant.csv / unprocessed_metadata.tsv
# paths as the GNPS1 download cell, so the files fetched last are the ones used downstream.
task = "eb59f7b4ffe04267acec3dad7c51a655"

In [None]:
# Make sure the output folder exists before any file is written into it
os.makedirs("./QIIME2/output_QIIME2_Notebook", exist_ok=True)

# GNPS2 feature quantification table for `task`
workflow_fbmn.download_quantification(
    task,
    "./QIIME2/output_QIIME2_Notebook/quant.csv",
    gnps2=True,
)

# GNPS2 sample metadata for `task`
workflow_fbmn.download_metadata(
    task,
    "./QIIME2/output_QIIME2_Notebook/unprocessed_metadata.tsv",
    gnps2=True,
)

# Creating the Qiime2 artifact

# Convert the quantification CSV into the tab-separated table that is handed to
# `biom convert` later in this cell: an '#OTU ID' column (taken from the first
# CSV column) followed by one column per "Peak area" column of the input.

# Replace the following file names with your own
input_file = './QIIME2/output_QIIME2_Notebook/quant.csv'
output_file = './QIIME2/output_QIIME2_Notebook/biom_quant.tsv'

# newline='' is what the csv module asks for on files given to csv.reader/csv.writer
with open(input_file, 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    header = next(reader, None)  # Header row, or None when the file is empty
    if header is None:
        raise ValueError(f"Quantification file is empty: {input_file}")

    # Positions of the columns whose header contains "Peak area"
    peak_area_indexes = [i for i, name in enumerate(header) if 'Peak area' in name]
    if not peak_area_indexes:
        raise ValueError(f"No 'Peak area' columns found in {input_file}")

    # The output file is only opened once the input header has been validated,
    # so a bad input does not leave an empty TSV behind.
    with open(output_file, 'w', newline='') as tsv_file:
        writer = csv.writer(tsv_file, delimiter='\t')

        # Drop the "Peak area" marker AND the whitespace around it: a header such as
        # "sample.mzML Peak area" must become "sample.mzML", not "sample.mzML "
        # (a trailing space would stop the name from matching the metadata file names).
        sample_names = [
            header[i].replace('Peak area', '').strip() for i in peak_area_indexes
        ]
        writer.writerow(['#OTU ID'] + sample_names)

        for row in reader:
            # csv.reader yields [] for blank lines; skip them instead of failing on row[0]
            if not row:
                continue
            # First column is the feature identifier; the remaining values are copied verbatim
            writer.writerow([row[0]] + [row[i] for i in peak_area_indexes])

# Convert the tab-separated table written above into an HDF5 BIOM file (quant.biom).
! biom convert \
  -i ./QIIME2/output_QIIME2_Notebook/biom_quant.tsv \
  -o ./QIIME2/output_QIIME2_Notebook/quant.biom --to-hdf5

# Import quant.biom into QIIME 2 as a FeatureTable[Frequency] artifact (qiime_table.qza).
# This writes the same path that the GNPS1 cell fills via workflow_fbmn.download_qiime2.
! qiime tools import \
  --input-path ./QIIME2/output_QIIME2_Notebook/quant.biom \
  --type 'FeatureTable[Frequency]' \
  --input-format BIOMV210Format \
  --output-path ./QIIME2/output_QIIME2_Notebook/qiime_table.qza

# Changing Metadata and Manifest Column name

In [None]:
# Load the downloaded metadata (tab-separated, no column used as the index) and
# rename its "filename" column to "sample id" in a single chained expression.
metadata = (
    pd.read_csv(
        "./QIIME2/output_QIIME2_Notebook/unprocessed_metadata.tsv",
        sep="\t",
        index_col=False,
    )
    .rename(columns={"filename": "sample id"})
)

# Save the renamed table as metadata.tsv, again tab-separated and without the pandas index
metadata.to_csv(
    './QIIME2/output_QIIME2_Notebook/metadata.tsv',
    sep="\t",
    index=False,
)

In [None]:
# Disable warnings for cleaner output, comment out for debugging
# With no category or message given, this installs an 'ignore' filter that matches every Python warning.
warnings.filterwarnings('ignore')

# Blank Removal

In [None]:
# Blank removal is done via a QIIME 2 plugin: `blankremoval-function` from
# `blankremoval-plugin` is run on qiime_table.qza and writes
# qiime_table_blanksremoved.qza.
# NOTE(review): this step is given unprocessed_metadata.tsv (with the original
# "filename" column) rather than the renamed metadata.tsv; presumably that is what
# the plugin expects. Verify.

! qiime blankremoval-plugin blankremoval-function \
--i-input-artifact ./QIIME2/output_QIIME2_Notebook/qiime_table.qza \
--p-metadatafile ./QIIME2/output_QIIME2_Notebook/unprocessed_metadata.tsv \
--o-output-artifact ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved.qza

# Imputation

In [None]:
# Run `imputation-function` from `imputation-plugin` on the blank-removed table;
# the result is written to qiime_table_blanksremoved_imputed.qza.
! qiime imputation-plugin imputation-function \
--i-input-artifact ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved.qza \
--o-output-artifact ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved_imputed.qza

# Normalization

In [None]:
# Run `normalize-function` from `normalization-plugin` on the imputed table.
# Two artifacts are written: a "frequency" output (used by the distance-matrix step)
# and a "relative" output.
! qiime normalization-plugin normalize-function \
--i-input-artifact ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved_imputed.qza \
--o-output-artifact-frequency ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved_imputed_normalization.qza \
--o-output-artifact-relative ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved_imputed_normalization_relative.qza

# Distance Matrix

In [None]:
# Beta-diversity metric; passed as --p-metric to `qiime diversity beta` in the next cell
p_metric = 'canberra_adkins'

In [None]:
# Compute the beta-diversity distance matrix from the normalized (frequency) table,
# using the metric held in the Python variable `p_metric` ($p_metric is expanded by IPython).
! qiime diversity beta \
  --i-table ./QIIME2/output_QIIME2_Notebook/qiime_table_blanksremoved_imputed_normalization.qza \
  --p-metric $p_metric \
  --o-distance-matrix ./QIIME2/output_QIIME2_Notebook/distance_matrix.qza

# Principal Coordinate Analysis (PCoA)

In [None]:
# Run PCoA on distance_matrix.qza and store the result as pcoa.qza.
! qiime diversity pcoa \
  --i-distance-matrix ./QIIME2/output_QIIME2_Notebook/distance_matrix.qza \
  --o-pcoa ./QIIME2/output_QIIME2_Notebook/pcoa.qza

# Emperor plot

In [None]:
# Build the Emperor plot from pcoa.qza, annotated with metadata.tsv.
# The output path carries an explicit `.qzv` extension (and no trailing whitespace)
# so it matches the other visualization outputs in this notebook and the
# emperor_plot.qzv file that the Visualization cell loads.
! qiime emperor plot \
  --i-pcoa ./QIIME2/output_QIIME2_Notebook/pcoa.qza \
  --m-metadata-file ./QIIME2/output_QIIME2_Notebook/metadata.tsv \
  --o-visualization ./QIIME2/output_QIIME2_Notebook/emperor_plot.qzv


# Visualization

In [None]:
# Load the Emperor visualization; as the last expression of the cell it becomes the cell's output.
Visualization.load('./QIIME2/output_QIIME2_Notebook/emperor_plot.qzv')

# Classifier Data/Heat Map

In [None]:
# Settings forwarded to `qiime sample-classifier classify-samples` in the next cell
metadata_column = 'ATTRIBUTE_Sample_Area'  # passed as --m-metadata-column
estimator = 'RandomForestClassifier'  # passed as --p-estimator
n_estimators = 500  # passed as --p-n-estimators
random_state = 123  # passed as --p-random-state


In [None]:
# Run `classify-samples` on the feature table against the metadata column held in
# `metadata_column`, with feature-selection optimization and parameter tuning switched on.
# The estimator, number of estimators and random state come from the parameter cell above.
# All outputs (accuracy results, feature importance, heatmap, model summary, predictions,
# probabilities, sample estimator, test/training targets) go to the notebook output folder.
# NOTE(review): the input is qiime_table.qza, not the blank-removed / imputed /
# normalized table produced earlier. Confirm that this is intended.
! qiime sample-classifier classify-samples \
  --i-table ./QIIME2/output_QIIME2_Notebook/qiime_table.qza \
  --m-metadata-file ./QIIME2/output_QIIME2_Notebook/metadata.tsv \
  --m-metadata-column $metadata_column \
  --p-optimize-feature-selection \
  --p-parameter-tuning \
  --p-estimator $estimator \
  --p-n-estimators $n_estimators \
  --p-random-state $random_state \
  --o-accuracy-results ./QIIME2/output_QIIME2_Notebook/accuracy_results.qzv \
  --o-feature-importance ./QIIME2/output_QIIME2_Notebook/feature_importance.qza \
  --o-heatmap ./QIIME2/output_QIIME2_Notebook/heatmap.qzv \
  --o-model-summary ./QIIME2/output_QIIME2_Notebook/model_summary.qzv \
  --o-predictions ./QIIME2/output_QIIME2_Notebook/predictions.qza \
  --o-probabilities ./QIIME2/output_QIIME2_Notebook/probabilities.qza \
  --o-sample-estimator ./QIIME2/output_QIIME2_Notebook/sample_estimator.qza \
  --o-test-targets ./QIIME2/output_QIIME2_Notebook/test_targets.qza \
  --o-training-targets ./QIIME2/output_QIIME2_Notebook/training_targets.qza 


# Visualization

In [None]:
# Load the classifier heatmap visualization; as the last expression of the cell it becomes the cell's output.
Visualization.load('./QIIME2/output_QIIME2_Notebook/heatmap.qzv')

# PermANOVA

In [None]:
# Metadata column used for grouping; passed as --m-metadata-column to
# `qiime diversity beta-group-significance` in the next cell
metadata_column_permanova = 'ATTRIBUTE_Sample_Area'

In [None]:
# Test for group differences on distance_matrix.qza, grouping samples by the metadata
# column held in `metadata_column_permanova`; the report is saved as permanova.qzv.
# NOTE(review): no --p-method is given, so the command's default method is used.
# Presumably this is PERMANOVA, given the output name. Verify.
! qiime diversity beta-group-significance \
  --i-distance-matrix ./QIIME2/output_QIIME2_Notebook/distance_matrix.qza \
  --m-metadata-file ./QIIME2/output_QIIME2_Notebook/metadata.tsv \
  --m-metadata-column $metadata_column_permanova \
  --o-visualization ./QIIME2/output_QIIME2_Notebook/permanova.qzv

# Visualization

In [None]:
# Load the group-significance visualization; as the last expression of the cell it becomes the cell's output.
Visualization.load('./QIIME2/output_QIIME2_Notebook/permanova.qzv')