# O Colab irá reiniciar nessa primeira etapa. Isso é normal.

In [None]:
!pip install -q condacolab
import condacolab
condacolab.install_miniconda()

# Aqui o Colab irá pedir autorização para conectar ao Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive (Colab asks for authorization).
# Later cells read their inputs from gdrive/MyDrive/trab2 using relative
# paths, so they assume the working directory is /content.
drive.mount('/content/gdrive')

# Aqui o pipeline irá fazer o download de arquivos externos necessários e criar uma estrutura de pastas

In [None]:
%%shell
# Prepare the working directory: copy the user-provided inputs from Google
# Drive, download the public FASTQ files and the pre-trained classifier,
# create the output folders and build the paired-end manifest for QIIME 2.

# Stop at the first failing command, unset variable or failed pipeline stage,
# so a missing input is reported here instead of surfacing in a later cell.
set -euo pipefail

# User-provided inputs (relative path: the Drive mount must be reachable as
# ./gdrive from the current directory).
cp gdrive/MyDrive/trab2/metadata-file.tsv metadata-file.tsv
cp gdrive/MyDrive/trab2/classifier-ncbi-unite.qza classifier-ncbi-unite.qza

# Paired-end reads (R1/R2) and the classifier published on data.qiime2.org.
wget -O "patient_joao_MICROBIOMA16S_S69_R1_001.fastq.gz" "https://aulas-pos-hiae-public-data.s3.sa-east-1.amazonaws.com/TCC-metagenomica/patient_joao_MICROBIOMA16S_S69_R1_001.fastq.gz"
wget -O "patient_joao_MICROBIOMA16S_S69_R2_001.fastq.gz" "https://aulas-pos-hiae-public-data.s3.sa-east-1.amazonaws.com/TCC-metagenomica/patient_joao_MICROBIOMA16S_S69_R2_001.fastq.gz"
wget -O "gg-13-8-99-515-806-nb-classifier.qza" "https://data.qiime2.org/2021.2/common/gg-13-8-99-515-806-nb-classifier.qza"

# Output folders used by the analysis cells.
mkdir -p fastq import dada2 visualization tree taxonomy

# One temporary file per manifest column, each starting with its header.
echo "sample-id" > sample-id.txt
echo "forward-absolute-filepath" > forward-absolute-filepath.txt
echo "reverse-absolute-filepath" > reverse-absolute-filepath.txt

# Sample id = R1 file name up to the first underscore.
ls *R1* | awk -F _ '{print $1}' >> sample-id.txt
# Absolute paths of the forward (R1) and reverse (R2) files; "$PWD" is quoted
# so a working directory containing spaces does not split the path.
find "$PWD"/*R1* >> forward-absolute-filepath.txt
find "$PWD"/*R2* >> reverse-absolute-filepath.txt

# Join the three columns (tab-separated) into the manifest.
paste sample-id.txt forward-absolute-filepath.txt reverse-absolute-filepath.txt > manifest-file.tsv

# Rodar o próximo bloco para a análise 16S

In [None]:
%%shell
# 16S analysis: create the QIIME 2 2021.2 environment, import the paired-end
# reads listed in manifest-file.tsv, denoise them with DADA2, classify the
# representative sequences and build the taxa bar plot.

ENV_NAME=qiime2-2021.2
ENV_FILE=qiime2-2021.2-py36-linux-conda.yml

# -O writes to a fixed name, so re-running the cell does not pile up
# ".yml.1", ".yml.2" copies.
wget -O "$ENV_FILE" "https://data.qiime2.org/distro/core/$ENV_FILE"

# Create the environment only when it does not exist yet; `conda env create`
# on an existing environment just fails.
if [ ! -d "$(conda info --base)/envs/$ENV_NAME" ]; then
  conda env create -n "$ENV_NAME" --file "$ENV_FILE" -qq
fi

eval "$(conda shell.bash hook)"
conda activate "$ENV_NAME"

# From here on, stop at the first failing QIIME step so later steps never run
# on missing or stale outputs.
set -e

qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path manifest-file.tsv --output-path import/import.qza --input-format PairedEndFastqManifestPhred33V2
qiime demux summarize --i-data import/import.qza --o-visualization visualization/import.qzv

# --p-n-threads 0 lets DADA2 use every core of the current runtime instead of
# a hard-coded thread count.
qiime dada2 denoise-paired --i-demultiplexed-seqs import/import.qza \
 --p-trunc-len-f 250 \
 --p-trunc-len-r 250 \
 --p-trim-left-f 17 \
 --p-trim-left-r 21 \
 --o-representative-sequences dada2/rep-seqs.qza \
 --p-n-threads 0 \
 --o-table table.qza \
 --o-denoising-stats dada2/stats.qza

qiime feature-table tabulate-seqs --i-data dada2/rep-seqs.qza --o-visualization visualization/rep-seqs.qzv
qiime metadata tabulate --m-input-file dada2/stats.qza --o-visualization visualization/stats.qzv

# Taxonomic classification with the classifier downloaded by the setup cell.
qiime feature-classifier classify-sklearn \
 --i-classifier gg-13-8-99-515-806-nb-classifier.qza \
 --p-reads-per-batch 10000 \
 --i-reads dada2/rep-seqs.qza \
 --o-classification taxonomy/taxonomy.qza

qiime metadata tabulate \
 --m-input-file taxonomy/taxonomy.qza \
 --o-visualization visualization/taxonomy.qzv

qiime taxa barplot \
  --i-table table.qza \
  --i-taxonomy taxonomy/taxonomy.qza \
  --m-metadata-file metadata-file.tsv \
  --o-visualization visualization/taxa-bar-plots.qzv

# The pipeline is done; do not let environment teardown fail the cell.
set +e
conda deactivate

# Rodar o próximo bloco para análise ITS

In [None]:
%%shell
# ITS analysis: create the QIIME 2 2022.2 environment, denoise the imported
# reads with DADA2, classify them with classifier-ncbi-unite.qza and build the
# taxa bar plot. Outputs carry a "-ncbi-unite" / "2" suffix so they do not
# overwrite the 16S results.
# NOTE(review): the reads in the manifest are the "MICROBIOMA16S" FASTQ files;
# confirm that reusing them for the ITS analysis is intended.

ENV_NAME=qiime2-2022.2
ENV_FILE=qiime2-2022.2-py38-linux-conda.yml

# -O writes to a fixed name, so re-running the cell does not pile up
# ".yml.1", ".yml.2" copies.
wget -O "$ENV_FILE" "https://data.qiime2.org/distro/core/$ENV_FILE"

# Create the environment only when it does not exist yet; `conda env create`
# on an existing environment just fails.
if [ ! -d "$(conda info --base)/envs/$ENV_NAME" ]; then
  conda env create -n "$ENV_NAME" --file "$ENV_FILE" -qq
fi

eval "$(conda shell.bash hook)"
conda activate "$ENV_NAME"

# From here on, stop at the first failing QIIME step so later steps never run
# on missing or stale outputs.
set -e

# import/import.qza is normally produced by the 16S cell. Build it here when
# it is missing so this cell also works when run on its own.
if [ ! -f import/import.qza ]; then
  qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path manifest-file.tsv --output-path import/import.qza --input-format PairedEndFastqManifestPhred33V2
fi

# --p-n-threads 0 lets DADA2 use every core of the current runtime instead of
# a hard-coded thread count.
qiime dada2 denoise-paired \
  --i-demultiplexed-seqs import/import.qza \
  --p-trunc-len-f 200 \
  --p-trunc-len-r 200 \
  --p-trim-left-f 17 \
  --p-trim-left-r 21 \
  --o-representative-sequences rep-seqs-ncbi-unite.qza \
  --p-n-threads 0 \
  --o-table table-ncbi-unite.qza \
  --o-denoising-stats stats-ncbi-unite.qza

qiime feature-table tabulate-seqs --i-data rep-seqs-ncbi-unite.qza --o-visualization visualization/rep-seqs2.qzv
qiime metadata tabulate --m-input-file stats-ncbi-unite.qza --o-visualization visualization/stats2.qzv

# Taxonomic classification with the classifier copied from Google Drive by the
# setup cell.
qiime feature-classifier classify-sklearn \
 --i-classifier classifier-ncbi-unite.qza \
 --p-reads-per-batch 10000 \
 --i-reads rep-seqs-ncbi-unite.qza \
 --o-classification taxonomy/taxonomy2.qza

qiime metadata tabulate \
 --m-input-file taxonomy/taxonomy2.qza \
 --o-visualization visualization/taxonomy2.qzv

qiime taxa barplot \
  --i-table table-ncbi-unite.qza \
  --i-taxonomy taxonomy/taxonomy2.qza \
  --m-metadata-file metadata-file.tsv \
  --o-visualization visualization/taxa-bar-plots2.qzv

# The pipeline is done; do not let environment teardown fail the cell.
set +e
conda deactivate

# Rodar os próximos blocos para gerar os gráficos

In [None]:
import pandas as pd
import os

# Lift pandas' display limits on column count and cell width.
for display_option in ('display.max_columns', 'display.max_colwidth'):
  pd.set_option(display_option, None)

# Load the 16S and ITS result tables (comma-separated, pandas' default) from
# the mounted Drive folder. index_col=False keeps the first CSV column as data
# instead of using it as the row index.
df16s = pd.read_csv('gdrive/MyDrive/trab2/results-16s.csv', index_col=False)
dfits = pd.read_csv('gdrive/MyDrive/trab2/results-its.csv', index_col=False)

def deepest_informative_label(column_name, min_length=4):
  """Return the last ';'-separated piece of ``column_name`` that is long enough.

  The pieces are scanned from right to left and the first one with at least
  ``min_length`` characters is returned; when no piece qualifies the result
  is an empty string.

  NOTE(review): the default of 4 presumably skips empty rank placeholders
  such as 'g__' (3 characters) -- confirm against the exported CSV headers.
  """
  for piece in reversed(column_name.split(';')):
    if len(piece) >= min_length:
      return piece
  return ''


def select_taxon_columns(table):
  """Shorten the headers of ``table`` and keep only the '__' columns.

  The column names of ``table`` are replaced in place by their shortened
  labels (see ``deepest_informative_label``). The returned frame contains only
  the columns whose new label includes '__'; every other column is dropped
  from the result.
  """
  table.columns = [deepest_informative_label(name) for name in table.columns]
  keep = ['__' in label for label in table.columns]
  return table.loc[:, keep]


# Transpose so each kept label becomes a row, which the plotting cells draw as
# one bar / pie slice.
df16s_filtered = select_taxon_columns(df16s)
df16s_plottable = df16s_filtered.transpose()

dfits_filtered = select_taxon_columns(dfits)
dfits_plottable = dfits_filtered.transpose()

In [None]:
# Bar chart of the 16S table, one bar group per row (label). The title tells
# it apart from the ITS chart; the trailing ';' hides the Axes repr.
df16s_plottable.plot.bar(legend=False, title='16S');

In [None]:
# Pie chart of the column labelled 0 of the 16S table (presumably the first
# row of the CSV -- confirm); autopct prints each share with one decimal.
# The title tells it apart from the ITS chart; ';' hides the Axes repr.
df16s_plottable.plot.pie(y=0, legend=False, autopct='%1.1f%%', title='16S');

In [None]:
# Bar chart of the ITS table, one bar group per row (label). The title tells
# it apart from the 16S chart; the trailing ';' hides the Axes repr.
dfits_plottable.plot.bar(legend=False, title='ITS');

In [None]:
# Pie chart of the column labelled 0 of the ITS table (presumably the first
# row of the CSV -- confirm); autopct prints each share with one decimal.
# The title tells it apart from the 16S chart; ';' hides the Axes repr.
dfits_plottable.plot.pie(y=0, legend=False, autopct='%1.1f%%', title='ITS');