# Vorbereitung

In [1]:
# Setup

import os, sys
import pandas as pd

import qiime2
from tempfile import mkdtemp
from qiime2.plugins import demux, deblur, quality_filter, metadata, feature_table, alignment, phylogeny, diversity, emperor, feature_classifier, taxa, composition

workdir = os.getcwd()

if 'project.ipynb' in os.listdir(workdir):
    datadir = workdir + '/data'
    outputdir = workdir + '/output'
    sequencedir = datadir + '/sequences'

    # Create directories
    !mkdir -p data
    !mkdir -p data/sequences
    !mkdir -p output
    !mkdir -p output/viz
else:
    raise RuntimeError("Working directory not notebook directory. The Jupyter server has to be started in this notebook's directory")

print(os.getcwd())

print(f'Working directory: {workdir}')
print(f'Data directory: {datadir}')

%cd $workdir

/mnt/e/dev/pda
Working directory: /mnt/e/dev/pda
Data directory: /mnt/e/dev/pda/data
/mnt/e/dev/pda


In [2]:
# Rename files
#
# Rename every raw "<sample_name>.fastq.gz" in the sequence directory to
# "<sample_name><run_prefix[3:15]>001.fastq.gz"
# (e.g. 1629.SubjectIBD001_L2_L006_R1_001.fastq.gz), taking the run prefix of
# each sample from run_prefix.tsv.
#
# The decision is made per file: only files that still carry their raw name are
# renamed. Re-running the cell, or resuming after an interrupted run, is
# therefore safe and also picks up raw files that were added later.

df = pd.read_csv(f"{datadir}/run_prefix.tsv", sep="\t")
prefix = df[["sample_name", "run_prefix"]]

# listing directories
file_list = os.listdir(sequencedir)

for file in file_list:
    # Only files of study 1629 are handled here.
    if not file.startswith("1629"):
        continue

    # The first 18 characters are taken as the sample name
    # (e.g. "1629.SubjectIBD001").
    sample_id = file[:18]
    src = sample_id + ".fastq.gz"
    if file != src:
        # Already renamed (or not a raw per-sample file): nothing to do.
        continue

    matches = prefix.loc[prefix['sample_name'] == sample_id, 'run_prefix'].tolist()
    if not matches:
        raise ValueError(f"No run_prefix entry for sample '{sample_id}' in {datadir}/run_prefix.tsv")
    run_prefix = matches[0]

    # NOTE(review): run_prefix[3:15] is assumed to be the lane/read part of the
    # prefix (something like "_L2_L006_R1_") - confirm against run_prefix.tsv.
    dst = sample_id + run_prefix[3:15] + "001.fastq.gz"
    os.rename(f'{sequencedir}/{src}', f'{sequencedir}/{dst}')
    print(src)
    print(dst)

In [None]:
# Load data

if not 'demux-single-end.qza' in os.listdir(outputdir):
    !qiime tools import --type 'SampleData[SequencesWithQuality]' --input-path data/sequences --input-format CasavaOneEightSingleLanePerSampleDirFmt --output-path output/demux-single-end.qza
    !qiime demux summarize --i-data output/demux-single-end.qza --o-visualization output/viz/demux-single-end.qzv
    !qiime quality-filter q-score --i-demux output/demux-single-end.qza --o-filtered-sequences output/demux-filtered.qza --o-filter-stats output/demux-filter-stats.qza --verbose
    !qiime deblur denoise-16S --i-demultiplexed-seqs output/demux-filtered.qza --p-trim-length 90 --o-representative-sequences output/rep-seqs-deblur.qza --o-table output/table-deblur.qza --verbose --p-sample-stats --p-jobs-to-start 8 --o-stats output/deblur-stats.qza
    !qiime phylogeny align-to-tree-mafft-fasttree   --i-sequences output/rep-seqs.qza   --o-alignment output/aligned-rep-seqs.qza   --o-masked-alignment output/masked-aligned-rep-seqs.qza   --o-tree output/unrooted-tree.qza   --o-rooted-tree output/rooted-tree.qza --verbose
    !qiime diversity core-metrics-phylogenetic --i-phylogeny output/rooted-tree.qza --i-table output/table.qza --p-sampling-depth 1103 --m-metadata-file data/metadata.tsv --output-dir output/core-metrics-results --verbose

    
# Fallback: import the sequences through the Python API (lines below) only if the CLI import fails.
# sequences = qiime2.Artifact.import_data('SampleData[SequencesWithQuality]', f'{datadir}/sequences', view_type='CasavaOneEightSingleLanePerSampleDirFmt')
# sequences = qiime2.Artifact.load(f'{outputdir}/demux-single-end.qza')
# metadata = qiime2.Metadata.load(f'{datadir}/metadata.tsv')

# Save as .qza file if imported via Python
# sequences.save(f'{outputdir}/demux-single-end.qza')

In [None]:
# demux_summary = demux.visualizers.summarize(sequences.per_sample_sequences)
# demux_summary.visualization