## Demultiplex reads by DNA marker and location

Xiaoping Li  
Oregon State University Hermiston Agricultural Research and Extension Center

1/15/2020

In [1]:
from sort_reads import Sort_reads
from metaBarTools import metaBar_PreX
import os

### First task: separate the rbcL and ITS2 reads contained in the raw data

In [2]:
# Where the raw reads live (placeholders to be filled in by the user) ...
readspath1 = "reads1 path"
readspath2 = "reads2 path"

# ... and the working-copy folders, created next to this notebook's
# current working directory.
readscopyfolder1, readscopyfolder2 = (
    os.path.join(os.getcwd(), copy_name)
    for copy_name in ('beepollen1-reads-copy', 'beepollen2-reads-copy')
)

In [3]:
# Metadata inputs for qiime2. The grouping file is kept as a relative path;
# the metadata CSV and the plate-setup workbook are resolved to absolute paths.
grouping_file = "./meta_group_location.csv"
metafile, platesetup = map(
    os.path.abspath,
    ("./meta_beepollen_all.csv", "./beepollen_all.xlsx"),
)

In [4]:
# Helper object from metaBarTools; the later cells call its
# metaBar_makeSubDir and metaBar_Copy methods.
metaBar = metaBar_PreX()

In [21]:
# Set up the "Regional_reads" folder that the demultiplexed reads are moved
# into further down.
# NOTE(review): both sub-directory names are empty strings, so path_ITS and
# path_rbcL presumably both resolve to "Regional_reads" itself; neither name is
# used again in the visible cells -- confirm against metaBar_makeSubDir.
path_ITS, path_rbcL = metaBar.metaBar_makeSubDir("Regional_reads", ["", ""])


        ####################################################
        #########          metaBar_makeSubDir     ##########
        ####################################################
        


In [None]:
# Copy all raw reads into the working directory so the original files are
# never touched by the downstream steps.
#
# The copy is skipped for a destination folder that already contains files, so
# this cell can be re-run (e.g. Restart Kernel -> Run All) without having to be
# commented out by hand after the first copy.
# NOTE(review): an interrupted, partial copy also counts as "already
# populated"; delete the destination folder to force a fresh copy.
for raw_reads_dir, working_copy_dir in (
    (readspath1, readscopyfolder1),
    (readspath2, readscopyfolder2),
):
    if os.path.isdir(working_copy_dir) and os.listdir(working_copy_dir):
        print("Skipping copy, folder already populated: {}".format(working_copy_dir))
    else:
        metaBar.metaBar_Copy(raw_reads_dir, working_copy_dir)

In [8]:
# separating the mixed reads (rbcL and ITS2)
!python demuplex_reads.py --reads $readscopyfolder1 $readscopyfolder2 --m 3 --primer ./primerfile

Total reads 1114
ITS-reads: 100%|████████████████████████████| 557/557 [6:17:38<00:00, 22.05s/it]
rbcL-reads: 100%|███████████████████████████| 557/557 [1:24:32<00:00,  4.87s/it]
Completed!


### Second task: separate the marker-specific reads from above by location

In [22]:
# move the ITS reads and rbcL reads to the Regional_reads folder

!mv ITS-reads/ Regional_reads/
!mv rbcL-reads/ Regional_reads/

In [23]:
# Absolute locations of the per-marker read folders inside Regional_reads
ITS_reads_all, rbcL_reads_all = (
    os.path.abspath(os.path.join('Regional_reads', marker_dir))
    for marker_dir in ('ITS-reads', 'rbcL-reads')
)

In [31]:
# Sanity check: display the resolved ITS reads folder
ITS_reads_all

'/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Regional_reads/ITS-reads'

In [26]:
# Set up one sub-folder per marker under "Local_reads". The returned paths are
# the destinations handed to copy_reads in the grouping cells below.
ITS_reads_local, rbcL_reads_local = metaBar.metaBar_makeSubDir("Local_reads", ["ITS_reads_local", "rbcL_reads_local"])


        ####################################################
        #########          metaBar_makeSubDir     ##########
        ####################################################
        


In [27]:
# Group the ITS2 reads by study site and copy each group into its own folder.
# NOTE(review): "Location" is presumably a column of the metadata file passed
# to Sort_reads -- confirm against sort_reads.py.
grouped_ITS = Sort_reads(ITS_reads_all, metafile)
grouping_ITS = grouped_ITS.grouping_reads("Location")
# Per the recorded output, copy_reads returns a dict that maps each group label
# (e.g. 'Blank', 'Mix', 'OA') to the folder created for it under
# Local_reads/ITS_reads_local/reads_copy_by_location.
grouping_ITS_path = grouping_ITS.copy_reads(ITS_reads_local, "reads_copy_by_location")

Blank # reads - 12 - progress: 100%|██████████| 12/12 [00:00<00:00, 869.42it/s]
Mix # reads - 30 - progress:  20%|██        | 6/30 [00:00<00:00, 53.64it/s]

Original total number of reads: 1114

There are 1114 reads in grouped files


Mix # reads - 30 - progress: 100%|██████████| 30/30 [00:00<00:00, 67.20it/s]
OA # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 72.37it/s]
PCR_Neg # reads - 14 - progress: 100%|██████████| 14/14 [00:00<00:00, 733.80it/s]
PG # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 66.85it/s]
Plate-Blank # reads - 14 - progress: 100%|██████████| 14/14 [00:00<00:00, 2144.09it/s]
RDO # reads - 318 - progress: 100%|██████████| 318/318 [00:06<00:00, 47.22it/s]
SO # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 117.91it/s]
Star # reads - 448 - progress: 100%|██████████| 448/448 [00:18<00:00, 23.69it/s]
TM # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 55.24it/s]
VV # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 46.34it/s]
ZUM # reads - 248 - progress: 100%|██████████| 248/248 [00:11<00:00, 20.78it/s]

Completed





In [28]:
# Display the group label -> folder mapping produced for the ITS2 reads
grouping_ITS_path

{'Blank': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/ITS_reads_local/reads_copy_by_location/Blank',
 'Mix': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/ITS_reads_local/reads_copy_by_location/Mix',
 'OA': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/ITS_reads_local/reads_copy_by_location/OA',
 'PCR_Neg': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/ITS_reads_local/reads_copy_by_location/PCR_Neg',
 'PG': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/ITS_reads_local/reads_copy_by_location/PG',
 'Plate-Blank': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/ITS_reads_local/reads_copy_by_location/Plate-Blank',
 'RDO': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Loca

In [29]:
# Group the rbcL reads by study site and copy each group into its own folder.
# This mirrors the ITS2 grouping cell, with the rbcL source and destination.
# NOTE(review): "Location" is presumably a column of the metadata file passed
# to Sort_reads -- confirm against sort_reads.py.

grouped_rbcL = Sort_reads(rbcL_reads_all, metafile)
grouping_rbcL = grouped_rbcL.grouping_reads("Location")
# Per the recorded output, copy_reads returns a dict that maps each group label
# to the folder created for it under
# Local_reads/rbcL_reads_local/reads_copy_by_location.
grouping_rbcL_path = grouping_rbcL.copy_reads(rbcL_reads_local, "reads_copy_by_location")

Blank # reads - 12 - progress: 100%|██████████| 12/12 [00:00<00:00, 3990.77it/s]
Mix # reads - 30 - progress: 100%|██████████| 30/30 [00:00<00:00, 5576.79it/s]
OA # reads - 6 - progress:   0%|          | 0/6 [00:00<?, ?it/s]

Original total number of reads: 1114

There are 1114 reads in grouped files


OA # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 24.20it/s]
PCR_Neg # reads - 14 - progress: 100%|██████████| 14/14 [00:00<00:00, 4771.29it/s]
PG # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 3693.79it/s]
Plate-Blank # reads - 14 - progress: 100%|██████████| 14/14 [00:00<00:00, 4209.94it/s]
RDO # reads - 318 - progress: 100%|██████████| 318/318 [00:00<00:00, 4966.58it/s]
SO # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 58.37it/s]
Star # reads - 448 - progress: 100%|██████████| 448/448 [00:01<00:00, 269.26it/s]
TM # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 2068.03it/s]
VV # reads - 6 - progress: 100%|██████████| 6/6 [00:00<00:00, 79.09it/s]
ZUM # reads - 248 - progress: 100%|██████████| 248/248 [00:00<00:00, 3937.26it/s]

Completed





In [30]:
# Display the group label -> folder mapping produced for the rbcL reads
grouping_rbcL_path

{'Blank': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/rbcL_reads_local/reads_copy_by_location/Blank',
 'Mix': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/rbcL_reads_local/reads_copy_by_location/Mix',
 'OA': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/rbcL_reads_local/reads_copy_by_location/OA',
 'PCR_Neg': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/rbcL_reads_local/reads_copy_by_location/PCR_Neg',
 'PG': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/rbcL_reads_local/reads_copy_by_location/PG',
 'Plate-Blank': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_github/Local_reads/rbcL_reads_local/reads_copy_by_location/Plate-Blank',
 'RDO': '/media/swaggyp1985/HDD4T/OSU_Projects_2017-2018/running_project/beepollen_githu