In [None]:
# Run this script by entering the 10xbc environment and then running: python /path/to/StepOnePrepBarcodesCellRanger.py

import Bio
from Bio import SeqIO
from Bio.Seq import Seq
from rapidfuzz import fuzz
import matplotlib as plt
from matplotlib import pyplot
import numpy as np
from glob import glob
import os
import subprocess
from time import sleep
import statistics
import shutil
import re
import jstyleson

from Functions.cDNA_10x_starcode_prep import cDNA_10x_starcode_prep
from Functions.gDNA_starcode_prep import gDNA_starcode_prep

from Functions.cDNA_10x_after_starcode import cDNA_10x_after_starcode
from Functions.gDNA_after_starcode import gDNA_after_starcode

# Start-up banner: "Running..." framed by a single-space line above and below.
print(" ", "Running...", " ", sep="\n")


# Locate the paths_and_variables.json file.
# NOTE: despite its name, path_to_script holds the current working directory,
# so the JSON file is looked up in the folder the script is launched from.
path_to_script = os.path.abspath(os.getcwd())
path = os.path.expanduser(path_to_script + "/paths_and_variables.json")


# Read the raw text of paths_and_variables.json
with open(path, 'r') as myfile:
    data = myfile.read()

# Parse the settings text into a dictionary (jstyleson rather than the standard
# json module -- presumably so the file may contain comments; TODO confirm).
result_dict = jstyleson.loads(data)

# --- Locations ---
# path to scripts
scripts = result_dict['scripts']
# Folder that contains all folders containing FASTQ files generated from sequencing the barcodes
Fastqfolder10x = result_dict['Fastqfolder10x']
# Folder that contains all folders containing gDNA FASTQ files generated from sequencing the barcodes
FastqfoldergDNA = result_dict['FastqfoldergDNA']
# Folder you want outputs to go into (do not make this folder, this script will make it)
Outfolder = result_dict['Outfolder']

# --- Barcode settings ---
# common sequence right before the barcode starts
strtseq = result_dict['strtseq']
# whether the data has barcodes from 10x ("10x"), gDNA ("gDNA"), or both ("both")
barcodeSource = result_dict['barcodeSource']
# which samples should be run together in starcode
GSAMP = result_dict['GSAMP']
# length to keep from the sequenced barcode
bclen = result_dict['bclen']
# reverse complement of the common sequence right before the barcode starts
strtseq_revcomp = result_dict['strtseq_revcomp']
# percentage match required for the start sequence to be called as correct in a barcode
startseqMatch = result_dict['startseqMatch']
# allowed number of mismatches between barcodes to be called the same (starcode input)
sc_mm = result_dict['sc_mm']

#define function to determine whether a file has anything in its first line
def empty(fname):
    """Return True when nothing at all can be read from ``fname``.

    ``readline()`` yields ``""`` only at end of file, so this is True for a
    file with no content. A file whose first line is blank (just a newline)
    yields ``"\\n"`` and is therefore reported as not empty.

    Parameters
    ----------
    fname : path of the file to inspect; it must exist and be readable.
    """
    with open(fname) as handle:
        first_line = handle.readline()
    return first_line == ""

#define function that creates a folder when it does not exist yet
def does_folder_exist(path_to_folder):
    """Make sure ``path_to_folder`` exists, creating it when necessary.

    Nothing is done when something already exists at that path (a folder or
    any other kind of file). Otherwise the folder is created together with
    any missing parent folders.

    Parameters
    ----------
    path_to_folder : path of the folder that should exist afterwards.

    Returns
    -------
    None
    """
    if not os.path.exists(path_to_folder):
        # makedirs (unlike mkdir) also creates missing parents, and exist_ok
        # keeps this from failing if the folder appears between the check
        # above and the creation.
        os.makedirs(path_to_folder, exist_ok=True)



#--------------------------------------------------------------------------
# Define all paths and create folders

# Sub-folders of Outfolder used regardless of the barcode source
sc_in = Outfolder + "/starcode_inputs/"
sc_out = Outfolder + "/starcode_outputs/"
mod_R2 = Outfolder + "/Modified_fastq/"
CellR = Outfolder + "/CellRanger_inputs/"
CellRfq = CellR + "FASTQ/"

# Filtering folders for 10x barcodes (only defined when 10x data is present)
if barcodeSource in ('both', '10x'):
    filt_haveStart_10x = Outfolder + '/fastq_with_startseq_10x'
    filt_highQscore_10x = Outfolder + '/fastq_with_highQscore_10x'
    filt_WSN_10x = Outfolder + "/filtered_fastq_WSN_10x_Final_BC/"

# Filtering folders for gDNA barcodes (only defined when gDNA data is present)
if barcodeSource in ('both', 'gDNA'):
    filt_haveStart_gDNA = Outfolder + '/fastq_with_startseq_gDNA'
    filt_highQscore_gDNA = Outfolder + '/fastq_with_highQscore_gDNA'
    filt_WSN_gDNA = Outfolder + "/filtered_fastq_WSN_gDNA_Final_BC/"


# Add starcode to PATH
starcode_path = scripts + '/starcode/'
# PATH entries must be separated by os.pathsep (":" on POSIX, ";" on Windows).
# Appending the folder without it would glue it onto the last existing entry,
# corrupting that entry and leaving the starcode folder off the search path.
os.environ["PATH"] += os.pathsep + starcode_path


# Make any necessary folders
# Start with the folders needed for every barcode source ...
path_to_folders = [Outfolder, sc_in, sc_out, mod_R2, CellR, CellRfq]

# ... then add the source-specific filtering folders.
# (The *_highQscore_* folders are not part of this list, so they are not created here.)
if barcodeSource in ('both', '10x'):
    path_to_folders += [filt_haveStart_10x, filt_WSN_10x]

if barcodeSource in ('both', 'gDNA'):
    path_to_folders += [filt_haveStart_gDNA, filt_WSN_gDNA]

# Create each folder that does not exist yet (Outfolder comes first in the
# list, so it is created before its sub-folders)
for path_to_folder in path_to_folders:
    does_folder_exist(path_to_folder)


# Decompress the input FASTQ folders in place. `gunzip -r` walks the folder
# recursively; its exit status is not checked here.
if barcodeSource in ('both', '10x'):
    # unzip all files under the 10x barcode FASTQ folder
    gunzipCommand = ['gunzip', '-r', Fastqfolder10x]
    subprocess.call(gunzipCommand)

if barcodeSource in ('both', 'gDNA'):
    # unzip all files under the gDNA barcode FASTQ folder
    gunzipCommand = ['gunzip', '-r', FastqfoldergDNA]
    subprocess.call(gunzipCommand)






