This Jupyter notebook adds barcodes and adapters to MPRA oligos, and double-checks the sequences so that unwanted restriction enzyme sites are removed

Note that barcodes have already been generated using DNABarcodes for EXP_00014_Saturation_Mutagenesis

Outline:

Append adapters with Hamming distance restricted barcodes
Scan sequences for AsiSI, PspXI, and SfiI sites and either use a different barcode or exclude the sequence

In [1]:
import pandas as pd
import math
import re
from Bio import SeqIO

In [2]:
# read in fasta regions
# (tab-delimited table of candidate fragments, loaded with pandas' default read_table settings)
# NOTE(review): absolute, user-specific path - the notebook only runs on a machine with this Dropbox layout
SCZ_MPRA_regions = pd.read_table("/Users/tshin/Dropbox (CA Walsh Lab)/Walsh Lab/Scripts/Projects_Not_HAR/Misc/Eduardo/saved_data/SCZ_variant_validation_fasta.txt")

In [3]:
# preview the loaded fragment table
SCZ_MPRA_regions

Unnamed: 0,fragment_name,fragment_sequence
0,HAR_3094_sub_sequence_95_var_pos_106_C_A,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
1,HAR_3094_sub_sequence_95_var_pos_190_T_A,ACTTTAACTGTCCATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
2,HAR_3091_sub_sequence_241_var_pos_359_G_T,TTGTTATAACTTCTCTGGCTGTAATTTTTATAAGGAGAATTAGTCC...
3,HAR_3091_sub_sequence_1_var_pos_107_C_T,aacggatgggatgagatgtgggagttagagGATTAATGCTTAGATG...
4,HAR_3094_sub_sequence_95_var_pos_151_G_A,ACTTTAACTGTCCATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
...,...,...
192,rs5011218_mt_A_C,ACTAGATATAATATGTACTATATATAATAGTATACTATACTTTTAT...
193,rs11191359_wt,TCACCTATCTGACTCTTTGTGTGCATTCAGGTCCCAGCTCAAATGT...
194,rs11191359_mt_A_T,TCACCTATCTGACTCTTTGTGTGCATTCAGGTCCCAGCTCAAATGT...
195,rs4146428_wt,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...


In [4]:
# import barcodes - used shuffled version so consecutive mutagenized sequences don't have relatively similar barcodes
# names=["barcodes"] supplies the column name (the file is therefore read as header-less);
# index_col=False stops pandas from using the first column as the index.
# presumably one barcode per line - verify against the file
random_barcodes_11N_dist_3 = pd.read_table("/Users/tshin/Dropbox (CA Walsh Lab)/Walsh Lab/Scripts/Projects/HAR_Validation/EXP_00014_HAR_Validation/Saved_Data/random_barcodes_11N_dist_3.shuffled.txt",
                           names = ["barcodes"], index_col= False)

In [5]:
# RE (restriction enzyme) sites to scan for; each value is used as a regex pattern

# AsiSI RE - note that the site is palindromic (identical to its reverse complement),
# so only one strand needs to be scanned
AsiSI_RE = "GCGATCGC"

# PspXI RE - note that the degenerate flanking letters (V/B) are dropped from the search
# (so they can be any letter) rather than being matched explicitly.
# note 2: the RE site is also palindromic
PspXI_RE = "CTCGAG"

# SfiI RE - note that this RE needs a more complex regex: GGCC, any 5 letters, GGCC.
# Raw string so that \w reaches the regex engine without being treated as a
# (deprecated, invalid) Python string escape.
SfiI_RE = r"GGCC\w{5}GGCC"

In [6]:
# write a function to test for RE presence
def test_for_RE_presence(df, column):
    # REs
    AsiSI_RE = "GCGATCGC"
    PspXI_RE = "CTCGAG"
    SfiI_RE = "GGCC\w{5}GGCC"
    
    # initialize test df
    
    # test for AsiSI RE
    test_df = df[df[column].str.contains(AsiSI_RE) == True]
    
    # test for PspXI RE
    test_df = pd.concat([test_df, df[df[column].str.contains(PspXI_RE) == True]])
    
    # test for SfiI RE
    test_df = pd.concat([test_df, df[df[column].str.contains(SfiI_RE) == True]])

        
    if sum(test_df[column].str.contains(AsiSI_RE) == True) > 0:
        test_df.loc[test_df[column].str.contains(AsiSI_RE) == True, "AsiSI_RE"] = True
    if sum(test_df[column].str.contains(PspXI_RE) == True) > 0:
        test_df.loc[test_df[column].str.contains(PspXI_RE) == True, "PspXI_RE"] = True
    if sum(test_df[column].str.contains(SfiI_RE) == True) > 0:
        test_df.loc[test_df[column].str.contains(SfiI_RE) == True, "SfiI_RE"] = True
    
    return(test_df.drop_duplicates())

In [7]:
# since the RE searches came up negative, just check to make sure they work with positive controls below
# test_seq (positive controls): every row carries an AsiSI site (GCGATCGC); row 1 additionally
# carries an SfiI site (GGCCNNNNNGGCC) and row 2 a PspXI site (CTCGAG)
test_seq = pd.DataFrame(list(["ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATTATATGTGTTAAAAATATATACCATATGTCATGCAGCATGTTTTAATCCCATCAAAATGTCTCACACAATTCCTTAGGGGATATGCTTTTTGCTGCCAAGTTTTCACCGCTGGAATGCTGACTTACTATTCCTTTGCAGAGAGCGATCGCGCTTCATGGTTTATTTTATGATAACATGTTGTTATAACTTCTCTGGCTGTAATTTTTATA",
                             "ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATTATATGTGGCCNNNNNGGCCGTTAAAAATATATACCATATGTCATGCAGCATGTTTTAATCCCATCAAAATGTCTCACACAATTCCTTAGGGGATATGCTTTTTGCTGCCAAGTTTTCACCGCTGGAATGCTGACTTACTATTCCTTTGCAGAGAGCGATCGCGCTTCATGGTTTATTTTATGATAACATGTTGTTATAACTTCTCTGGCTGTAATTTTTATA",
                             "ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATCTCGAGTATATGTGTTAAAAATATATACCATATGTCATGCAGCATGTTTTAATCCCATCAAAATGTCTCACACAATTCCTTAGGGGATATGCTTTTTGCTGCCAAGTTTTCACCGCTGGAATGCTGACTTACTATTCCTTTGCAGAGAGCGATCGCGCTTCATGGTTTATTTTATGATAACATGTTGTTATAACTTCTCTGGCTGTAATTTTTATA"]), 
             columns=["test_sequence"])
# test_seq_2 (negative controls): the same three sequences with the first base of each site
# changed (GCGATCGC -> CCGATCGC, GGCC... -> CGCC..., CTCGAG -> GTCGAG), so no row should match
test_seq_2 = pd.DataFrame(list(["ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATTATATGTGTTAAAAATATATACCATATGTCATGCAGCATGTTTTAATCCCATCAAAATGTCTCACACAATTCCTTAGGGGATATGCTTTTTGCTGCCAAGTTTTCACCGCTGGAATGCTGACTTACTATTCCTTTGCAGAGACCGATCGCGCTTCATGGTTTATTTTATGATAACATGTTGTTATAACTTCTCTGGCTGTAATTTTTATA",
                             "ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATTATATGTCGCCNNNNNGGCCGTTAAAAATATATACCATATGTCATGCAGCATGTTTTAATCCCATCAAAATGTCTCACACAATTCCTTAGGGGATATGCTTTTTGCTGCCAAGTTTTCACCGCTGGAATGCTGACTTACTATTCCTTTGCAGAGACCGATCGCGCTTCATGGTTTATTTTATGATAACATGTTGTTATAACTTCTCTGGCTGTAATTTTTATA",
                             "ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATGTCGAGTATATGTGTTAAAAATATATACCATATGTCATGCAGCATGTTTTAATCCCATCAAAATGTCTCACACAATTCCTTAGGGGATATGCTTTTTGCTGCCAAGTTTTCACCGCTGGAATGCTGACTTACTATTCCTTTGCAGAGACCGATCGCGCTTCATGGTTTATTTTATGATAACATGTTGTTATAACTTCTCTGGCTGTAATTTTTATA"]), 
             columns=["test_sequence"])

In [8]:
# positive controls: every row of test_seq contains at least one site, so all rows should be returned
test_for_RE_presence(test_seq, "test_sequence")

Unnamed: 0,test_sequence,AsiSI_RE,PspXI_RE,SfiI_RE
0,ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATTATATGT...,True,,
1,ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATTATATGT...,True,,True
2,ATTAATGCTTAGATGCTTCCTCCAATTTATTTCTCCTATCTCGAGT...,True,True,


In [9]:
# negative controls: the sites in test_seq_2 are broken by a single base, so the result should be empty
test_for_RE_presence(test_seq_2, "test_sequence")

Unnamed: 0,test_sequence


In [10]:
# search for RE sites in the candidate fragments
# (any row returned here carries a site and has to be modified or excluded before ordering)
test_for_RE_presence(SCZ_MPRA_regions, "fragment_sequence")

Unnamed: 0,fragment_name,fragment_sequence,PspXI_RE,SfiI_RE
84,CMC_MSSM_224_chr9_25677627_A_C_wt,AGCGGATGGCCAAGCGCCACCCGGAAGTGTCCACTGGTGGGGGCGG...,True,
85,CMC_MSSM_224_chr9_25677627_A_C_mt,AGCGGATGGCCAAGCGCCACCCGGAAGTGTCCACTGGTGGGGGCGG...,True,
98,CMC_MSSM_340_chr11_18656210_A_C_wt,GGGCAGGGACTAGGCATTTTAGCAACCAAGGGGGTGTGGCTAAGCG...,True,
99,CMC_MSSM_340_chr11_18656210_A_C_mt,GGGCAGGGACTAGGCATTTTAGCAACCAAGGGGGTGTGGCTAAGCG...,True,
76,CMC_MSSM_199_chrX_3733836_T_G_wt,TCCCGTCGGCCCTCACAGCTCGGCTGGGCTCGGAGTCCGCTCCGCC...,,True
77,CMC_MSSM_199_chrX_3733836_T_G_mt,TCCCGTCGGCCCTCACAGCTCGGCTGGGCTCGGAGTCCGCTCCGCC...,,True
102,CMC_MSSM_343_chr1_28099835_T_G_wt,CCCGCCCTTGCTCTTCCCAGTTTCTCCGTCAGCCTGCGGGTCCCGG...,,True
103,CMC_MSSM_343_chr1_28099835_T_G_mt,CCCGCCCTTGCTCTTCCCAGTTTCTCCGTCAGCCTGCGGGTCCCGG...,,True
128,CMC_PITT_118_chr3_150321366_G_C_wt,ATGCAGTACGCCACGGGGCCGCTGCTCAAGTTCCAGATTTGGTGAG...,,True


In [11]:
# show the original (unmodified) fragment table again for reference
SCZ_MPRA_regions

Unnamed: 0,fragment_name,fragment_sequence
0,HAR_3094_sub_sequence_95_var_pos_106_C_A,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
1,HAR_3094_sub_sequence_95_var_pos_190_T_A,ACTTTAACTGTCCATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
2,HAR_3091_sub_sequence_241_var_pos_359_G_T,TTGTTATAACTTCTCTGGCTGTAATTTTTATAAGGAGAATTAGTCC...
3,HAR_3091_sub_sequence_1_var_pos_107_C_T,aacggatgggatgagatgtgggagttagagGATTAATGCTTAGATG...
4,HAR_3094_sub_sequence_95_var_pos_151_G_A,ACTTTAACTGTCCATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
...,...,...
192,rs5011218_mt_A_C,ACTAGATATAATATGTACTATATATAATAGTATACTATACTTTTAT...
193,rs11191359_wt,TCACCTATCTGACTCTTTGTGTGCATTCAGGTCCCAGCTCAAATGT...
194,rs11191359_mt_A_T,TCACCTATCTGACTCTTTGTGTGCATTCAGGTCCCAGCTCAAATGT...
195,rs4146428_wt,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...


In [12]:
'''
because of the issues with Restriction enzyme sites, 
made a manually modified version that has either first  or last 
base of RE modified, since it was difficult to change them without impacting any potential TFs

removed some less ideal controls to make space
'''

# read in modified oligo list
# (the manually edited replacement for SCZ_MPRA_regions described in the note above;
# loaded from a separate "_RE_fixed" file with the same read_table defaults)
SCZ_MPRA_regions_fixed = pd.read_table("/Users/tshin/Dropbox (CA Walsh Lab)/Walsh Lab/Scripts/Projects_Not_HAR/Misc/Eduardo/saved_data/SCZ_variant_validation_fasta_RE_fixed.txt")


In [13]:
# preview the manually fixed fragment table
SCZ_MPRA_regions_fixed

Unnamed: 0,fragment_name,fragment_sequence
0,HAR_3094_sub_sequence_95_var_pos_106_C_A,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
1,HAR_3094_sub_sequence_95_var_pos_190_T_A,ACTTTAACTGTCCATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
2,HAR_3091_sub_sequence_241_var_pos_359_G_T,TTGTTATAACTTCTCTGGCTGTAATTTTTATAAGGAGAATTAGTCC...
3,HAR_3091_sub_sequence_1_var_pos_107_C_T,aacggatgggatgagatgtgggagttagagGATTAATGCTTAGATG...
4,HAR_3094_sub_sequence_95_var_pos_151_G_A,ACTTTAACTGTCCATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
...,...,...
190,rs5011218_mt_A_C,ACTAGATATAATATGTACTATATATAATAGTATACTATACTTTTAT...
191,rs11191359_wt,TCACCTATCTGACTCTTTGTGTGCATTCAGGTCCCAGCTCAAATGT...
192,rs11191359_mt_A_T,TCACCTATCTGACTCTTTGTGTGCATTCAGGTCCCAGCTCAAATGT...
193,rs4146428_wt,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...


In [14]:
# test for RE presence in the manually fixed fragments - an empty result means no site was found
test_for_RE_presence(SCZ_MPRA_regions_fixed, "fragment_sequence")

Unnamed: 0,fragment_name,fragment_sequence


So it looks like the new fragments do not have the RE sites!

In [15]:
# keep only the barcodes that contain neither a PspXI nor an AsiSI site
# (.eq(False) keeps the original "== False" semantics: a missing value is dropped, not kept;
#  the original index labels are retained, and .copy() yields an independent frame)
random_barcodes_11N_dist_3_PspXI_filtered = random_barcodes_11N_dist_3.loc[
    random_barcodes_11N_dist_3["barcodes"].str.contains(PspXI_RE).eq(False)
    & random_barcodes_11N_dist_3["barcodes"].str.contains(AsiSI_RE).eq(False)
].copy()

In [16]:
# preview the barcodes that remain after the PspXI / AsiSI filter
random_barcodes_11N_dist_3_PspXI_filtered

Unnamed: 0,barcodes
0,TCATCAAGTGG
1,ACGAAGCTCTG
2,GATCCTTCCAG
3,AGAATGAGAGC
4,TGGATACTGAG
...,...
26697,GTTCACCTATG
26698,GAGTACGAGGA
26699,AACGTATGCGA
26700,GGAGTAGTCAT


In [17]:
# add in amplification sites
# segment placed between fragment and barcode; it contains a PspXI site (CTCGAG)
# followed by an AsiSI site (GCGATCGC)
PspXI_AsiSI_segment = "CCTCGAGGGCGATCGC"
# constant segment appended after the barcode
# presumably the amplification primer site - confirm against the cloning design
amplification_segment = "TGCGACTGGAGAACGAG"

In [18]:
# flank every barcode with its neighbouring constant segments
# "barcodes_seg_partial" uses only "CGATCGC" (the adapter's AsiSI site without its first base), so the
# adapter's own sites are not present and any hit must be a NEW site formed together with the barcode
random_barcodes_11N_dist_3_PspXI_filtered["barcodes_seg_partial"] = (
    "CGATCGC"
    + random_barcodes_11N_dist_3_PspXI_filtered["barcodes"]
    + amplification_segment
)
# "barcodes_seg_full" is the complete 3' part that goes onto the oligo
random_barcodes_11N_dist_3_PspXI_filtered["barcodes_seg_full"] = (
    PspXI_AsiSI_segment
    + random_barcodes_11N_dist_3_PspXI_filtered["barcodes"]
    + amplification_segment
)

# since we know from the previous sat mut run that many of the barcodes with the amplification segments
# are able to create RE sites by chance, remove them from the barcode list
random_barcodes_11N_dist_3_barcode_full_filtered = random_barcodes_11N_dist_3_PspXI_filtered.loc[
    random_barcodes_11N_dist_3_PspXI_filtered["barcodes_seg_partial"].str.contains(PspXI_RE).eq(False)
    & random_barcodes_11N_dist_3_PspXI_filtered["barcodes_seg_partial"].str.contains(AsiSI_RE).eq(False)
].copy()


In [19]:
# test these barcodes + amplification segments for RE sites
# (scans the "partial" strings, which leave out the adapter's own complete sites; expect an empty result)
test_for_RE_presence(random_barcodes_11N_dist_3_barcode_full_filtered, "barcodes_seg_partial")

Unnamed: 0,barcodes,barcodes_seg_partial,barcodes_seg_full


The barcodes with the amplification sites appear to be sufficient and RE free

In [20]:
# preview the final barcode table (barcode, partial-flank string, full-flank string)
random_barcodes_11N_dist_3_barcode_full_filtered

Unnamed: 0,barcodes,barcodes_seg_partial,barcodes_seg_full
0,TCATCAAGTGG,CGATCGCTCATCAAGTGGTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCTCATCAAGTGGTGCGACTGGAGAACGAG
1,ACGAAGCTCTG,CGATCGCACGAAGCTCTGTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCACGAAGCTCTGTGCGACTGGAGAACGAG
2,GATCCTTCCAG,CGATCGCGATCCTTCCAGTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCGATCCTTCCAGTGCGACTGGAGAACGAG
3,AGAATGAGAGC,CGATCGCAGAATGAGAGCTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCAGAATGAGAGCTGCGACTGGAGAACGAG
4,TGGATACTGAG,CGATCGCTGGATACTGAGTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCTGGATACTGAGTGCGACTGGAGAACGAG
...,...,...,...
26697,GTTCACCTATG,CGATCGCGTTCACCTATGTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCGTTCACCTATGTGCGACTGGAGAACGAG
26698,GAGTACGAGGA,CGATCGCGAGTACGAGGATGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCGAGTACGAGGATGCGACTGGAGAACGAG
26699,AACGTATGCGA,CGATCGCAACGTATGCGATGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCAACGTATGCGATGCGACTGGAGAACGAG
26700,GGAGTAGTCAT,CGATCGCGGAGTAGTCATTGCGACTGGAGAACGAG,CCTCGAGGGCGATCGCGGAGTAGTCATTGCGACTGGAGAACGAG


In [21]:
# add partial RE to oligo sequence
# "CCTCGA" is the first six bases of PspXI_AsiSI_segment, i.e. what follows the fragment in the final
# oligo minus the last base of the adapter's PspXI site; a hit here would therefore be a site formed
# across the fragment/adapter junction rather than the adapter's own site
# note: this adds a "fragment_sequence_partial" column to SCZ_MPRA_regions_fixed in place
SCZ_MPRA_regions_fixed["fragment_sequence_partial"] = SCZ_MPRA_regions_fixed["fragment_sequence"] + "CCTCGA"
test_for_RE_presence(SCZ_MPRA_regions_fixed, "fragment_sequence_partial")

Unnamed: 0,fragment_name,fragment_sequence,fragment_sequence_partial


In [22]:
# for final segment oligo list make 10x copy of each fragment then add barcodes with amplification/RE segments 
# half SfiI site F (5' adapter; contains a single GGCC, i.e. only part of an SfiI site)
half_SfiI_F_segment = "ACTGGCCGCTTCACTG"

# number of copies of every fragment (each copy later receives its own barcode)
copies_per_fragment = 10

# note name change to oligo_sequence
# consecutive copies of each fragment, renumbered 0..n-1 so the rows can be paired with barcodes by position
# (built directly from the repeated Series; concatenating onto an empty DataFrame is unnecessary)
all_oligo_sequences = (
    SCZ_MPRA_regions_fixed["fragment_sequence"]
    .repeat(copies_per_fragment)
    .rename("oligo_sequence")
    .reset_index(drop=True)
    .to_frame()
)



In [23]:
# preview the repeated fragment list (one row per fragment copy)
all_oligo_sequences

Unnamed: 0,oligo_sequence
0,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
1,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
2,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
3,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
4,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...
...,...
1945,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...
1946,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...
1947,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...
1948,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...


In [24]:
# full barcode segments renumbered from 0 - the numbering the oligo rows are matched against when barcodes are added
random_barcodes_11N_dist_3_barcode_full_filtered["barcodes_seg_full"].reset_index(drop=True)

0        CCTCGAGGGCGATCGCTCATCAAGTGGTGCGACTGGAGAACGAG
1        CCTCGAGGGCGATCGCACGAAGCTCTGTGCGACTGGAGAACGAG
2        CCTCGAGGGCGATCGCGATCCTTCCAGTGCGACTGGAGAACGAG
3        CCTCGAGGGCGATCGCAGAATGAGAGCTGCGACTGGAGAACGAG
4        CCTCGAGGGCGATCGCTGGATACTGAGTGCGACTGGAGAACGAG
                             ...                     
26595    CCTCGAGGGCGATCGCGTTCACCTATGTGCGACTGGAGAACGAG
26596    CCTCGAGGGCGATCGCGAGTACGAGGATGCGACTGGAGAACGAG
26597    CCTCGAGGGCGATCGCAACGTATGCGATGCGACTGGAGAACGAG
26598    CCTCGAGGGCGATCGCGGAGTAGTCATTGCGACTGGAGAACGAG
26599    CCTCGAGGGCGATCGCTTAGGTTCACCTGCGACTGGAGAACGAG
Name: barcodes_seg_full, Length: 26600, dtype: object

In [25]:
# add barcodes
# Oligo row i receives barcode row i (both tables are numbered 0..n-1). Only the first
# len(all_oligo_sequences) barcodes are used; fail loudly if there are too few, because the
# index alignment would otherwise silently produce NaN oligos for the unmatched rows.
barcode_pool = random_barcodes_11N_dist_3_barcode_full_filtered.reset_index(drop=True)
if len(barcode_pool) < len(all_oligo_sequences):
    raise ValueError(
        f"Only {len(barcode_pool)} barcodes available for {len(all_oligo_sequences)} oligos"
    )
assigned_barcodes = barcode_pool.iloc[:len(all_oligo_sequences)]

all_oligo_sequences["full_oligo"] = (
    half_SfiI_F_segment
    + all_oligo_sequences["oligo_sequence"]
    + assigned_barcodes["barcodes_seg_full"]
).str.upper()
all_oligo_sequences["barcodes"] = assigned_barcodes["barcodes"]

if all_oligo_sequences["full_oligo"].isna().any():
    raise ValueError("Some oligos could not be assembled (missing fragment or barcode)")

# final sanity check on the assembled, upper-cased oligos: the adapter contributes exactly one
# AsiSI and one PspXI site and no complete SfiI site, so any other count means a site was
# created across a junction or was hidden in lower-case fragment bases.
# The lookahead (?=...) counts overlapping occurrences as well.
expected_site_counts = {AsiSI_RE: 1, PspXI_RE: 1, SfiI_RE: 0}
for site_pattern, expected_count in expected_site_counts.items():
    observed_counts = all_oligo_sequences["full_oligo"].str.count(f"(?={site_pattern})")
    unexpected = observed_counts != expected_count
    if unexpected.any():
        raise ValueError(
            f"{int(unexpected.sum())} oligos do not contain exactly {expected_count} "
            f"match(es) of {site_pattern}; first offending rows: "
            f"{list(all_oligo_sequences.index[unexpected][:10])}"
        )

In [26]:
# preview the final design table (fragment, assembled full oligo, assigned barcode)
all_oligo_sequences

Unnamed: 0,oligo_sequence,full_oligo,barcodes
0,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...,ACTGGCCGCTTCACTGACTTTAACTGTACATGAGATTGTGTCAGTC...,TCATCAAGTGG
1,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...,ACTGGCCGCTTCACTGACTTTAACTGTACATGAGATTGTGTCAGTC...,ACGAAGCTCTG
2,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...,ACTGGCCGCTTCACTGACTTTAACTGTACATGAGATTGTGTCAGTC...,GATCCTTCCAG
3,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...,ACTGGCCGCTTCACTGACTTTAACTGTACATGAGATTGTGTCAGTC...,AGAATGAGAGC
4,ACTTTAACTGTACATGAGATTGTGTCAGTCCTTTCTGAAACACTGG...,ACTGGCCGCTTCACTGACTTTAACTGTACATGAGATTGTGTCAGTC...,TGGATACTGAG
...,...,...,...
1945,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...,ACTGGCCGCTTCACTGTTATTAATAGAGGAATAAATGCCTATCTTG...,GATAAGATGCG
1946,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...,ACTGGCCGCTTCACTGTTATTAATAGAGGAATAAATGCCTATCTTG...,TGGAAGTGGAG
1947,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...,ACTGGCCGCTTCACTGTTATTAATAGAGGAATAAATGCCTATCTTG...,TAAGGCGGCAA
1948,TTATTAATAGAGGAATAAATGCCTATCTTGCTAGGTAGTTTAAAGT...,ACTGGCCGCTTCACTGTTATTAATAGAGGAATAAATGCCTATCTTG...,ACAGTGATACC


In [27]:
# write oligo design
# full design table (all columns, with header, tab-separated, without the row index)
all_oligo_sequences.to_csv("/Users/tshin/Dropbox (CA Walsh Lab)/Walsh Lab/Scripts/Projects_Not_HAR/Misc/Eduardo/EXP_00027_final_oligos_06_17_22/EXP_00027_SCZ_variant_validation_oligo_design.txt", sep = "\t", index = False)

# assembled oligo sequences only: one sequence per line, no header and no row index
all_oligo_sequences["full_oligo"].to_csv("/Users/tshin/Dropbox (CA Walsh Lab)/Walsh Lab/Scripts/Projects_Not_HAR/Misc/Eduardo/EXP_00027_final_oligos_06_17_22/EXP_00027_SCZ_variant_validation_oligo_sequences.txt", sep = "\t", header = None, index = False)