In [3]:
import sys, os, gc
#from ipywidgets import FileUpload
#from IPython.display import display
#from Bio.Seq import Seq

import pandas as pd
import numpy as np

In [1]:
# sgRNA name -> sgRNA sequence, listed in the order they should appear downstream.
dict_sgRNA = dict([
    ("LTR2B sgRNA1", "TTAACTACTGGGTTTAGGCC"),
    ("LTR2B sgRNA2", "TAGTGTTGTGAGCCCTTAAA"),
    ("LTR2B sgRNA3", "GACACCGAGTTGTAGAAGGA"),
    ("LTR2B sgRNA4", "CTTTATTCAGCTGGGAGCAT"),
])

In [22]:
# One row per guide: the index (named 'sgRNA_name') holds the guide name,
# 'sgRNA' holds the sequence, and 'name' repeats the guide name as a regular
# column so it is still available once the index is discarded.
df_sgRNA = (
    pd.DataFrame.from_dict(dict_sgRNA, orient='index', columns=['sgRNA'])
    .rename_axis('sgRNA_name')
    .assign(name=lambda frame: frame.index)
)
df_sgRNA

Unnamed: 0_level_0,sgRNA,name
sgRNA_name,Unnamed: 1_level_1,Unnamed: 2_level_1
LTR2B sgRNA1,TTAACTACTGGGTTTAGGCC,LTR2B sgRNA1
LTR2B sgRNA2,TAGTGTTGTGAGCCCTTAAA,LTR2B sgRNA2
LTR2B sgRNA3,GACACCGAGTTGTAGAAGGA,LTR2B sgRNA3
LTR2B sgRNA4,CTTTATTCAGCTGGGAGCAT,LTR2B sgRNA4


In [23]:
# Oligo flanks; the annealed pair ligates into a 5'BstXI-BlpI3' digested backbone.

# Forward oligo: sequence added at its 5' end, then at its 3' end.
FiveF_5E, FiveF_3E = 'TTG', 'GTTTAAGAGC'

# Reverse oligo: 5'-end and 3'-end flanks, written here in forward orientation
# (NOT reverse-complemented; create_R_sequence reverse-complements the whole
# concatenation afterwards).
ThreeR_5E, ThreeR_3E = 'CTTGTTG', 'GTTTAAGAGCTAA'

# Each DNA base mapped to its complementary base.
base2complement = dict(zip("ATCG", "TAGC"))

def create_F_sequence(seq:str, FiveF_5E:str="TTG", FiveF_3E:str='GTTTAAGAGC'):
    """Build the forward oligo by wrapping ``seq`` in its two flanks.

    Parameters
    ----------
    seq : str
        Sequence to place between the flanks.
    FiveF_5E : str
        Flank prepended to ``seq``.
    FiveF_3E : str
        Flank appended to ``seq``.

    Returns
    -------
    str
        ``FiveF_5E + seq + FiveF_3E``; no validation or case change is applied.
    """
    with_leading_flank = FiveF_5E + seq
    return with_leading_flank + FiveF_3E

def create_R_sequence(seq:str, ThreeR_5E:str='CTTGTTG', ThreeR_3E:str='GTTTAAGAGCTAA') -> str:
    """Build the reverse oligo: reverse complement of ``ThreeR_5E + seq + ThreeR_3E``.

    The flanks and ``seq`` are supplied in forward orientation. They are
    concatenated first, and the whole string is then reverse-complemented.

    Parameters
    ----------
    seq : str
        Sequence to place between the flanks.
    ThreeR_5E : str
        Flank placed before ``seq`` (before reverse-complementing).
    ThreeR_3E : str
        Flank placed after ``seq`` (before reverse-complementing).

    Returns
    -------
    str
        Reverse complement of the concatenation. Letter case is preserved,
        so lowercase input bases yield lowercase complements.

    Raises
    ------
    ValueError
        If the concatenated sequence contains any character other than
        A, C, G or T (upper or lower case).
    """
    forward = ThreeR_5E + seq + ThreeR_3E  # concatenate all parts

    # Fail loudly on anything that cannot be complemented: str.translate would
    # otherwise pass unknown characters through unchanged into the oligo.
    unexpected = sorted(set(forward) - set("ACGTacgt"))
    if unexpected:
        raise ValueError(
            "Cannot reverse-complement {!r}: unsupported character(s) {}".format(
                forward, ", ".join(repr(ch) for ch in unexpected)
            )
        )

    # The table is local so the function does not depend on notebook globals.
    complement_table = str.maketrans("ACGTacgt", "TGCAtgca")
    return forward[::-1].translate(complement_table)

In [24]:
# Derive both oligos to order from each guide sequence.
df_sgRNA = df_sgRNA.assign(
    F_oligo=df_sgRNA['sgRNA'].apply(create_F_sequence),
    R_oligo=df_sgRNA['sgRNA'].apply(create_R_sequence),
)

df_sgRNA

Unnamed: 0_level_0,sgRNA,name,F_oligo,R_oligo
sgRNA_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LTR2B sgRNA1,TTAACTACTGGGTTTAGGCC,LTR2B sgRNA1,TTGTTAACTACTGGGTTTAGGCCGTTTAAGAGC,TTAGCTCTTAAACGGCCTAAACCCAGTAGTTAACAACAAG
LTR2B sgRNA2,TAGTGTTGTGAGCCCTTAAA,LTR2B sgRNA2,TTGTAGTGTTGTGAGCCCTTAAAGTTTAAGAGC,TTAGCTCTTAAACTTTAAGGGCTCACAACACTACAACAAG
LTR2B sgRNA3,GACACCGAGTTGTAGAAGGA,LTR2B sgRNA3,TTGGACACCGAGTTGTAGAAGGAGTTTAAGAGC,TTAGCTCTTAAACTCCTTCTACAACTCGGTGTCCAACAAG
LTR2B sgRNA4,CTTTATTCAGCTGGGAGCAT,LTR2B sgRNA4,TTGCTTTATTCAGCTGGGAGCATGTTTAAGAGC,TTAGCTCTTAAACATGCTCCCAGCTGAATAAAGCAACAAG


In [31]:
# Columns kept in the order sheet (reused by the export cell).
cols = ['order_name', 'Sequence']

# Reshape to one row per oligo. melt() puts the source column name
# ('F_oligo' / 'R_oligo') in 'variable'; its first letter becomes the
# suffix of the order name, e.g. "<guide name> F".
df_ordersheet = (
    df_sgRNA
    .melt(id_vars=['sgRNA', 'name'], value_name="Sequence")
    .assign(order_name=lambda long: long['name'] + " " + long['variable'].str[0])
    .loc[:, cols]
    .sort_values('order_name')
)
df_ordersheet

Unnamed: 0,order_name,Sequence
0,LTR2B sgRNA1 F,TTGTTAACTACTGGGTTTAGGCCGTTTAAGAGC
4,LTR2B sgRNA1 R,TTAGCTCTTAAACGGCCTAAACCCAGTAGTTAACAACAAG
1,LTR2B sgRNA2 F,TTGTAGTGTTGTGAGCCCTTAAAGTTTAAGAGC
5,LTR2B sgRNA2 R,TTAGCTCTTAAACTTTAAGGGCTCACAACACTACAACAAG
2,LTR2B sgRNA3 F,TTGGACACCGAGTTGTAGAAGGAGTTTAAGAGC
6,LTR2B sgRNA3 R,TTAGCTCTTAAACTCCTTCTACAACTCGGTGTCCAACAAG
3,LTR2B sgRNA4 F,TTGCTTTATTCAGCTGGGAGCATGTTTAAGAGC
7,LTR2B sgRNA4 R,TTAGCTCTTAAACATGCTCCCAGCTGAATAAAGCAACAAG


In [32]:
# One oligo sequence per line, in the row order of df_ordersheet.
print(*df_ordersheet['Sequence'].to_list(), sep='\n')

TTGTTAACTACTGGGTTTAGGCCGTTTAAGAGC
TTAGCTCTTAAACGGCCTAAACCCAGTAGTTAACAACAAG
TTGTAGTGTTGTGAGCCCTTAAAGTTTAAGAGC
TTAGCTCTTAAACTTTAAGGGCTCACAACACTACAACAAG
TTGGACACCGAGTTGTAGAAGGAGTTTAAGAGC
TTAGCTCTTAAACTCCTTCTACAACTCGGTGTCCAACAAG
TTGCTTTATTCAGCTGGGAGCATGTTTAAGAGC
TTAGCTCTTAAACATGCTCCCAGCTGAATAAAGCAACAAG


In [33]:
# Print the current working directory via the shell.
!pwd

/gladstone/alexanian/datasets-online/Github/vTEtools/tools_jupyterNotebook


In [17]:
# Write only the order-sheet columns to an Excel workbook, without the index.
fp = "hLTR2B_sgRNA_order_sheet.xlsx"
df_ordersheet.to_excel(fp, columns=cols, index=False)