# **Supplementary Code 3**
This notebook was used to design the library for pegRNA abundance screening. For more details, please see the Methods and Supplementary Information. 

Lead contact: Hyoungbum Henry Kim (hkim1@gmail.com)

Technical contact: Goosang Yu (gsyu93@gmail.com), Yusang Jung (ys.jung@yuhs.ac)

## Analysis

### Step 1: Amino acid check
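When designing the library, we first designed the pegRNAs carrying the intended edit and then introduced additional synonymous mutations.  
The code below checks whether, once the synonymous mutations are introduced, the resulting amino acid sequence is still identical to that of the intended-edit-only sequence.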


In [3]:
import sys, os, time
import pandas as pd
from Bio.Seq import Seq

In [35]:
# Exon number; selects which reference CSV is loaded below.
exon = 9

# Leading nucleotides to skip: the 65 nt that always precede the CDS
# (60 nt index + 5 nt splice site). Only the amino-acid-coding part is of interest.
buff = 65

# Additional nucleotides to skip when the first codon of the exon is a partial
# codon carried over from the previous exon; it is excluded from the check.
frame = 2

# Offset into each sequence at which translation begins.
start = frame + buff

df = pd.read_csv(f'./data/abl1_exon_refseq/e{exon}.csv')
df

Unnamed: 0,SynPE,Intended_only
0,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
1,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
2,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
3,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
4,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
...,...,...
1027,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
1028,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
1029,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...
1030,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...


In [36]:
def _translate_from(seq, offset):
    """Translate `seq` starting at position `offset` and return the protein as a string.

    Trailing nucleotides that do not form a complete codon are trimmed before
    translation. Biopython ignores such a remainder but emits a warning (and
    documents that this may become an error), so trimming explicitly keeps the
    result the same while avoiding the warning.
    """
    coding = seq[offset:]
    coding = coding[:len(coding) - len(coding) % 3]
    return str(Seq(coding).translate())


# Translate every reference sequence from `start` onward. Columns are selected
# by name rather than by position: positional `series[0]` on a string-labelled
# row is deprecated in pandas, and iterating the columns directly avoids
# feeding index labels to `.iloc`.
list_synonype = [_translate_from(seq, start) for seq in df['SynPE']]
list_intended = [_translate_from(seq, start) for seq in df['Intended_only']]

df['SynonyPE_AA'] = list_synonype
df['Intended_AA'] = list_intended

df


Unnamed: 0,SynPE,Intended_only,SynonyPE_AA,Intended_AA
0,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
1,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
2,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
3,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
4,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
...,...,...,...,...
1027,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
1028,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGKVPIPGYLQWGERAAM...
1029,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGNVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGNVPIPGYLQWGERAAM...
1030,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,AACTGCTAGCCCCGTATTGCTAGCCAGATCTCATGGATGATCTGAC...,WQWNPSDRPSFAEIHQAFETMFQESSISDGNVPIPGYLQWGERAAM...,WQWNPSDRPSFAEIHQAFETMFQESSISDGNVPIPGYLQWGERAAM...


In [37]:
# Make sure the output directory exists; `to_excel` does not create missing
# parent folders and would otherwise fail on a fresh checkout.
os.makedirs('./output', exist_ok=True)

# Write the table to an Excel file named after the exon, without the row index.
df.to_excel(f'./output/ex{exon}_AA.xlsx', index=False)