In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import math
from itertools import product, permutations, combinations
from sklearn import preprocessing as sp
import pickle as pkl
from tqdm import tqdm

In [2]:
# Notebook-wide matplotlib defaults: white figure background, 12 pt Arial text
plt.rc('figure', facecolor='white')
plt.rc('font', size=12, family='Arial')

In [3]:
def mutateSeq(S, mut):
    """Return a copy of sequence ``S`` with a single position replaced.

    Parameters
    ----------
    S : str
        Sequence to mutate.
    mut : tuple
        ``(pos, mut_base)`` pair: the zero-based index to replace and the
        string that is put in its place.

    Returns
    -------
    str
        ``S`` with the character at ``pos`` swapped for ``mut_base``.

    Raises
    ------
    IndexError
        If ``pos`` is negative or not smaller than ``len(S)``.
    """
    pos, mut_base = mut

    # Negative positions must be rejected as well: the slices below would
    # otherwise splice the sequence at the wrong place and silently return
    # a longer, corrupted string instead of failing.
    if pos < 0 or pos >= len(S):
        raise IndexError("Position out of range")

    # One slice expression covers the first, last and inner positions alike.
    return S[:pos] + mut_base + S[pos + 1:]

In [4]:
def makeMutations(S, mut):
    """Apply every point mutation described by ``mut`` to ``S``, in order.

    ``mut[0]`` holds the positions to change and ``mut[1][1]`` holds, at the
    same index, the replacement for each of those positions. The mutations
    are applied one after another through ``mutateSeq`` and the fully
    mutated sequence is returned; ``S`` itself is returned unchanged when
    ``mut[0]`` is empty.
    """
    mutated = S
    for idx, pos in enumerate(mut[0]):
        mutated = mutateSeq(mutated, (pos, mut[1][1][idx]))
    return mutated

---
### Prep

In [5]:
# Read the two columns used by the analysis and derive the lookup helpers
df = pd.read_csv(
    '/home/jardic/Documents/projects/jk/datasets/datasets_prepped/strc_km.csv',
    usecols=['varseq', 'cpm'],
)
seq_2_cpm = dict(zip(df['varseq'], df['cpm']))  # sequence -> cpm lookup
seqpool = set(df['varseq'])                     # every distinct sequence in the dataset
seqlen = len(df['varseq'].iloc[0])              # length of the first sequence
aurora2 = 'AGACATGTTTTGTAAATATGTTGT'            # presumably the Aurora2 reference sequence - confirm

In [6]:
# Unpickle the mutation definitions together with their display names
with open('../../prep/mutations_permutations_aurora2.pkl', 'rb') as pkl_file:
    mutations, mutations_names = pkl.load(pkl_file)

---
### Run analysis

In [7]:
# For every mutation set, pair each sequence that carries the reference bases
# with its mutated counterpart and record log10(mutant cpm / reference cpm).
results = []

# zip() has no length, so pass the count explicitly to get a progress bar and ETA
for m, m_name in tqdm(zip(mutations, mutations_names),
                      total=min(len(mutations), len(mutations_names))):

    # Bases each sequence carries at the positions listed in m[0]
    # (scratch column, overwritten on every iteration)
    df['p'] = [''.join(s[x] for x in m[0]) for s in df['varseq']]
    # Keep only the sequences whose bases there equal the reference pattern m[1][0]
    df_refs = df[df['p'] == m[1][0]]

    for ref_seq in df_refs['varseq']:
        mut_seq = makeMutations(ref_seq, m)
        mut_cpm = seq_2_cpm.get(mut_seq)

        # Mutants that are absent from the dataset contribute no data point
        if mut_cpm is not None:
            # ref_seq comes from df, so it is always a key of seq_2_cpm
            ref_cpm = seq_2_cpm[ref_seq]
            # log10() is more accurate than log(x, 10), which divides two
            # natural logarithms and can be off in the last bits
            cpm_ratio = math.log10(mut_cpm / ref_cpm)
            results.append((m_name, cpm_ratio))

35it [03:45,  6.45s/it]


In [8]:
# Turn the collected (mutation name, log ratio) tuples into a table.
# NOTE: this rebinds `df`, so the loaded dataset is no longer reachable by
# that name; re-running the analysis cell afterwards needs a fresh load.
df = pd.DataFrame(
    data=results,
    columns=['mut', 'log_cpm_ratio'],
)

In [9]:
# Number of collected (mutation, log ratio) rows
df.shape[0]

40395476

In [10]:
# Save the table for the violin-plot step; the row index is written as the
# first (unnamed) column, which is the to_csv default made explicit here.
df.to_csv('epistatsis_in_aurora2_for_violins.csv', index=True)