# 1D/2D analysis

In [1]:
# import required modules
import pickle5
import pandas as pd
import numpy as np
import os
import utils

In [2]:
# Run folder holding the input files (options ini, SR_AAvars pickle, ...)
DataPath = "../../run6_full_PBI+PBII_HN168107_600K"
# Options file of the run and output sub-folder for the ECNet input files
OptFN = DataPath + "/options_full.ini"
OutDataPath = DataPath + "/ECNet"
# Create the output folder (and any missing parents) unless it is already there
os.makedirs(OutDataPath, exist_ok=True)

In [3]:
## load required data
# the options: AllowedPars lists the option names handed to utils.get_OptionsIPyNB
AllowedPars = ["Partners","RefSeqs"]
OptionsDic = utils.get_OptionsIPyNB(OptFN, AllowedPars)
# the following lines are required to correct relative paths: every "../" in an
# option value is re-anchored on DataPath
for key in OptionsDic.keys():
    MyValue = OptionsDic[key]
    if "../" in MyValue:
        OptionsDic[key] = MyValue.replace("../", "{}/../".format(DataPath))

ComplexesL = utils.double_split(OptionsDic["Partners"], ";", "~")[0]
RefsDic = utils.get_Refs(OptionsDic["RefSeqs"])

# create fasta file with the concatenated sequence
# (element 5 of a RefsDic entry is used as the partner's DNA sequence)
ConcatDNA = ""
ConcatProt = ""
for partner in  ComplexesL:
    ConcatDNA = ConcatDNA + RefsDic[partner][5]
    ConcatProt = ConcatProt + (utils.translate_dna(RefsDic[partner][5])).strip('*')
# A FASTA record header must start with ">", otherwise parsers do not see a
# record at all; the context manager closes the file even if the write fails.
with open("{}/{}_vs_{}.fa".format(OutDataPath, ComplexesL[0], ComplexesL[1]), "w") as ConcatFastaFile:
    ConcatFastaFile.write(">{}+{}_concatenation\n{}\n".format(ComplexesL[0], ComplexesL[1], ConcatProt))

# the AAvarsDic
# NOTE(review): unpickling executes arbitrary code; only load pickle files
# produced by a trusted run of the pipeline.
AAvarsDic = {}
with open(r"{}/SR_AAvars_{}_vs_{}.pkl".format(DataPath,ComplexesL[0], ComplexesL[1]), "rb") as input_file:
    AAvarsDic = pickle5.load(input_file)

## Drop irrelevant information
# pop() with a default tolerates entries that carry no 'IndBCData' key
for key in AAvarsDic.keys():
    AAvarsDic[key].pop('IndBCData', None)

print("{} single mutant combinations found in SR_AAvars dictionary for complex partners {} (A) and {} (B) !".format(len(AAvarsDic.keys()), ComplexesL[0], ComplexesL[1]))

# Partner lengths in codons; a terminal stop codon is not counted
ALength = len(RefsDic[ComplexesL[0]][5])/3
BLength = len(RefsDic[ComplexesL[1]][5])/3
if RefsDic[ComplexesL[0]][5][-3:] in ['TAA', 'TGA', 'TAG']:
    ALength = ALength -1
if RefsDic[ComplexesL[1]][5][-3:] in ['TAA', 'TGA', 'TAG']:
    BLength = BLength -1
ConcatLength = ALength + BLength
print("Partner {} (A) length is {:g} and {} (B) length is {:g} ! Concatenated length = {:g}".format(ComplexesL[0], ALength , ComplexesL[1], BLength, ConcatLength))

9503 single mutant combinations found in SR_AAvars dictionary for complex partners VN1551_VHH2 (A) and VN1554_TNFa-2 (B) !
Partner VN1551_VHH2 (A) length is 115 and VN1554_TNFa-2 (B) length is 157 ! Concatenated length = 272


In [4]:
# Snapshot of the 2D variant combination keys, so entries can be deleted while looping
dMutL = list(AAvarsDic.keys())
# Keep a combination only when Stats[2] (number of different BCs) is at least
# Min_dBCs and Stats[1] (SD) does not exceed Stats[0] (normalized enrichment)
# times MaxSD_NormEnrichFactor; every other combination is removed.
Min_dBCs = 2
MaxSD_NormEnrichFactor = 0.8
for key in dMutL:
    StatsL = AAvarsDic[key]['Stats']
    EnoughBCs = StatsL[2] >= Min_dBCs
    if EnoughBCs and ( StatsL[1] <= (StatsL[0] * MaxSD_NormEnrichFactor) ):
        # retain only [normalized enrichment, SD, number of supporting BCs]
        AAvarsDic[key] = StatsL
        continue
    del AAvarsDic[key]

In [5]:
# Sanity check: show the first three retained combinations and their statistics
i = 1
for key, stats in AAvarsDic.items():
    if i > 3:
        break
    print("{}:{}".format(key, stats))
    i += 1

Y35S:WT:[0.5971950600108359, 0.13085261164192527, 195]
WT:S81F:[0.1876774606185046, 0.0783820918986438, 114]
I57R:K90R:[0.10464463914337035, 0.028960272393192467, 5]


In [6]:
# shift the positions of a mutation pair into concatenated-sequence numbering
def CorrectPos(dkey, a_length=None):
    """Convert an "A:B" variant key to concatenated-sequence numbering.

    Each ':'-separated field is either "WT" or a mutation written as
    <ref residue><position><new residue> (e.g. "S81F"). Mutations in every
    field after the first have their position shifted by the length of
    partner A. "WT" fields are dropped and the remaining mutations are
    joined with ';'.

    Parameters
    ----------
    dkey : str
        Variant combination key, e.g. "I57R:K90R" or "WT:S81F".
    a_length : int or float, optional
        Length of partner A in residues. Defaults to the notebook-level
        ``ALength``, which is only looked up when a shift is needed.

    Returns
    -------
    str
        The renumbered key, e.g. "I57R;K205R" for a_length=115. An all-WT
        key gives "" (it used to raise IndexError).

    Raises
    ------
    ValueError
        If a non-WT field has no integer position between its first and
        last characters.
    """
    renumbered = []
    for idx, muta in enumerate(dkey.split(':')):
        if muta == "WT":
            # wild-type fields contribute nothing to the output key
            continue
        pos = int(muta[1:-1])
        if idx > 0:
            offset = ALength if a_length is None else a_length
            # int() because the partner length may be held as a float
            pos = int(offset + pos)
        renumbered.append("".join([muta[0], str(pos), muta[-1]]))

    return ";".join(renumbered)


# Map each renumbered variant key to its normalized enrichment (first Stats value).
# WT:WT carries no information and keys containing "*" (stop codon) are excluded;
# excluded keys are printed.
ScoreDic = {}
for key, stats in AAvarsDic.items():
    if (key == "WT:WT") or ("*" in key):
        print(key)
        continue
    UpdatedKey = CorrectPos(key)
    ScoreDic[UpdatedKey] = stats[0]

N54*:T89E
N54*:WT
WT:WT
N54*:E135R
N54*:S95T
N54*:S147M
N54*:S147T
N54*:N92G
N54*:T89A
N54*:E135Q
N54*:S147L
N54*:Q88L
N54*:I97W
N54*:N137W
N54*:E135K
N54*:I97A
N54*:I97R
N54*:E146S
N54*:E135V
N54*:L75E
N54*:T79L
N54*:N137L
N54*:S147G
N54*:K90R
N54*:S95E
N54*:N92D
N54*:S81F
N54*:E135W
N54*:Q88F
N54*:E146D
N54*:Q88N
N54*:S81A
N54*:N92A
N54*:E146L
N54*:T89F
N54*:T79R
N54*:K90A
N54*:S81G
N54*:L75F
N54*:K90V
N54*:S95G
N54*:T89K
N54*:T77E
N54*:N92S
N54*:S147K
N54*:E146F
N54*:N137G
N54*:E135G
N54*:T79K
N54*:L75M
N54*:L75G
N54*:E146V
N54*:K90E
N54*:T89L
N54*:S147E
N54*:E135F
N54*:E146M
N54*:S147F
N54*:E135A
N54*:T79S
N54*:N92K
N54*:K90W
N54*:S147N
N54*:N92R
N54*:S81T
N54*:E135L
N54*:Q88G
N54*:S147D
N54*:T77G
N54*:N92W
N54*:E146Q
N54*:T89S
N54*:E146A
N54*:N92F
N54*:E146G
N54*:N92E
N54*:N92L
N54*:K90F
N54*:S95N
N54*:N137E
N54*:Q88R


In [7]:
# One row per renumbered variant key, with its score in a single 'score' column
ScoreDF = pd.DataFrame.from_dict(ScoreDic, orient='index', columns=['score'])
ScoreDF.index.name = "mutation"
# Write the tab-separated score table next to the concatenated FASTA file
ScoreDF.to_csv("{}/{}_vs_{}_scores.tsv".format(OutDataPath, ComplexesL[0], ComplexesL[1]), sep="\t")
# Preview: must be the last expression of the cell, otherwise it is evaluated
# and silently discarded instead of being displayed
ScoreDF.head(10)