In [1]:
import pandas as pd
import math
from Bio import SeqIO

---
### CamSol

In [2]:
def camsol_score(name, reference="ADan"):
    """Normalise CamSol variant scores to a reference sequence.

    Reads the tab-separated file ``CamSol_<name>.txt`` (columns ``Name`` and
    ``protein variant score``), divides every score by the score of the row
    whose ``Name`` equals ``reference``, writes the result to
    ``CamSol_<name>_scores.csv`` and returns it.

    Parameters
    ----------
    name : str
        Stem used to build the input and output file names.
    reference : str, default "ADan"
        ``Name`` of the row whose score every other score is divided by.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` (the input ``Name``) and ``CamSol_score`` (score ratio).

    Raises
    ------
    ValueError
        If ``reference`` does not match exactly one row of the input file.
    """
    camsol_df = pd.read_csv('CamSol_'+name+'.txt', sep='\t')

    # Look the reference up once instead of once per row, and fail with a
    # readable message when it is missing or ambiguous.
    reference_rows = camsol_df.loc[camsol_df["Name"] == reference, "protein variant score"]
    if len(reference_rows) != 1:
        raise ValueError(
            "expected exactly one row named %r in CamSol_%s.txt, found %d"
            % (reference, name, len(reference_rows))
        )
    reference_score = reference_rows.iloc[0]

    # Vectorised ratio; .tolist() keeps a fresh 0..n-1 index on the result.
    df = pd.DataFrame({
        'ID': camsol_df["Name"].tolist(),
        'CamSol_score': (camsol_df["protein variant score"] / reference_score).tolist(),
    })
    df.to_csv('CamSol_'+name+'_scores.csv')
    return df

# Normalise the scores in CamSol_ADan_2_df.txt and preview the first rows.
ADan_camsol = camsol_score("ADan_2_df")
ADan_camsol.head()

Unnamed: 0,ID,CamSol_score
0,ADan,1.0
1,E-1-K,1.015684
2,E-1-N,0.90017
3,E-1-T,0.701321
4,E-1-R,1.028204


In [3]:
# Save the CamSol scores under a second file name; camsol_score has already
# written the same frame to CamSol_ADan_2_df_scores.csv.
ADan_camsol.to_csv("ADan_2_df_camsol_score.csv")

---
### AMYPred-FRL

In [4]:
def amypred_result(name, reference="ADan"):
    """Compute log10 AMYPred-FRL probability ratios against a reference sequence.

    Reads the tab-separated file ``AMYPred-FRL_<name>.txt`` (columns ``Name``
    and ``Probability``) and, for every row, computes
    ``log10(Probability / reference Probability)``.

    Parameters
    ----------
    name : str
        Stem used to build the input file name.
    reference : str, default "ADan"
        ``Name`` of the row whose probability is used as the denominator.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` (the input ``Name``) and ``Amypred_score``.

    Raises
    ------
    ValueError
        If ``reference`` does not match exactly one row, or if a ratio is not
        positive (raised by ``math.log10``).
    """
    amypred = pd.read_csv('AMYPred-FRL_'+name+'.txt', sep='\t')

    # Pull the reference out as a plain float once. Passing a one-element
    # Series to math.log10 depends on an implicit float(Series) conversion
    # that pandas has deprecated, and repeats the lookup for every row.
    reference_rows = amypred.loc[amypred['Name'] == reference, 'Probability']
    if len(reference_rows) != 1:
        raise ValueError(
            "expected exactly one row named %r in AMYPred-FRL_%s.txt, found %d"
            % (reference, name, len(reference_rows))
        )
    reference_probability = float(reference_rows.iloc[0])

    scores = (amypred['Probability'] / reference_probability).map(math.log10)
    return pd.DataFrame({'ID': amypred['Name'], 'Amypred_score': scores})

# Score every entry of AMYPred-FRL_ADan_2_df.txt and preview the first rows.
ADan_amypred = amypred_result("ADan_2_df")
ADan_amypred.head()

Unnamed: 0,ID,Amypred_score
0,ADan,0.0
1,E-1-K,0.009865
2,E-1-N,0.008003
3,E-1-T,0.013565
4,E-1-R,0.012182


In [5]:
# Save the AMYPred-FRL scores (DataFrame index included) as CSV.
ADan_amypred.to_csv("ADan_2_df_amypred_score.csv")

---
### Tango

In [5]:
def tango_runfile(fasta_file, name):
    """Write a batch file with one ``tango`` command per FASTA record.

    The output file is ``<name>_tango.bat``. Each line invokes ``tango`` with
    the record id as first argument, a fixed set of options, and the record
    sequence passed as ``seq="..."``.

    Parameters
    ----------
    fasta_file : str
        Path of the FASTA file to read with ``SeqIO.parse``.
    name : str
        Prefix of the batch file to create.
    """
    template = 'tango {} nt="N" ct="N" ph="7.4" te="310" io="0.05" tf="0" seq="{}"'
    batch_path = name + '_tango.bat'
    with open(batch_path, 'w') as batch:
        for entry in SeqIO.parse(fasta_file, "fasta"):
            command = template.format(str(entry.id), str(entry.seq))
            batch.write(command + '\n')

# Write ADan_tango.bat with one tango command per record of ADan_2_df.fasta.
tango_runfile("ADan_2_df.fasta", "ADan")


In [8]:
def tango_score(fasta_file, Name):
    """Score each FASTA record's Tango output against a reference output.

    For a given id, the total is the sum of the ``Aggregation`` column of the
    tab-separated file ``Tango/<id>.txt`` plus 1. Each record's score is
    ``log10(record total / reference total)``, where the reference total comes
    from ``Tango/<Name>.txt``.

    Parameters
    ----------
    fasta_file : str
        FASTA file whose record ids name the per-variant Tango output files.
    Name : str
        Stem of the reference Tango output file.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` and ``Tango_score`` with duplicate rows removed.
    """
    def _aggregation_total(stem):
        # Summed Aggregation column of Tango/<stem>.txt, offset by 1.
        table = pd.read_csv("Tango/"+stem+".txt", sep='\t')
        return table.Aggregation.sum()+1

    reference_total = _aggregation_total(Name)

    # Single pass over the FASTA so files are read in record order.
    scored = []
    for entry in SeqIO.parse(fasta_file, "fasta"):
        variant_id = str(entry.id)
        ratio = _aggregation_total(variant_id)/reference_total
        scored.append((variant_id, math.log10(ratio)))

    result = pd.DataFrame()
    result['ID'] = [variant_id for variant_id, _ in scored]
    result['Tango_score'] = [score for _, score in scored]
    return result.drop_duplicates()

# Score every record of ADan_2_df.fasta against Tango/ADan.txt and preview
# the first rows.
df = tango_score("ADan_2_df.fasta", "ADan")
df.head()

Unnamed: 0,ID,Tango_score
0,ADan,0.0
1,E-1-K,-0.006985
2,E-1-N,-0.00724
3,E-1-T,-0.006985
4,E-1-R,-0.006985


In [9]:
# Save the Tango scores held in `df` (DataFrame index included) as CSV.
df.to_csv("ADan_2_df_tango_score.csv")