In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from Bio import SeqIO

In [None]:
# Move into the production run directory; the relative paths used further down
# (e.g. 'run.scores.log' and the saved .svg figures) resolve against it.
# The guard keeps this cell re-runnable: once the kernel already sits inside
# runs/prod, a second relative chdir would look for runs/prod/runs/prod and raise.
RUN_DIR = os.path.join('runs', 'prod')
if not os.getcwd().endswith(os.sep + RUN_DIR):
    os.chdir(RUN_DIR)

### Before TP detection

In [None]:
# Collect the record IDs of every sequence in the mined FASTA file and rewrite
# them for the lookup below: '-' is dropped and '/' becomes '_'
# (presumably the form used in the 'PDB name' column — TODO confirm).
my_prots_fasta = '/mnt/DATA/School/2022-2023/Thesis/Scripting/Mining/all.fasta'
my_prots = [
    seq_id.replace('-', '').replace('/', '_')
    for seq_id in SeqIO.to_dict(SeqIO.parse(my_prots_fasta, 'fasta'))
]

# Tab-separated score log. File lines 1-10 (0-based) are skipped, so line 0 is
# still read as the header row. Only rows whose 'PDB name' is one of the
# rewritten FASTA IDs are kept.
runstats = (
    pd.read_csv('run.scores.log', sep='\t', skiprows=list(range(1, 11)))
    .loc[lambda scores: scores['PDB name'].isin(my_prots)]
)
runstats

In [None]:
# Number of retained rows whose 'pLDDT' is below the 70 cutoff.
int((runstats['pLDDT'] < 70).sum())

In [None]:
# Number of retained rows whose 'pTM' is below the 0.7 cutoff.
int((runstats['pTM'] < 0.7).sum())

In [None]:
# Histogram of the 'pLDDT' column over all retained rows, written to an SVG.
# Note: `bins` is the number of evenly spaced bin *edges* between the smallest
# and largest value, so the histogram has bins - 1 intervals.
bins = 55
fig, ax = plt.subplots()
plddt = runstats['pLDDT']
edges = np.linspace(min(plddt), max(plddt), num=bins)
ax.hist(plddt, bins=edges)
ax.axvline(x=70, linestyle='--', color='red')  # dashed line at the 70 cutoff
ax.set_xlabel('pLDDT [%]', fontsize=18)
ax.set_ylabel('Count [-]', fontsize=18)
ax.tick_params(axis='both', labelsize=16)
ax.minorticks_on()
fig.tight_layout()
fig.savefig("pLDDT_prod.svg")

In [None]:
# Histogram of the 'pTM' column over all retained rows, written to an SVG.
# Note: `bins` is the number of evenly spaced bin *edges* between the smallest
# and largest value, so the histogram has bins - 1 intervals.
bins = 55
fig, ax = plt.subplots()
ptm = runstats['pTM']
edges = np.linspace(min(ptm), max(ptm), num=bins)
ax.hist(ptm, bins=edges)
ax.axvline(x=0.7, linestyle='--', color='red')  # dashed line at the 0.7 cutoff
ax.set_xlabel('pTM [-]', fontsize=18)
ax.set_ylabel('Count [-]', fontsize=18)
ax.tick_params(axis='both', labelsize=16)
ax.minorticks_on()
fig.tight_layout()
fig.savefig("pTM_prod.svg")

### After TP detection

In [None]:
# Record IDs from the TP FASTA file, rewritten the same way as the IDs used to
# filter the score table: '-' is dropped and '/' becomes '_'.
tps_fasta = '/mnt/DATA/School/2022-2023/Thesis/Scripting/Mining/all.TP.fasta'
tps = [
    seq_id.replace('-', '').replace('/', '_')
    for seq_id in SeqIO.to_dict(SeqIO.parse(tps_fasta, 'fasta'))
]

#### FPs

In [None]:
# Rows of the score table whose 'PDB name' is NOT among the IDs in `tps`.
runstats_fp = runstats.loc[lambda scores: ~scores['PDB name'].isin(tps)]
runstats_fp

In [None]:
# Number of rows in the FP subset.
len(runstats_fp)

In [None]:
# Histogram of the 'pLDDT' column for the FP subset, written to an SVG.
# Note: `bins` is the number of evenly spaced bin *edges* between the smallest
# and largest value, so the histogram has bins - 1 intervals.
bins = 55
fig, ax = plt.subplots()
plddt = runstats_fp['pLDDT']
edges = np.linspace(min(plddt), max(plddt), num=bins)
ax.hist(plddt, bins=edges)
ax.axvline(x=70, linestyle='--', color='red')  # dashed line at the 70 cutoff
ax.set_xlabel('pLDDT [%]', fontsize=18)
ax.set_ylabel('Count [-]', fontsize=18)
ax.tick_params(axis='both', labelsize=16)
ax.minorticks_on()
fig.tight_layout()
fig.savefig("pLDDT_prod_FP.svg")

In [None]:
# Histogram of the 'pTM' column for the FP subset, written to an SVG.
# Note: `bins` is the number of evenly spaced bin *edges* between the smallest
# and largest value, so the histogram has bins - 1 intervals.
bins = 55
fig, ax = plt.subplots()
ptm = runstats_fp['pTM']
edges = np.linspace(min(ptm), max(ptm), num=bins)
ax.hist(ptm, bins=edges)
ax.axvline(x=0.7, linestyle='--', color='red')  # dashed line at the 0.7 cutoff
ax.set_xlabel('pTM [-]', fontsize=18)
ax.set_ylabel('Count [-]', fontsize=18)
ax.tick_params(axis='both', labelsize=16)
ax.minorticks_on()
fig.tight_layout()
fig.savefig("pTM_prod_FP.svg")

#### TPs

In [None]:
# Rows of the score table whose 'PDB name' is among the IDs in `tps`.
runstats_tp = runstats.loc[lambda scores: scores['PDB name'].isin(tps)]
runstats_tp

In [None]:
# Number of TP rows whose 'pLDDT' is below the 70 cutoff.
int((runstats_tp['pLDDT'] < 70).sum())

In [None]:
# Number of TP rows whose 'pTM' is below the 0.7 cutoff.
int((runstats_tp['pTM'] < 0.7).sum())

In [None]:
# Histogram of the 'pLDDT' column for the TP subset, written to an SVG.
# Note: `bins` is the number of evenly spaced bin *edges* between the smallest
# and largest value, so the histogram has bins - 1 intervals.
bins = 55
fig, ax = plt.subplots()
plddt = runstats_tp['pLDDT']
edges = np.linspace(min(plddt), max(plddt), num=bins)
ax.hist(plddt, bins=edges)
ax.axvline(x=70, linestyle='--', color='red')  # dashed line at the 70 cutoff
ax.set_xlabel('pLDDT [%]', fontsize=18)
ax.set_ylabel('Count [-]', fontsize=18)
ax.tick_params(axis='both', labelsize=16)
ax.minorticks_on()
fig.tight_layout()
fig.savefig("pLDDT_prod_TP.svg")

In [None]:
# Histogram of the 'pTM' column for the TP subset, written to an SVG.
# Note: `bins` is the number of evenly spaced bin *edges* between the smallest
# and largest value, so the histogram has bins - 1 intervals.
bins = 55
fig, ax = plt.subplots()
ptm = runstats_tp['pTM']
edges = np.linspace(min(ptm), max(ptm), num=bins)
ax.hist(ptm, bins=edges)
ax.axvline(x=0.7, linestyle='--', color='red')  # dashed line at the 0.7 cutoff
ax.set_xlabel('pTM [-]', fontsize=18)
ax.set_ylabel('Count [-]', fontsize=18)
ax.tick_params(axis='both', labelsize=16)
ax.minorticks_on()
fig.tight_layout()
fig.savefig("pTM_prod_TP.svg")