In [None]:
import networkx as nx
# Load the edgotyping interaction graph from its GEXF export.
# NOTE(review): the extension is spelled ".gefx" rather than ".gexf" — confirm
# that this matches the file name on disk.
edgotype = nx.read_gexf("data/y2hEdgotyping/edgotype.gefx")
# The pickled train/val/test split loaders below are disabled. Note that a later
# cell still iterates over `edgotype_train`, which nothing else visible defines.
# edgotype_train = nx.read_gpickle("data/y2hEdgotyping/edgotype_train.gpickle")
# edgotype_val = nx.read_gpickle("data/y2hEdgotyping/edgotype_val.gpickle")
# edgotype_test = nx.read_gpickle("data/y2hEdgotyping/edgotype_test.gpickle")

In [None]:
import os
import pandas as pd
# Read the six tab-separated UniProt scan result files (sequence_0 .. sequence_5)
# and stack them into a single table. The per-file row indices are kept as-is.
seqPaths = [f"data/y2hEdgotyping/uniprotScan/sequence_{i}.tsv" for i in range(6)]
seqFiles = [pd.read_csv(seqPath, delimiter="\t") for seqPath in seqPaths]

uniprotMatches = pd.concat(seqFiles)
def mergeWithUniprot(graph, matches=None, alphafold_dir="/data/dzeiberg/alphafold/predictions"):
    """Annotate every node of ``graph`` with its UniProt matches and AlphaFold files.

    For each node, the rows of the match table whose ``Sequence`` equals the
    node's ``"seq"`` attribute, whose ``Reviewed`` value is ``"reviewed"`` and
    whose ``Organism`` is ``"Homo sapiens (Human)"`` are stored under the node
    attribute ``"uniprotMatches"``. The node attribute ``"alphafoldStructures"``
    receives the paths ``<alphafold_dir>/AF-<Entry>-F1-model_v4.pdb.gz`` that
    exist on disk, one candidate per matching ``Entry``.

    Parameters
    ----------
    graph
        Object exposing ``graph.nodes(data=True)`` and ``graph.nodes[node_id]``;
        every node must carry a ``"seq"`` attribute.
    matches
        DataFrame with ``Sequence``, ``Reviewed``, ``Organism`` and ``Entry``
        columns. Defaults to the notebook-level ``uniprotMatches`` table.
    alphafold_dir
        Directory searched for AlphaFold model files.

    Returns
    -------
    The same ``graph`` object; it is modified in place.

    Raises
    ------
    KeyError
        If a node has no ``"seq"`` attribute.
    """
    if matches is None:
        matches = uniprotMatches

    # The reviewed/human restriction is the same for every node, so apply it
    # once instead of re-scanning the whole table on each iteration.
    reviewed_human = matches[(matches.Reviewed == "reviewed") &
                             (matches.Organism == "Homo sapiens (Human)")]

    for node_id, attrs in graph.nodes(data=True):
        up = reviewed_human[reviewed_human.Sequence == attrs["seq"]]

        # Keep only the model files that are actually present locally.
        alphafoldStructures = []
        for uniprot_id in up["Entry"]:
            fp = os.path.join(alphafold_dir, f"AF-{uniprot_id}-F1-model_v4.pdb.gz")
            if os.path.isfile(fp):
                alphafoldStructures.append(fp)

        graph.nodes[node_id]["uniprotMatches"] = up
        graph.nodes[node_id]["alphafoldStructures"] = alphafoldStructures
    return graph

In [None]:
# Attach UniProt matches and AlphaFold paths to every node. mergeWithUniprot
# mutates its argument and returns it, so `edgotype_x` and `edgotype` are the
# same graph object.
edgotype_x = mergeWithUniprot(edgotype)

In [None]:
# Write one single-record FASTA file per node, named after the node id, and
# remember every path that was targeted.
paths = []
for ensg, n in edgotype_x.nodes(data=True):
    p = f"/data/dzeiberg/ppi/alphafold/{ensg}.faa"
    seq = n["seq"]
    paths += [p]
    with open(p, "w") as faa:
        # Header line is the node id; the sequence follows on a single line.
        faa.write(f">{ensg}\n{seq}\n")

In [None]:
# Display all FASTA paths as one comma-separated string.
",".join(paths)

In [None]:
# Map each edge's "db_ensembl_gene_id_mt" value to the set of "Substitution"
# values seen on edges carrying that id.
node2Vars = {}
for i, j, e in edgotype_x.edges(data=True):
    db = e["db_ensembl_gene_id_mt"]
    node2Vars.setdefault(db, set()).add(e["Substitution"])

In [None]:
# Collect the distinct PDB ids listed for any node's UniProt matches.
pdbs = set()
for _,e in edgotype_x.nodes(data=True):
    pdb = e["uniprotMatches"].PDB.values
    # Only the first matching row is consulted. A non-string first value
    # (presumably a missing cross-reference — verify) is skipped.
    if len(pdb) and type(pdb[0]) is str:
        # Drop empty pieces rather than slicing off the last element, so the
        # final id survives when the string has no trailing ";". This matches
        # the other cells, which strip ";" before splitting.
        pdb = [pdb_id for pdb_id in pdb[0].split(";") if pdb_id]
        pdbs.update(pdb)

In [None]:
# Inspect the attributes stored on one node (the same gene id is used for the
# mutfile written in the next cell).
edgotype_x.nodes["ENSG00000223609"]

In [None]:
# Write the substitutions recorded for ENSG00000223609 to a mutation file:
# the number of substitutions on the first line, then for each substitution a
# "1" line followed by "<position> A PIKAA <mutant residue>".
# NOTE(review): Rosetta's ddG mutfile documentation describes a "total N" header
# and "<wt> <pos> <mut>" lines; confirm the consuming application accepts this layout.
muts = list(node2Vars["ENSG00000223609"])
with open("/home/dzeiberg/test_rosetta/1si4.mutfile","w") as f:
    f.writelines([str(len(muts)), "\n"])
    for v in muts:
        # Everything between the first and last character is the position;
        # the last character is the substituted residue.
        loc, mut = v[1:-1], v[-1]
        f.writelines(["1\n", f"{loc} A PIKAA {mut}\n"])

In [None]:
# Persist the collected PDB ids as a single comma-separated line (no trailing
# newline). `pdbs` is a set, so the order of ids in the file is not fixed.
with open("/data/dzeiberg/ppi/y2hEdgotyping/foldX/pdb_id_list.txt","w") as f:
    pdb_id_csv = ",".join(pdbs)
    f.write(pdb_id_csv)

In [None]:
import os
import shutil

In [None]:
import subprocess

In [None]:
def _wildtype_matches(pdb_seq, varstr):
    """Return True when ``pdb_seq`` holds the variant's wild-type residue at its position.

    ``varstr`` is read as ``<wild-type><position><mutant>`` (e.g. ``"A123V"``),
    the same layout the mutfile cell relies on when it slices ``v[1:-1]`` and
    ``v[-1]``. The whole numeric part is parsed; the previous check used only
    its first digit. Positions past the end of ``pdb_seq`` count as
    non-matching instead of raising ``IndexError``.
    """
    position = int(varstr[1:-1])
    # NOTE(review): the position is used directly as a string index, exactly as
    # the original check did. Whether an offset is needed depends on the layout
    # of the SequenceOnly output line and on the structure's residue numbering
    # — confirm against a real SO_*.fxout file.
    return position < len(pdb_seq) and pdb_seq[position] == varstr[0]


# For every gene that has variants, stage each of its locally available PDB
# structures in its own FoldX working directory, run SequenceOnly on it, and
# write the RepairPDB config plus (when any variant passes the wild-type check)
# the PositionScan config.
for db, varstrs in node2Vars.items():
    pdb = edgotype_x.nodes[db]["uniprotMatches"].PDB
    # Only the first matching UniProt row is consulted; skip genes whose first
    # PDB value is absent or not a string.
    if not (len(pdb.values) and type(pdb.values[0]) is str):
        continue
    pdb_ids = pdb.values[0].strip(";").split(";")
    for i, p_id in enumerate(pdb_ids):
        pdb_file = os.path.join("/data/dzeiberg/ppi/y2hEdgotyping/foldX/pdb_files/", p_id + ".pdb")
        if not os.path.isfile(pdb_file):
            continue

        # `i` is the structure's index in the UniProt PDB list, so directory
        # numbers may have gaps where a PDB file is missing locally.
        p_dir = os.path.join("/data/dzeiberg/ppi/y2hEdgotyping/foldX/data", db + f"_struct_{i}")
        os.makedirs(p_dir, exist_ok=True)
        shutil.copy(pdb_file, p_dir)

        # check=True turns a non-zero FoldX exit status into CalledProcessError.
        subprocess.run(["/data/utilities/bio/foldX/foldx_20231231", "--command=SequenceOnly", f"--pdb={p_id}.pdb"],
                       cwd=p_dir, check=True)
        with open(os.path.join(p_dir, f"SO_{p_id}.fxout")) as so:
            # The second line of the SequenceOnly output is taken as the sequence.
            pdb_seq = so.readlines()[1]

        # NOTE(review): the leading spaces inside both config files are kept
        # exactly as the original wrote them — confirm FoldX tolerates them.
        with open(os.path.join(p_dir, "config_1.cfg"), "w") as f:
            f.write(f"command=RepairPDB\n    pdb={p_id}.pdb")

        validvars = [v for v in varstrs if _wildtype_matches(pdb_seq, v)]
        if validvars:
            # Each entry is <wild-type> + chain "A" + <position><mutant>.
            positions = ",".join(v[0] + "A" + v[1:] for v in validvars)
            with open(os.path.join(p_dir, "config_2.cfg"), "w") as f:
                f.write(f"command=PositionScan\n        pdb={p_id}.pdb\n        positions={positions}")

In [None]:
# Debug check: does the SequenceOnly output exist for the `p_dir` / `p_id`
# values left bound by the FoldX staging loop?
os.path.isfile(os.path.join(p_dir,f"SO_{p_id}.fxout"))

In [None]:
import pandas as pd

In [None]:
# Gather, per node, the list of PDB ids from the first UniProt match row.
# NOTE(review): no visible cell defines `edgotype_train` (its loader is
# commented out in the first cell), so this raises NameError on a fresh kernel
# unless it is defined elsewhere — confirm which graph is intended.
li = []
for n in edgotype_train.nodes(data=True):
    m = n[1]["uniprotMatches"]
    if not len(m.PDB.values) or type(m.PDB.values[0]) is not str:
        # No match rows, or the first PDB value is not a string.
        continue
    li.append(m.PDB.values[0].strip(";").split(";"))

In [None]:
# Flatten the per-node id lists into one list, preserving order and duplicates.
pdbids = [pdb_id for node_ids in li for pdb_id in node_ids]

In [None]:
# Persist the flattened PDB id list as a single comma-separated line (no
# trailing newline).
with open("/data/dzeiberg/ppi/y2hEdgotyping/foldX/pdb_ids.txt","w") as f:
    pdbids_csv = ",".join(pdbids)
    f.write(pdbids_csv)