In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import os
import sys
import itertools
import MDAnalysis as mda
import MDAnalysisTests as mdtest

In [3]:
class helpers():
    """Namespace of helpers that edit fixed-column PDB coordinate files as text.

    The nested classes group the helpers by level:

    * ``files``      - read / write whole files as lists of lines,
    * ``lines``      - parse, modify and re-format single coordinate lines,
    * ``operations`` - file-to-file transformations built from the two above.

    All helpers are static; call them through the class, e.g.
    ``helpers.files.read_file(path)``.
    """

    class files():
        """Whole-file input and output."""

        @staticmethod
        def read_file(pdb_file):
            """Return the lines of ``pdb_file`` without header/terminator records.

            Lines whose record name (the text at the start of the line) is
            REMARK, TER, TITLE, CRYST1 or SCALE are dropped. Matching is done
            on the line prefix so that a coordinate line that merely contains
            one of these words (for example in its segment column) is kept.
            Returned lines keep their trailing newline.
            """
            skipped_records = ("REMARK", "TER", "TITLE", "CRYST1", "SCALE")
            with open(pdb_file, 'r') as handle:
                return [line for line in handle
                        if not line.startswith(skipped_records)]

        @staticmethod
        def write_file(file, lines):
            """Write ``lines`` (already newline-terminated) to ``file`` as UTF-8."""
            with open(file, mode="w", encoding="utf-8") as handle:
                handle.writelines(lines)
            return

    class lines():
        """Parsing, editing and formatting of single coordinate lines."""

        @staticmethod
        def read_pdb_line(line):
            """Slice one coordinate line into a dict of fixed-column fields.

            The values are the raw column text (padding included) so that
            ``create_line`` can reproduce the line. Column 17 (index 16) and
            columns 67-72 are not stored.

            Raises:
                IndexError: if the line has fewer than 22 columns.
            """
            # Strip the terminator first so it can never end up inside a field.
            record = line.rstrip("\r\n")
            line_dict = {
                "atom": record[0:6],
                "serial_no": record[6:12],
                "atom_name": record[12:16],
                "resname": record[17:21],
                "chain_ID": record[21],
                "resi_sequence_no": record[22:27],
                # Full 8-column coordinate fields: values such as -100.000
                # or 1000.000 use every column.
                "x_coord": record[30:38],
                "y_coord": record[38:46],
                "z_coord": record[46:54],
                "occupancy": record[55:60],
                "temp_fac": record[60:66],
                "segment": record[72:76],
                # Two columns so that symbols like FE or CU are not truncated.
                "element_symbol": record[76:78],
            }
            return line_dict

        @staticmethod
        def create_line(line_dict):
            """Format a field dict (see ``read_pdb_line``) as one 80-column line.

            Every field is padded to its column width, so both the values
            produced by ``read_pdb_line`` and narrower hand-made values
            (5-character serial, 7-character coordinates, 1-character element)
            end up in the same columns.
            """
            d = line_dict
            line = (
                f'{d["atom"]:<6}{d["serial_no"]:<6}{d["atom_name"]:<4} '
                f'{d["resname"]:<4}{d["chain_ID"]:<1}{d["resi_sequence_no"]:<5}   '
                f'{d["x_coord"]:>8}{d["y_coord"]:>8}{d["z_coord"]:>8} '
                f'{d["occupancy"]:>5}{d["temp_fac"]:>6}      '
                f'{d["segment"]:<4}{d["element_symbol"]:>2}  \n'
            )
            return line

        @staticmethod
        def fill_serial(serial_no, line_dict):
            """Store ``serial_no`` right-aligned in 5 columns under "serial_no"."""
            line_dict["serial_no"] = f"{serial_no:>5}"
            return line_dict

        @staticmethod
        def fill_resi_sequence_no(resi_no, line_dict):
            """Store ``resi_no`` right-aligned in 4 columns plus one blank.

            Numbers above 9999 do not fit in the 4-column field; in that case
            ``line_dict`` is returned unchanged.
            """
            if resi_no <= 9999:
                line_dict["resi_sequence_no"] = f"{resi_no:>4} "
            return line_dict

        @staticmethod
        def add_terminus(lines):
            """Make sure ``lines`` ends with a TER record (appended in place).

            An existing final ``TER`` (with or without newline) is left alone;
            an empty list receives a single TER record.
            """
            if not lines or lines[-1].strip() != "TER":
                lines.append("TER\n")
            return lines

        @staticmethod
        def exchange_segment(line_dict, segment):
            """Replace the segment field of ``line_dict``."""
            line_dict["segment"] = segment
            return line_dict

        @staticmethod
        def exchange_chainID(line_dict, chainID):
            """Replace the chain identifier field of ``line_dict``."""
            # The field key used by read_pdb_line/create_line is "chain_ID".
            line_dict["chain_ID"] = chainID
            return line_dict

    class operations():
        """File-to-file transformations."""

        @staticmethod
        def _edit_records(lines, edit):
            """Parse every line, call ``edit(serial_no, fields)``, re-format.

            ``serial_no`` counts the lines starting at 1. ``edit`` changes the
            field dict in place; its return value is ignored.
            """
            edited = []
            for serial_no, line in enumerate(lines, start=1):
                fields = helpers.lines.read_pdb_line(line=line)
                edit(serial_no, fields)
                edited.append(helpers.lines.create_line(line_dict=fields))
            return edited

        @staticmethod
        def split_segment(pdb_file, segname, pdb_id):
            """Write the lines containing ``segname`` to coords/{pdb_id}_{segname}.pdb.

            NOTE: the match is a substring test on the whole line, not only on
            the segment column.
            """
            lines = helpers.files.read_file(pdb_file=pdb_file)
            selected = [k for k in lines if segname in k]
            helpers.files.write_file(file=f'coords/{pdb_id}_{segname}.pdb', lines=selected)
            return

        @staticmethod
        def split_segments(pdb_file, segnames, pdb_id):
            """Run ``split_segment`` once for every name in ``segnames``."""
            for segname in segnames:
                helpers.operations.split_segment(pdb_file=pdb_file, segname=segname, pdb_id=pdb_id)
            return

        @staticmethod
        def split_waterchains(pdb_file, output_name):
            """Split a water file into files of at most 9999 three-atom residues.

            Chunk ``n`` is written to coords/{output_name}{n}.pdb and then
            renumbered in place with ``renumber_tip3``.
            """
            lines_per_file = 9999 * 3  # 9999 residues x 3 atoms = 29997 lines
            lines = helpers.files.read_file(pdb_file=pdb_file)
            filenames = []
            for index, start in enumerate(range(0, len(lines), lines_per_file)):
                chunk = helpers.lines.add_terminus(lines=lines[start:start + lines_per_file])
                filename = "coords/" + output_name + f"{index}.pdb"
                filenames.append(filename)
                helpers.files.write_file(file=filename, lines=chunk)
            for filename in filenames:
                # The segment label is characters 12-15 of the path, i.e. it
                # starts at the sixth character of output_name ("coords/" is
                # 7 characters long). For "3HB3_WAT" + "0" this is "WAT0".
                helpers.operations.renumber_tip3(pdb_file=filename, pdb_file_output=filename, segment=filename[12:16])
            return

        @staticmethod
        def fuse_segments(pdb_files, pdb_output):
            """Concatenate the (filtered) lines of ``pdb_files`` into ``pdb_output``."""
            fused = []
            for pdb_file in pdb_files:
                fused.extend(helpers.files.read_file(pdb_file=pdb_file))
            helpers.files.write_file(file=pdb_output, lines=fused)
            return

        @staticmethod
        def add_segment(pdb_file, pdb_file_output, segment):
            """Set the segment field of every line and append a TER record."""
            lines = helpers.files.read_file(pdb_file=pdb_file)
            edited = helpers.operations._edit_records(
                lines,
                lambda _serial, fields: helpers.lines.exchange_segment(line_dict=fields, segment=segment),
            )
            helpers.files.write_file(file=pdb_file_output, lines=helpers.lines.add_terminus(lines=edited))
            return

        @staticmethod
        def add_chainID(pdb_file, pdb_file_output, chainID):
            """Set the chain identifier of every line and append a TER record."""
            lines = helpers.files.read_file(pdb_file=pdb_file)
            edited = helpers.operations._edit_records(
                lines,
                lambda _serial, fields: helpers.lines.exchange_chainID(line_dict=fields, chainID=chainID),
            )
            helpers.files.write_file(file=pdb_file_output, lines=helpers.lines.add_terminus(lines=edited))
            return

        @staticmethod
        def change_temp_factors(pdb_file, restraints_file):
            """Write ``restraints_file`` with the temp-factor column set per atom name.

            * names starting with "H"                   -> 0.00
            * names starting with "C" but not with "CA" -> 0.50
            * everything else                           -> 1.00

            The atom name is compared without its column padding, so both
            " HT1" and "HG11" count as starting with "H". No TER record is
            written.
            """
            def assign(_serial, fields):
                name = fields["atom_name"].strip()
                if name.startswith("H"):
                    fields["temp_fac"] = "  0.00"
                elif name.startswith("C") and not name.startswith("CA"):
                    fields["temp_fac"] = "  0.50"
                else:
                    fields["temp_fac"] = "  1.00"

            lines = helpers.files.read_file(pdb_file=pdb_file)
            edited = helpers.operations._edit_records(lines, assign)
            helpers.files.write_file(file=restraints_file, lines=edited)
            return

        @staticmethod
        def renumber(pdb_file, pdb_file_output):
            """Renumber the serial column 1..N and append a TER record.

            Raises:
                ValueError: if the file has more than 99999 lines.
            """
            lines = helpers.files.read_file(pdb_file=pdb_file)
            if len(lines) > 99999:
                raise ValueError("len(lines)>99999. Try again with less atoms.")
            edited = helpers.operations._edit_records(
                lines,
                lambda serial, fields: helpers.lines.fill_serial(serial_no=serial, line_dict=fields),
            )
            helpers.files.write_file(file=pdb_file_output, lines=helpers.lines.add_terminus(lines=edited))
            return

        @staticmethod
        def renumber_tip3(pdb_file, pdb_file_output, segment):
            """Renumber a water file: serials 1..N, one residue per 3 lines.

            Every line also gets ``segment`` as its segment field, and a TER
            record is appended.

            Raises:
                ValueError: if the file has more than 99999 lines.
            """
            lines = helpers.files.read_file(pdb_file=pdb_file)
            if len(lines) > 99999:
                raise ValueError("len(lines)>99999. Try again with less atoms.")

            def relabel(serial, fields):
                helpers.lines.fill_serial(serial_no=serial, line_dict=fields)
                # Lines 1-3 belong to residue 1, lines 4-6 to residue 2, ...
                helpers.lines.fill_resi_sequence_no(resi_no=((serial - 1) // 3) + 1, line_dict=fields)
                fields["segment"] = segment

            edited = helpers.operations._edit_records(lines, relabel)
            helpers.files.write_file(file=pdb_file_output, lines=helpers.lines.add_terminus(lines=edited))
            return

    @staticmethod
    def __main__(pdb_file, pdb_id):
        """Split ``pdb_file`` by segment, renumber the parts and chunk the water.

        All output goes to ``coords/{pdb_id}_*.pdb``.

        Optional pre-step (disabled): if the input came without segment
        identifiers, stamp each split file with
        ``helpers.operations.add_segment`` (ACHA, BCHA, CCHA, META, EHEM, FEOH,
        GHEM, MEMB, TIP3, IONS), fuse them with
        ``helpers.operations.fuse_segments`` into ``coords/{pdb_id}_.pdb`` and
        use that file as ``pdb_file``; remove it (and the TIP3 file) afterwards.
        """
        # Split the input by the segments we want. Segment names omitted from
        # this list are excluded from all further processing.
        helpers.operations.split_segments(
            pdb_file=pdb_file,
            segnames=["ACHA", "BCHA", "CCHA", "META", "EHEM", "FEOH", "GHEM", "OHMI", "MEMB", "TIP3", "IONS"],
            pdb_id=pdb_id,
        )

        # Fix the atom serial numbers of the split files in place. CCHA is not
        # renumbered here and TIP3 is handled by split_waterchains below.
        for segname in ("ACHA", "BCHA", "META", "EHEM", "FEOH", "GHEM", "OHMI", "MEMB", "IONS"):
            segment_file = f"coords/{pdb_id}_{segname}.pdb"
            helpers.operations.renumber(pdb_file=segment_file, pdb_file_output=segment_file)

        # For large systems: split water into files of 9999 residues each.
        # Water renumbering happens inside split_waterchains.
        helpers.operations.split_waterchains(pdb_file=f"coords/{pdb_id}_TIP3.pdb", output_name=pdb_id+"_WAT")
        return

In [6]:
# Process the Pmoxox input file. Output is written to coords/3HB3_<segment>.pdb
# and coords/3HB3_WAT<n>.pdb; every run in this notebook uses pdb_id="3HB3", so
# running another input afterwards overwrites these files.
helpers.__main__(pdb_file="coords/Pmoxox/step5_aligned_Pmoxox.pdb", pdb_id="3HB3")

In [None]:
# Process the Pmredox input file. Output goes to coords/3HB3_*.pdb, the same
# paths used by the other runs in this notebook (same pdb_id), so it replaces
# whatever a previous run left there.
helpers.__main__(pdb_file="coords/Pmredox/step5_aligned_Pmredox.pdb", pdb_id="3HB3")

In [None]:
# Process the Proxred input file. Output goes to coords/3HB3_*.pdb, the same
# paths used by the other runs in this notebook (same pdb_id), so it replaces
# whatever a previous run left there.
helpers.__main__(pdb_file="coords/Proxred/step5_aligned_Proxred.pdb", pdb_id="3HB3")

In [5]:
# Process the Foxox input file. Output goes to coords/3HB3_*.pdb, the same
# paths used by the other runs in this notebook (same pdb_id), so it replaces
# whatever a previous run left there.
helpers.__main__(pdb_file="coords/Foxox/step5_aligned_Foxox.pdb", pdb_id="3HB3")