In [1]:
from pathlib import Path
import numpy as np

def lines2xyz(lines):
    return np.array([_.split()[1:] for _ in lines], dtype="float")

def lines2atomnames(lines):
    return [_.split()[0] for _ in lines]


In [2]:
def get_pdb(fileLines, resids, sys):
    header = """HEADER
TITLE
REMARK
"""
    pdb_format = (
        "{:6s}{:5d} {:<4s}{:1s}{:4s}{:1s}{:4d}{:1s}   "
        "{:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}"
        "          {:>2s}{:2s}\n"
    )
    _str = header
    last_resid = resids[0]
    res_id_count = 0
    res_ids_ = []
    restypes = resids
    
    atomnames = lines2atomnames(fileLines)
    xyzs = lines2xyz(fileLines)

    for i, line in enumerate(fileLines):
        AN = atomnames[i]
        RESNAME = restypes[i]

        if AN == "Cl":
            print(i, line)
            if atomnames[i - 1] == "CL1":
                AN = "CL2"
                atomnames[i] = "CL2"
            else:
                AN = "CL1"
                atomnames[i] = "CL1"

            print(atomnames[i], AN)

        if RESNAME == "CLA":
            AN = "CLA"
            atomnames[i] = "CLA"
        if RESNAME == "CLA":
            AN = "CLA"
            atomnames[i] = "CLA"
            
        if RESNAME == "DCM" and atomnames[i] == "C":
                res_id_count += 1
            
        if RESNAME == "DCM" and AN.__contains__("H"):
            if atomnames[i - 1].__contains__("H"):
                print("H2")
                AN = "H2"
                atomnames[i] = "H" #"H2"
            else:
                AN = "H1"
                atomnames[i] = "H" #"H1"
                print("H1")

        if RESNAME == "TIP3":
            if atomnames[i].startswith("H"):
                atomnames[i] = "HT"
                if i == 1 or i == 4:
                    AN = "H1"
                else:
                    AN = "H2"
                if sys == "water":
                    if (i+1) % 3:
                        AN = "H1"
                    else:
                        AN = "H2"
            if atomnames[i].startswith("O"):
                atomnames[i] = "OT"
                AN = "OH2"
                if sys == "water":
                    res_id_count += 1

        if resids[i] != last_resid:
            res_id_count += 1
            last_resid = resids[i]

        RESTYPE = restypes[i].upper()
        
        if AN == "K" or AN == "Po" or AN=="POT":
            AN = "POT"
            atomnames[i] = "K"
            RESTYPE = "POT"

        _1 = "ATOM"
        _2 = i + 1
        _3 = AN.upper()
        _4 = ""
        _5 = RESTYPE
        _6 = ""
        _7 = res_id_count  # resids[i]
        _8 = ""
        _9 = xyzs[i, 0]
        _10 = xyzs[i, 1]
        _11 = xyzs[i, 2]
        _12 = 0.0
        _13 = 0.0
        _14 = atomnames[i]
        _15 = " "
        _ = pdb_format.format(
            _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15
        )
        _str += _
    _str += "END"
    return _str

In [5]:
revPath = Path("/home/boittier/mike")

In [8]:
tip3 = revPath / "water_redo.xyz"

In [9]:
tip3_f = open(tip3).readlines()
test_lines = tip3_f[2:62]

In [12]:
"""
water test systems
"""
for i in range(500):
    lines = tip3_f[2+62*i:62+62*i]
    resids = ["TIP3"]*60
    pdb_str = get_pdb(lines, resids, "water")    
    with open(f"tmppdbs/test{i}.pdb", "w") as f:
        f.writelines(pdb_str)
        
    

In [26]:
dcm = revPath / "sampledDCM.xyz"

In [46]:
dcm_f = open(dcm).readlines()

for i in range(20):
    test_lines = dcm_f[2+102*i:102+102*i]
    resids = ["DCM"] * len(test_lines)
    test_str = get_pdb(test_lines, resids, "dcm")
    with open(f"tmppdbs/100_{i}_DCM_{i}_{i}.pdb", "w") as f:
        f.writelines(test_str)


1  Cl        8.723000      -12.628000       13.034000

CL1 CL1
2  Cl        8.612000      -15.174000       14.464000

CL2 CL2
H1
H2
6  Cl       15.935000      -13.998000        8.561000

CL1 CL1
7  Cl       18.135000      -15.881000        8.819000

CL2 CL2
H1
H2
11  Cl        1.572000       -8.927000        6.020000

CL1 CL1
12  Cl        4.212000       -9.630000        6.396000

CL2 CL2
H1
H2
16  Cl       10.714000       -5.203000       11.930000

CL1 CL1
17  Cl       10.823000       -8.139000       12.208000

CL2 CL2
H1
H2
21  Cl        2.309000      -12.409000       15.442000

CL1 CL1
22  Cl        3.057000      -14.200000       17.690001

CL2 CL2
H1
H2
26  Cl        6.089000      -15.111000        5.993000

CL1 CL1
27  Cl        6.738000      -13.285000        3.989000

CL2 CL2
H1
H2
31  Cl        2.804000       -8.524000       10.712000

CL1 CL1
32  Cl        3.545000      -11.314000       11.295000

CL2 CL2
H1
H2
36  Cl        7.871000       -7.743000       16.218000

CL1 CL1
37

In [49]:
cla = revPath / "sampledCLA.xyz"

In [51]:
cla_f = open(cla).readlines()
test_lines = cla_f[2:39]

In [59]:
resids = ["TIP3"] * 36
resids.append("CLA")

for i in range(20):
    test_lines = cla_f[2+39*i:39+39*i]
    test_str = get_pdb(test_lines, resids, "water")
    with open(f"tmppdbs/CLA_{i}_{i}.pdb", "w") as f:
        f.writelines(test_str)
        
        

36  Cl        6.351150        0.841290       -1.790630

CL1 CL1
36  Cl        6.308930        0.686374       -1.823702

CL1 CL1
36  Cl        6.032403        0.418515       -1.442074

CL1 CL1
36  Cl        6.195078       -0.101991       -2.195251

CL1 CL1
36  Cl        6.098424        0.506749       -2.135354

CL1 CL1
36  Cl        6.376487        0.117581       -1.692223

CL1 CL1
36  Cl        6.052575        0.289121       -2.102545

CL1 CL1
36  Cl        5.608980        0.385296       -1.965489

CL1 CL1
36  Cl        5.736208        0.650684       -1.987683

CL1 CL1
36  Cl        6.143481        0.378848       -1.585808

CL1 CL1
36  Cl        5.964906        0.245239       -1.822106

CL1 CL1
36  Cl        6.224734       -0.049902       -2.159942

CL1 CL1
36  Cl        6.394976       -0.078203       -1.416623

CL1 CL1
36  Cl        6.409546        0.033264       -1.503508

CL1 CL1
36  Cl        5.688294        0.456845       -1.751840

CL1 CL1
36  Cl        5.856779        0.605579  

In [75]:
pot = revPath / "sampledPOT.xyz"

In [76]:
pot_f = open(pot).readlines()
test_lines = pot_f[2:39]

In [79]:
resids = ["TIP3"] * 36
resids.append("POT")

for i in range(28):
    test_lines = pot_f[2+39*i:39+39*i]
    test_str = get_pdb(test_lines, resids, "water")
    with open(f"tmppdbs/POT_{i}_{i}.pdb", "w") as f:
        f.writelines(test_str)
        
        

In [80]:
# test_lines
print(test_str)

HEADER
TITLE
REMARK
ATOM      1 OH2  TIP3    1       0.805 -11.134   4.037  0.00  0.00          OT  
ATOM      2 H1   TIP3    1       0.639 -11.886   3.445  0.00  0.00          HT  
ATOM      3 H2   TIP3    1       0.925 -11.535   4.921  0.00  0.00          HT  
ATOM      4 OH2  TIP3    2       0.473  -6.757   3.665  0.00  0.00          OT  
ATOM      5 H1   TIP3    2       0.354  -6.435   2.791  0.00  0.00          HT  
ATOM      6 H2   TIP3    2       1.345  -6.455   3.919  0.00  0.00          HT  
ATOM      7 OH2  TIP3    3      -0.752 -12.859   8.439  0.00  0.00          OT  
ATOM      8 H1   TIP3    3       0.018 -13.420   8.433  0.00  0.00          HT  
ATOM      9 H2   TIP3    3      -0.513 -12.286   7.725  0.00  0.00          HT  
ATOM     10 OH2  TIP3    4      -1.703  -9.106   2.415  0.00  0.00          OT  
ATOM     11 H1   TIP3    4      -0.908  -8.850   1.925  0.00  0.00          HT  
ATOM     12 H2   TIP3    4      -2.336  -8.522   1.986  0.00  0.00          HT  
ATOM    