# NAFION IONOMER
## `DREIDING` PARAMETRIZATION AND `ESP method of Merz-Kollman` CHARGES

In [1]:
! pwd

/home/kchinas/Documents/PhD/LAMMPS/CL/2ndMODEL/3rdAttmp/ionomer


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='ticks')

import re
import os

def get_file(filename):
    """
    Load a text file into a one-column DataFrame.

    Each line of the file (trailing newline included) becomes one row of
    the "data" column; rows are indexed 0..n-1 in file order.
    """
    with open(filename, "r") as handle:
        lines = list(handle)
    return pd.DataFrame(lines, columns=['data'])

In [3]:
! ls

DREIDING.pdf		 O5R9_allatom_optimised_geometry.pdb
generate_mol_file.ipynb  O5R9_allatom_optimized_geometry.lt
ionomer_DREIDING.lammps


In [4]:
# Raw, line-per-row views of the two structure files listed above.
raw_ionomer_ATB = get_file(filename="O5R9_allatom_optimized_geometry.lt")
raw_ionomer_MATSQ = get_file(filename="ionomer_DREIDING.lammps")

In [5]:
def get_ATB_data(ionomer):
    """
    Build the per-atom table from the raw lines of the ATB structure file.

    Only lines containing ``$atom:<LETTER><digits>`` followed by whitespace
    and ``$mol`` are used. Each such line is split on whitespace and read
    positionally: token 0 is the atom name, token 2 the ``@atom:`` type,
    token 3 the charge and tokens 4-6 the x, y, z coordinates.

    Parameters
    ----------
    ionomer : pandas.DataFrame
        Frame with a "data" column holding one text line per row
        (as produced by ``get_file``). It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns "atom", "elements" (atom name with digits removed),
        "gromos elemts", "charge", "x", "y", "z"; index 0..n-1.

    Raises
    ------
    IndexError
        If a matching line has fewer than seven whitespace-separated tokens.
    ValueError
        If a charge or coordinate token is not a valid float.
    """
    is_atom_line = ionomer["data"].str.contains(r"\$atom:[A-Z]\d+\s+\$mol")
    # str.split() without a pattern splits on runs of whitespace and never
    # yields empty tokens. It replaces the former '\s' pattern (an invalid
    # escape in a non-raw string literal) plus the manual empty-token filter.
    tokens = ionomer.loc[is_atom_line, "data"].str.split().reset_index(drop=True)

    data = pd.DataFrame(index=tokens.index)
    data["atom"] = tokens.apply(lambda fields: fields[0].replace("$atom:", ""))
    data["elements"] = data["atom"].str.replace(r"\d+", "", regex=True)
    data["gromos elemts"] = tokens.apply(lambda fields: fields[2].replace("@atom:", ""))
    # Charge and Cartesian coordinates occupy consecutive token positions 3..6.
    for column, position in (("charge", 3), ("x", 4), ("y", 5), ("z", 6)):
        data[column] = tokens.apply(lambda fields, position=position: fields[position]).astype(float)
    return data

def get_ATB_bonds(ionomer):
    """
    Build the bond table from the raw lines of the ATB structure file.

    Only lines containing ``$bond:b`` are used. Each is split on whitespace
    and read positionally: token 0 is the bond index, token 1 the bond type,
    tokens 2 and 3 the names of the two bonded atoms.

    Parameters
    ----------
    ionomer : pandas.DataFrame
        Frame with a "data" column holding one text line per row.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        String columns "b" (index, ``$bond:b`` prefix removed), "g" (type,
        ``@bond:g`` prefix removed), "atom1" and "atom2" (``$atom:`` prefix
        removed); index 0..n-1.

    Raises
    ------
    IndexError
        If a matching line has fewer than four whitespace-separated tokens.
    """
    is_bond_line = ionomer["data"].str.contains(r"\$bond:b")
    # Whitespace split without a pattern: avoids the invalid '\s' escape of a
    # non-raw string literal and yields no empty tokens to filter out.
    tokens = ionomer.loc[is_bond_line, "data"].str.split().reset_index(drop=True)

    data = pd.DataFrame(index=tokens.index)
    for column, position, prefix in (
        ("b", 0, "$bond:b"),      # index
        ("g", 1, "@bond:g"),      # type
        ("atom1", 2, "$atom:"),
        ("atom2", 3, "$atom:"),
    ):
        data[column] = tokens.apply(
            lambda fields, position=position, prefix=prefix: fields[position].replace(prefix, "")
        )
    return data

def get_MATSQ_data(ionomer,slices):
    """
    Build the per-atom table from a row range of the MATSQ structure file.

    Rows ``slices[0]`` through ``slices[1]`` (label-based, both ends
    included) are split on whitespace and read positionally: atom id,
    molecule id, atom type, charge, x, y, z.

    Parameters
    ----------
    ionomer : pandas.DataFrame
        Frame with a "data" column holding one text line per row.
        It is not modified (the selected rows are copied).
    slices : sequence of two ints
        First and last row label of the atom records.

    Returns
    -------
    pandas.DataFrame
        Columns "data" (the token list of each line), "atom id",
        "molecule id", "atom type" (int), "charge", "x", "y", "z" (float)
        and "dreiding elemts" (label mapped from the atom type; NaN for
        types other than 1-5); index 0..n-1.

    Raises
    ------
    IndexError
        If a selected line has fewer than seven tokens (e.g. a blank line).
    ValueError
        If a token cannot be converted to the expected numeric type.
    """
    data = ionomer.loc[slices[0]:slices[1]].copy().reset_index(drop=True)
    # Whitespace split without a pattern: avoids the invalid '\s' escape of a
    # non-raw string literal and yields no empty tokens to filter out.
    data["data"] = data["data"].str.split()
    for column, position, dtype in (
        ("atom id", 0, int),
        ("molecule id", 1, int),
        ("atom type", 2, int),
        ("charge", 3, float),
        ("x", 4, float),
        ("y", 5, float),
        ("z", 6, float),
    ):
        data[column] = data["data"].apply(lambda fields, position=position: fields[position]).astype(dtype)
    data["dreiding elemts"] = data["atom type"].map({1:"F_",2:"C_3",3:"O_3",4:"O_2",5:"S_3"})
    return data

def get_MATSQ_bonds(ionomer,slices):
    """
    Build the bond table from a row range of the MATSQ structure file.

    Rows ``slices[0]`` through ``slices[1]`` (label-based, both ends
    included) are split on whitespace and read positionally: bond id,
    bond type, id of the first atom, id of the second atom.

    Parameters
    ----------
    ionomer : pandas.DataFrame
        Frame with a "data" column holding one text line per row.
        It is not modified (the selected rows are copied).
    slices : sequence of two ints
        First and last row label of the bond records.

    Returns
    -------
    pandas.DataFrame
        Columns "data" (the token list of each line), "bond id",
        "bond type", "atom1" and "atom2" (all int); index 0..n-1.

    Raises
    ------
    IndexError
        If a selected line has fewer than four tokens (e.g. a blank line).
    ValueError
        If a token is not a valid integer.
    """
    data = ionomer.loc[slices[0]:slices[1]].copy().reset_index(drop=True)
    # Whitespace split without a pattern: avoids the invalid '\s' escape of a
    # non-raw string literal and yields no empty tokens to filter out.
    data["data"] = data["data"].str.split()
    for position, column in enumerate(("bond id", "bond type", "atom1", "atom2")):
        data[column] = data["data"].apply(lambda fields, position=position: fields[position]).astype(int)
    return data

# ATB tables: atom and bond records are located by pattern matching.
ATB_structure = get_ATB_data(raw_ionomer_ATB)
ATB_bonds = get_ATB_bonds(raw_ionomer_ATB)

# MATSQ tables: records are located by hard-coded, inclusive row ranges of the
# raw file (presumably its atom and bond sections - confirm if the file changes).
MATSQ_structure = get_MATSQ_data(raw_ionomer_MATSQ, slices=[27, 524])
MATSQ_bonds = get_MATSQ_bonds(raw_ionomer_MATSQ, slices=[528, 1024])

##  [C$_{19}$F$_{39}$O$_5$S$^-$](https://atb.uq.edu.au/molecule.py?molid=1723660) AND C$_{152}$F$_{298}$O$_{40}$S$_8^{8-}$ CHARGE COMPARISON

In [6]:
# ESP method of Merz-Kollman
# Net charge of the ATB structure: sum of the per-atom "charge" column.
ATB_structure["charge"].sum()

-0.9999999999999993

In [7]:
#ATB_structure["charge"].unique() # 48 unique charges

In [8]:
# Qeq (charge equilibration)
# Net charge of the MATSQ structure: sum of the per-atom "charge" column.
MATSQ_structure["charge"].sum()

-0.000469999999992865

In [9]:
#MATSQ_structure["charge"].unique() # 40 unique charges

### TASK: fix ionomer charges

##### ATB Charge Distribution

* [Ionomer](https://atb.uq.edu.au/viewer.py?molid=1723660)
* [Polymer](https://atb.uq.edu.au/viewer.py?molid=1705448)

In [10]:
# Distinct GROMOS atom types present in the ATB structure, in order of first appearance.
pd.unique(ATB_structure["gromos elemts"])

array(['F', 'CPos', 'OE', 'C', 'SDmso', 'OM'], dtype=object)

In [11]:
# OM: this is Dreiding O_2
ATB_structure.loc[ATB_structure["gromos elemts"] == "OM", "charge"].unique()

array([-1.019])

In [12]:
# OE: this is Dreiding O_3
# -0.301 is closer to S
# -0.292 is closer to main chain
ATB_structure.loc[ATB_structure["gromos elemts"] == "OE", "charge"].unique()

array([-0.292, -0.301])

In [13]:
# F: C - (F)
ATB_structure.loc[ATB_structure["gromos elemts"] == "F", "charge"].unique()

array([-0.132, -0.11 , -0.107, -0.108, -0.106, -0.104, -0.111, -0.113,
       -0.117, -0.12 , -0.099, -0.075, -0.141, -0.084, -0.072, -0.09 ,
       -0.125, -0.102, -0.142, -0.116, -0.152, -0.092, -0.143, -0.15 ,
       -0.164, -0.17 , -0.159])

In [14]:
# CPos: (C) - F
ATB_structure.loc[ATB_structure["gromos elemts"] == "CPos", "charge"].unique()

array([0.395, 0.203, 0.209, 0.21 , 0.212, 0.206, 0.23 , 0.234, 0.276,
       0.216, 0.189, 0.384, 0.348, 0.256, 0.373, 0.339])

In [15]:
# C: (C) - S. The full row is displayed rather than only the unique charges.
ATB_structure.loc[ATB_structure["gromos elemts"] == "C"]

Unnamed: 0,atom,elements,gromos elemts,charge,x,y,z
57,C19,C,C,-0.212,5.169005,2.481924,-0.463252


In [16]:
# SDmso: S
ATB_structure.loc[ATB_structure["gromos elemts"] == "SDmso", "charge"].unique()

array([2.878])

##### MATSQ Charge Distribution

In [17]:
# F_: C - F
MATSQ_structure.loc[MATSQ_structure["dreiding elemts"] == "F_", "charge"].unique()

array([-0.19227, -0.16086, -0.19226, -0.18904, -0.19211, -0.19225,
       -0.1925 , -0.19264, -0.19498, -0.19816, -0.16126, -0.16734,
       -0.19218, -0.16112, -0.16583, -0.17404])

In [18]:
# C_3: distinct charges currently carried by the MATSQ carbons
MATSQ_structure.loc[MATSQ_structure["dreiding elemts"] == "C_3", "charge"].unique()

array([0.38416, 0.4499 , 0.3845 , 0.40751, 0.38451, 0.38733, 0.38465,
       0.3843 , 0.38243, 0.37963, 0.37461, 0.3515 , 0.44502, 0.41153,
       0.41137, 0.39254, 0.44746, 0.42584, 0.4685 ])

#### For now, I will apply a quick-and-dirty fix to the side chains only; later I could try to fix every charge based on the bond topology.

In [19]:
# Lookup table: MATSQ atom id -> Dreiding atom-type label.
id2elemt = {
    atom_id: element
    for atom_id, element in zip(MATSQ_structure["atom id"].to_numpy(),
                                MATSQ_structure["dreiding elemts"].to_numpy())
}

# Annotate both ends of every bond with their Dreiding label.
for id_column, label_column in (("atom1", "elemnt1"), ("atom2", "elemnt2")):
    MATSQ_bonds[label_column] = MATSQ_bonds[id_column].map(id2elemt)

# 0 = bond not (yet) selected for a charge update.
MATSQ_bonds["flag charge"] = 0

### First, changing the charges of the atoms bonded to sulfur

In [20]:
# Flag 1: bonds whose second atom is a sulfur (S_3).
# NOTE(review): bonds listing sulfur as the *first* atom are not flagged - confirm
# that the MATSQ file always writes S as the second atom of its bonds.
MATSQ_bonds.loc[MATSQ_bonds["elemnt2"] == "S_3", "flag charge"] = 1

In [21]:
# Ids of every atom taking part in a flag-1 (sulfur) bond: first atoms, then second atoms.
sulfur_bonds = MATSQ_bonds[MATSQ_bonds["flag charge"] == 1]
update_id_list = pd.concat([sulfur_bonds["atom1"], sulfur_bonds["atom2"]]).unique()

in_update = MATSQ_structure["atom id"].isin(update_id_list)
update = MATSQ_structure[in_update]

# Copy the ATB charge of the matching GROMOS type onto each Dreiding type:
# the carbon bonded to S, the sulfur itself, and the O_2 oxygens.
for dreiding_type, gromos_type in (("C_3", "C"), ("S_3", "SDmso"), ("O_2", "OM")):
    ATB_charge = ATB_structure.loc[ATB_structure["gromos elemts"] == gromos_type, "charge"].unique()[0]
    MATSQ_structure.loc[in_update & (MATSQ_structure["dreiding elemts"] == dreiding_type), "charge"] = ATB_charge

### Second, changing the charges of the ether oxygens (O_3), including the one close to the PTFE backbone

In [22]:
# Flag 2: bonds whose second atom is an ether oxygen (O_3).
MATSQ_bonds.loc[MATSQ_bonds["elemnt2"] == "O_3", "flag charge"] = 2

#### Next task: discern which O is closer to S and which one is closer to the PTFE backbone. 

To do this, I will also update the charge of the other carbon, the one that sits between S and O.

In [23]:
# Sulfur bonds (flag 1) whose first atom is a carbon; keep those carbon ids.
is_C_S_bond = (MATSQ_bonds["flag charge"] == 1) & (MATSQ_bonds["elemnt1"] == "C_3")
update = MATSQ_bonds[is_C_S_bond]
update_id_list = list(update["atom1"].to_numpy())

In [24]:
# Flag 3: bonds whose second atom is one of the sulfur-bonded carbons and
# whose first atom is itself a carbon.
is_C_C_bond = MATSQ_bonds["atom2"].isin(update_id_list) & (MATSQ_bonds["elemnt1"] == "C_3")
MATSQ_bonds.loc[is_C_C_bond, "flag charge"] = 3

# The first atoms of those bonds should carry the charge of ATB atom C18.
second_C_list = list(MATSQ_bonds.loc[MATSQ_bonds["flag charge"] == 3, "atom1"].to_numpy())
C18_charge = ATB_structure.loc[ATB_structure["atom"] == "C18", "charge"].to_numpy()[0]
MATSQ_structure.loc[MATSQ_structure["atom id"].isin(second_C_list), "charge"] = C18_charge

Now I will update the O charges according to whether each oxygen is the one closer to S or the one closer to the PTFE backbone.

In [25]:
# Ether oxygens (flag-2 bonds) whose bonded partner is one of the "second"
# carbons, i.e. the oxygens on the sulfur side of the side chain.
O_close_to_S = list(MATSQ_bonds[MATSQ_bonds["flag charge"].isin([2]) & 
                           MATSQ_bonds["atom1"].isin(second_C_list)]["atom2"].values)

# The ATB OE charges come out as [-0.292, -0.301]; according to the note on the
# OE listing, -0.301 belongs to the oxygen closer to S, which is the SECOND
# entry. (Index 0 would give these atoms the backbone-side charge.)
ATB_OE_charges = ATB_structure[ATB_structure["gromos elemts"].isin(["OE"])].charge.unique()
MATSQ_structure.loc[MATSQ_structure["atom id"].isin(O_close_to_S), "charge"] = ATB_OE_charges[1]

In [27]:
# Remaining ether oxygens (flag-2 bonds) that were not identified as being on
# the sulfur side: these are the ones next to the PTFE backbone.
O_close_to_PTFE = MATSQ_bonds[MATSQ_bonds["flag charge"].isin([2]) & 
                    ~MATSQ_bonds["atom2"].isin(O_close_to_S)]["atom2"].unique()

# The ATB OE charges come out as [-0.292, -0.301]; according to the note on the
# OE listing, -0.292 belongs to the oxygen closer to the main chain, which is
# the FIRST entry. (Index 1 would give these atoms the sulfur-side charge.)
ATB_OE_charges = ATB_structure[ATB_structure["gromos elemts"].isin(["OE"])].charge.unique()
MATSQ_structure.loc[MATSQ_structure["atom id"].isin(O_close_to_PTFE), "charge"] = ATB_OE_charges[0]

In [32]:
# Now, the molecule has a net charge closer to the one expected
# (presumably -8, one unit per sulfonate group - TODO confirm). Only the
# side-chain S, O and the two neighbouring C charges were replaced above.
MATSQ_structure["charge"].sum()

-7.542369999999991

# REMARKS | [GROMOS 54A7 PARAMETRIZATION](https://atb.uq.edu.au/download.py?lt=54A7)

Bonding Characteristics

* S=O: g13: $k_{harmonic}$ ~ 459, $r_0$ ~ 1.36
* C-O(C): g15: $k_{harmonic}$ ~ 399, $r_0$ ~ 1.39
* S-O(H): g28: $k_{harmonic}$ ~ 299, $r_0$ ~ 1.61

### NEXT TASK: create mol file for LAMMPS usage.