# Ion Analysis
In this notebook, we will do all of the analysis for the ions. We will start by computing the purity values associated with those atoms. We will then build new fragments around the ions that improve the purity. We will next develop a buffer region using the interaction spilling equations. Finally, we will set up a QM/MM type run on those atoms.

## Loading Of Data
This basic data structure will store all the matrices required.

In [None]:
from CheSS import Matrices as M

class blockparam:
    '''Holds the file names and matrices of one calculation folder.

    Typical use is to construct the object, then call build_lookup() and
    read_matrices() to populate the atom/basis lookup and the matrices.
    '''
    def __init__(self, folder):
        '''Store the paths of the matrix and metadata files inside `folder`.

        Nothing is read from disk here; only the file names are recorded.
        '''
        file_table = (
            ("density_file", "/density_kernel_sparse.mtx"),
            ("hamiltonian_file", "/hamiltonian_sparse.mtx"),
            ("overlap_file", "/overlap_sparse.mtx"),
            ("metadata_file", "/sparsematrix_metadata.dat"),
        )
        for attribute, suffix in file_table:
            setattr(self, attribute, folder + suffix)

    def build_lookup(self):
        '''Fill self.atom_to_basis from the metadata file.

        After the call, self.atom_to_basis[atom] is the list of basis
        function indices that the metadata lookup assigns to `atom`.
        '''
        basis_owner = M.get_atomic_lookup(self.metadata_file)
        atom_count = max(basis_owner) + 1
        self.atom_to_basis = [[] for _ in range(atom_count)]
        for basis_index, atom_index in enumerate(basis_owner):
            self.atom_to_basis[atom_index].append(basis_index)

    def read_matrices(self):
        '''Read the three matrices and derive the quantities used later.

        Sets overlap, density and hamiltonian (as read by mmread), KS (dense
        0.5 * density * overlap), sinv (inverse of the overlap), sinvxh
        (sinv * hamiltonian) and sinvxh2 (sinvxh squared).
        '''
        from scipy.io import mmread
        from scipy.sparse.linalg import inv

        self.overlap = mmread(self.overlap_file)
        self.density = mmread(self.density_file)
        self.hamiltonian = mmread(self.hamiltonian_file)

        density_times_overlap = self.density.dot(self.overlap)
        self.KS = 0.5*density_times_overlap.todense()

        self.sinv = inv(self.overlap)
        sinv_times_h = self.sinv.dot(self.hamiltonian)
        self.sinvxh = sinv_times_h
        self.sinvxh2 = sinv_times_h.dot(sinv_times_h)

In [None]:
# Load everything stored in the "Matrices" folder: first the atom -> basis
# function lookup, then the matrices themselves.
data = blockparam("Matrices")
data.build_lookup()
data.read_matrices()
# Geometry file; it is opened further down to build the fragment dictionary.
geom_file = "Matrices/1L2Y.yaml"
# Figure size constant. NOTE(review): the plotting cells visible in this
# notebook pass figsize literals directly rather than using this name.
image_size = (12,4)

In [None]:
import yaml
# Purity / multipole data; the "Fragment multipoles" entry is read later on.
purity_file = "PurityData/Single/1L2Y.yaml"
with open(purity_file) as ifile:
    # safe_load instead of load: calling yaml.load without an explicit Loader
    # is deprecated (and an error in recent PyYAML), and this file is plain
    # data, so the safe loader is sufficient.
    bigdft_purity = yaml.safe_load(ifile)

Build the fragment data structure.

In [None]:
from BigDFT import Fragments as F
# `load` stays imported because a later cell still calls it by name.
from yaml import load
from yaml import safe_load
with open(geom_file) as ifile:
    # safe_load: yaml.load without an explicit Loader is deprecated (and an
    # error in recent PyYAML); the geometry file is plain data.
    sys = safe_load(ifile)
    # Take the first (presumably only) entry under "Reading positions".
    # next(iter(...values())) works on both Python 2 and Python 3, unlike
    # .itervalues().next() which exists only on Python 2.
    positions = next(iter(sys["Reading positions"].values()))
# Fragment dictionary; it is indexed as fdict[fragment name][fragment id]
# and yields a list of atom ids in the cells below.
fdict = F.CreateFragDict(positions)

We will also load the fragment charge values computed by BigDFT. First, a helper that maps a list of atom IDs back to the fragment it belongs to.

In [None]:
def lookup_fragment(fdict, atomlist):
    '''Find the fragment whose atoms are exactly the ones in `atomlist`.

    fdict: nested mapping, fdict[fragment name][fragment id] -> atom ids.
    atomlist: iterable of atom ids; order and duplicates are ignored.

    Returns the tuple (fragment name, fragment id) of the first fragment
    whose atom set equals the requested one, or None if no fragment matches.
    '''
    # Build the target set once. Besides avoiding repeated work, this keeps
    # one-shot iterables (generators, iterators) from being exhausted by the
    # first comparison.
    wanted = set(atomlist)
    for fragname in fdict:
        for fragid, atoms in fdict[fragname].items():
            if wanted == set(atoms):
                return fragname, fragid
    return None

In [None]:
# cdict[fragment name][fragment id] -> net charge of that fragment, taken
# from the "Fragment multipoles" entries of the BigDFT purity data.
cdict = {}
for frag in bigdft_purity["Fragment multipoles"]:
    neutral_charge = frag["Neutral fragment charge"]
    q0 = frag["q0"]
    # Net charge is the neutral fragment charge plus the first q0 component.
    net_charge = neutral_charge + q0[0]
    # Map the atom ids of this entry back to its fragment name and id.
    atomlist = frag["Atom IDs"]
    fragname, fragid = lookup_fragment(fdict, atomlist)
    cdict.setdefault(fragname, {})[fragid] = net_charge

I have also generated the Bader charge and dipole values for each atom; only the net charge of each atom is read here.

In [None]:
# Imported here so the cell does not depend on the deprecated bare `load`.
from yaml import safe_load
bader_file = "bader.yaml"
with open(bader_file) as ifile:
    # safe_load: yaml.load without an explicit Loader is deprecated (and an
    # error in recent PyYAML); this file is plain data.
    bader_data = safe_load(ifile)["positions"]
# One net Bader charge per entry, in file order. The cells below index this
# list with (atom id - 1).
bader_values = [float(atom["Charge net"]) for atom in bader_data]

## Purity Analysis
Now we will compute the purity values, and in particular we'll focus on the purity values of the Chlorine atoms. First a charge lookup data structure.

In [None]:
from numpy import zeros
# Electron count used for each element when normalising the purity.
# NOTE(review): the values look like valence electron counts - confirm they
# match the pseudopotentials used in the calculation.
electron_lookup = {'H' :1, 'He':2, 
                   'Li':1, 'Be':2, 'B' :3, 'C': 4, 'N':5, 'O':6, 'F' :7, 'Ne':8,
                   'Na':1, 'Mg':2, 'Al':3, 'Si':4, 'P':5, 'S':6, 'Cl':7}
natoms = len(positions["positions"])
# charge[i] is the electron count of atom i (0-based, in file order).
charge = zeros((natoms))
for i, p in enumerate(positions["positions"]):
    # Pick the key that names a known element. The previous p.keys()[1] only
    # works on Python 2 and depended on the arbitrary key order of the dict.
    # Assumes exactly one key of each position entry is an element symbol -
    # TODO confirm for entries that carry extra keys.
    name = next((key for key in p if key in electron_lookup), None)
    if name is None:
        raise KeyError("no known element symbol among the keys %r of atom %d"
                       % (list(p), i))
    charge[i] = electron_lookup[name]

A list of fragments.

In [None]:
# Flatten the fragment dictionary into a plain list with one entry (the
# atom id list) per fragment, in dictionary iteration order.
frag_list = [fdict[fname][fid]
             for fname in fdict.keys()
             for fid in fdict[fname].keys()]

The subroutine used to compute the purity values.

In [None]:
def compute_purity(param, charge, frag):
    '''Compute the purity value of one fragment.

    param: object providing KS (a 2D matrix) and atom_to_basis (for each
        0-based atom index, the list of its basis function indices).
    charge: per-atom values indexed by 0-based atom index; their sum over
        the fragment normalises the result.
    frag: sequence of 1-based atom ids making up the fragment.

    Returns 2 * trace(B*B - B) / (sum of charge over the fragment), where B
    is the block of param.KS restricted to the basis functions of the
    fragment. An empty fragment returns 0.
    '''
    from numpy import trace
    if len(frag) == 0:
        return 0

    # Collect the basis functions and the normalisation of the fragment.
    # Atom ids in `frag` are 1-based; the lookups are 0-based.
    indices = []
    cv = 0
    for atom in frag:
        indices += param.atom_to_basis[atom-1]
        cv += charge[atom-1]

    # Restrict KS to the fragment: rows first, then columns.
    submat = param.KS[indices, :]
    submat = submat[:, indices]

    return 2*trace(submat.dot(submat) - submat)/cv

Compute and plot.

In [None]:
# Purity of every fragment, in the same order as frag_list.
purity_values = [compute_purity(data, charge, frag) for frag in frag_list]

In [None]:
from matplotlib import pyplot as plt

# Plot the purity values of all fragments in ascending order.
fig, ax = plt.subplots(1, 1, figsize=(12,4))
# The values are sorted before plotting, so the x position of a point is its
# rank in the sorted list rather than an index into frag_list.
ax.plot(sorted(purity_values), 'x')
ax.set_title("Purity Values")
ax.set_xlabel("Fragment ID")

### The Chlorine Purity Values
Now we will make the same plot, but highlight where the chlorine and sodium ions (the `CLA` and `SOD` fragments) sit on that scale.

In [None]:
# Atom lists of the ion fragments only (fragment names "CLA" and "SOD").
target_list = []
for fname in fdict.keys():
    if fname in ("CLA", "SOD"):
        target_list.extend(fdict[fname][fid] for fid in fdict[fname].keys())

In [None]:
# Purity of each ion fragment, in the same order as target_list.
target_purity = [compute_purity(data, charge, frag) for frag in target_list]

In [None]:
from matplotlib import pyplot as plt

# Locate each ion purity value in the sorted list of all purity values, so
# it can be drawn on top of the matching point. Exact equality is enough
# because both lists come from the same compute_purity calls on the same data.
spv = sorted(purity_values)
x_cl = []
for ion_value in target_purity:
    for rank, sorted_value in enumerate(spv):
        if ion_value == sorted_value:
            # Keep only the first matching rank for this ion.
            x_cl.append(rank)
            break

fig, ax = plt.subplots(1, 1, figsize=(12,4))
ax.plot(spv, 'x', markersize=10, label="All Values")
ax.plot(x_cl, target_purity, '.', markersize=25, label="Na/Cl")
ax.set_xlim(-2,len(purity_values))
ax.set_title("Purity Values")
ax.set_xlabel("Fragment ID")
plt.legend(loc="best")

## Refragmentation of Chlorine
Next we will try to add some neighboring water molecules to the Chlorine atoms to improve the purity values. First, a subroutine to get nearest neighbors.

In [None]:
def GetNearest(fragtuple, sysfile, threshold=None, number=None):
    '''Return the ids of the fragments closest to a given fragment.

    fragtuple: (fragment name, fragment id) of the reference fragment.
    sysfile: position dictionary of the system; its "cell" entry is passed
        on to the distance computation.
    threshold: used when `number` is not given (or is 0); selects every
        fragment whose distance to the reference is below this value.
    number: when given and non-zero, selects this many fragments with the
        smallest distances (all of them if the system has fewer).

    The distance of the reference fragment to itself is part of the distance
    list, so the reference is a candidate for selection like any other
    fragment.

    Returns the list of selected fragment ids; ordered by increasing
    distance when `number` is used, in system order when `threshold` is.

    Raises ValueError if the reference fragment is not in the system, or if
    neither `number` nor `threshold` is given.
    '''
    import numpy as np
    system = F.System(posinp_dict=sysfile)

    # The id to look for does not change inside the loop; build it once.
    # Assumes F.SetFragId is a pure function of its arguments.
    target_id = F.SetFragId(*fragtuple)
    our_frag = None
    for f in system.fragments:
        if f.id == target_id:
            our_frag = f
            break
    if our_frag is None:
        raise ValueError("fragment %s not found in the system" % (target_id,))

    distance_array = [F.distance(our_frag, f, cell=sysfile["cell"])
                      for f in system.fragments]
    if number:
        shell = np.argsort(distance_array)[:number]
    else:
        if threshold is None:
            raise ValueError("either threshold or number must be given")
        shell = np.where(np.array(distance_array) < threshold)[0]
    return [system.fragments[s].id for s in shell]

Fragment tuples.

In [None]:
# (fragment name, fragment id) pairs of the ion fragments ("CLA" and "SOD").
frag_tuples = []
for fname in fdict.keys():
    if fname in ("CLA", "SOD"):
        frag_tuples.extend((fname, fid) for fid in fdict[fname].keys())

Next we will compute the purity values for the Chlorine+Water fragments.

In [None]:
# For every ion, grow a merged fragment out of its nearest fragments and
# record purity, summed fragment charge and summed Bader charge at each size.
# merged_*[ion][k] holds the value for the k+1 nearest fragments.
max_fragments = 10
merged_purity = []
merged_charge = []
merged_bader = []
for cl in frag_tuples:
    # GetNearest rebuilds the whole system on every call, so search once for
    # the largest shell. It returns the fragments ordered by distance, so the
    # k nearest are simply the first k entries of this list.
    nearest = GetNearest(cl, positions, number=max_fragments)
    purity_row = []
    charge_row = []
    bader_row = []
    for num_neighbors in range(0, max_fragments):
        neighbor_list = nearest[:num_neighbors+1]
        tlist = []
        total_charge = 0
        for n in neighbor_list:
            # Fragment ids have the form "name:id".
            fname, fid = n.split(":")
            tlist.extend(fdict[fname][int(fid)])
            total_charge += cdict[fname][int(fid)]
        purity_row.append(compute_purity(data, charge, tlist))
        charge_row.append(total_charge)
        # Atom ids are 1-based; bader_values is a 0-based list.
        total_bader = 0
        for atom in tlist:
            total_bader += bader_values[atom-1]
        bader_row.append(total_bader)
    merged_purity.append(purity_row)
    merged_charge.append(charge_row)
    merged_bader.append(bader_row)

In [None]:
# Top panel: purity of each merged fragment; bottom panel: its net charge.
fig, ax = plt.subplots(2, 1, figsize=(6,6))

marker = ["o--", "d--", "h--"]
marker2 = ["*--", "s--", "8--"]
colors= ['g', 'b', 'black']

# Iterate in reverse order, as before, which also fixes the legend order.
for i in range(len(frag_tuples)-1, -1, -1):
    name = frag_tuples[i][0]+str(frag_tuples[i][1])
    # Cycle through the styles so that systems with more ions than styles
    # do not raise an IndexError.
    line_style = marker[i % len(marker)]
    line_color = colors[i % len(colors)]
    ax[0].plot(merged_purity[i], line_style, label="Purity "+name, color=line_color, markersize=8)
    ax[1].plot(merged_charge[i], line_style, label="Mulliken "+name, color=line_color,  markersize=8)
#     ax[1].plot(merged_bader[i], marker2[i % len(marker2)], label="Bader "+name, color=line_color,  markersize=8)
ax[0].set_xlim(-0.5, len(merged_purity[0]))
ax[1].set_xlim(-0.5, len(merged_purity[0]))
ax[1].set_ylim(-1.1, 1.1)
ax[1].set_xlabel("Added Water Molecules",fontsize=12)
ax[0].set_ylabel("Purity Values",fontsize=12)
ax[1].set_ylabel("Net Charge",fontsize=12)
# Legends are anchored outside the axes, to the right of each panel.
ax[0].legend(bbox_to_anchor=(1.45,1), ncol=1)
ax[1].legend(bbox_to_anchor=(1.5,1), ncol=1)
plt.savefig("Pictures/ChlorineRefrag.eps", bbox_inches='tight')