# Fragment Maker
A python notebook for generating new fragmentations of systems.

## Setup

Begin setting up the data.

In [11]:
from BigDFT import Fragments
reload(Fragments)

<module 'BigDFT.Fragments' from '/bigdft-sdk/Build/install/lib/python2.7/site-packages/BigDFT/Fragments.pyc'>

### Select Which Dataset and Fragment Cutoff

In [12]:
import ipywidgets as widgets
from glob import glob
from os.path import basename, splitext
from numpy import linspace

# Every entry directly under Geometry/ is offered as a selectable data set.
datasets = [basename(path) for path in glob("Geometry/*")]
w = widgets.Dropdown(options=datasets, description='Data Set:')
display(w)

# Candidate purity cutoffs: ten evenly spaced values from 0.01 to 0.1.
# The entry just before the midpoint of the list is pre-selected.
space = linspace(0.01, 0.1, 10)
default_cutoff = space[len(space) // 2 - 1]
cutoffdrop = widgets.Dropdown(
    options=space,
    value=default_cutoff,
    description='Purity Cutoff:',
)
display(cutoffdrop)

# 'initial' description width keeps the long label from being truncated.
style = {'description_width': 'initial'}
mergedrop = widgets.Dropdown(
    options=list(range(10)),
    description="Number of Neighbors:",
    style=style,
)
display(mergedrop)

Dropdown(description=u'Data Set:', options=('TimeStep', 'Single', 'Size'), value='TimeStep')

RHJvcGRvd24oZGVzY3JpcHRpb249dSdQdXJpdHkgQ3V0b2ZmOicsIGluZGV4PTQsIG9wdGlvbnM9KDAuMDEsIDAuMDIwMDAwMDAwMDAwMDAwMDA0LCAwLjAzMDAwMDAwMDAwMDAwMDAwNiwgMC7igKY=


RHJvcGRvd24oZGVzY3JpcHRpb249dSdOdW1iZXIgb2YgTmVpZ2hib3JzOicsIG9wdGlvbnM9KDAsIDEsIDIsIDMsIDQsIDUsIDYsIDcsIDgsIDkpLCBzdHlsZT1EZXNjcmlwdGlvblN0eWxlKGTigKY=


In [13]:
from IPython.display import Javascript
def run_all(ev):
    """Button callback: ask the notebook front end to run the cells below.

    ``ev`` is whatever the click handler passes in; it is not used.
    """
    rerun_below = Javascript('IPython.notebook.execute_cells_below()')
    display(rerun_below)


# Clicking the button re-runs the rest of the notebook with the selected values.
button = widgets.Button(description="Run Parameters")
button.on_click(run_all)
display(button)

<IPython.core.display.Javascript object>

Button(description=u'Run Parameters', style=ButtonStyle())

Button(description=u'Run Parameters', style=ButtonStyle())

Button(description=u'Run Parameters', style=ButtonStyle())

### Load All The Data

In [14]:
# Basic Parameters: snapshot the current widget selections.
dataset = w.value
cutoff = cutoffdrop.value
neighbors = mergedrop.value

# One entry per PDB file of the chosen data set, without directory or extension.
namelist = [splitext(basename(pdb_path))[0]
            for pdb_path in glob("Geometry/"+dataset+"/*.pdb")]

This has to load both the purity values and the fragment information into matching types of dictionaries.

In [15]:
from yaml import load

# Two ticks per system: one after its geometry file, one after its purity file.
total_steps = 2*len(namelist)
prog = widgets.IntProgress(max=total_steps, description='Loading:')
display(prog)

def lookup_fragment(fdict, atomlist):
    """Find the fragment whose atoms are exactly those in ``atomlist``.

    Parameters:
        fdict: nested mapping ``{fragment name: {fragment id: atoms}}``.
        atomlist: iterable of atom identifiers; order and repeats are ignored
            because the comparison is done on sets.

    Returns:
        ``(fragname, fragid)`` of the first fragment with the same set of
        atoms, or ``None`` when no fragment matches.
    """
    # Build the comparison set once instead of once per candidate fragment.
    wanted = set(atomlist)
    for fragname in fdict:
        for fragid, atoms in fdict[fragname].items():
            if wanted == set(atoms):
                return fragname, fragid
    # Explicit "not found" result so callers can test for it.
    return None

# A dictionary of systems for each file, all keyed by system name.
purity_dicts = {}   # {fragment name: {fragment id: purity indicator}}
frag_dicts = {}     # first entry found under "Reading positions"
total_frags = {}    # number of fragments listed in the purity file
total_atoms = {}    # number of atom IDs summed over those fragments

for f in namelist:
    # Load The Geometry
    with open("Geometry/"+dataset+"/"+f+".yaml") as ifile:
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # only point this at trusted files (or consider yaml.safe_load).
        values = load(ifile)
    # next(iter(...)) takes the first value on both Python 2 and Python 3
    # (the former .itervalues().next() only exists on Python 2).
    positions = next(iter(values["Reading positions"].values()))
    frag_dicts[f] = positions
    prog.value += 1

    # Load The Purity Values
    val = {}
    total_frag = 0
    total_atom = 0
    dictionary = Fragments.CreateFragDict(positions)
    with open("PurityData/"+dataset+"/"+f+".yaml") as ifile:
        # NOTE(review): same yaml.load caveat as above.
        values = load(ifile)
    for frag in values["Fragment multipoles"]:
        purity = frag["Purity indicator"]
        atomlist = frag["Atom IDs"]
        match = lookup_fragment(dictionary, atomlist)
        if match is None:
            # Fail with a readable message instead of an opaque unpacking
            # TypeError when the purity file and the geometry disagree.
            raise ValueError("No fragment of system " + str(f) +
                             " matches atom IDs " + str(atomlist))
        fragname, fragid = match
        val.setdefault(fragname, {})[fragid] = purity
        total_frag += 1
        total_atom += len(atomlist)
    total_frags[f] = total_frag
    total_atoms[f] = total_atom
    purity_dicts[f] = val
    prog.value += 1

prog.bar_style="success"
prog.description="Finished"

IntProgress(value=0, description=u'Loading:', max=18)

## Generate New List

First we'll find which fragments have a purity value whose magnitude exceeds the cutoff, get their nearest neighbors, and add them to a merge list.

In [16]:
# For every system, record each fragment whose purity magnitude exceeds the cutoff.
merge_targets = {}
for f in namelist:
    merge_targets[f] = [
        {"Fragid": (fragname, fragid), "Value": purity}
        for fragname, by_id in purity_dicts[f].items()
        for fragid, purity in by_id.items()
        if abs(purity) > cutoff
    ]

Code to get the nearest neighbors.

In [17]:
def GetNearest(fragtuple, sysfile, threshold=None, number=None):
    """Return the ids of the fragments closest to a reference fragment.

    Parameters:
        fragtuple: tuple identifying the reference fragment; it is expanded
            into ``Fragments.SetFragId`` to obtain the id to look for.
        sysfile: position dictionary used to build the ``Fragments.System``;
            its ``"cell"`` entry is passed on to ``Fragments.distance``.
        threshold: used when ``number`` is not given; every fragment whose
            distance to the reference is below this value is selected.
        number: when given (and non-zero), the ``number`` closest fragments
            are selected, nearest first.

    Returns:
        List of fragment ids. The reference fragment is not excluded from the
        candidates, so it can appear in the result.

    Raises:
        ValueError: if no fragment of the system has the requested id, or if
            neither ``number`` nor ``threshold`` is provided.
    """
    import numpy as np

    # Was a mangled module name here, which raised NameError on every call.
    system = Fragments.System(posinp_dict=sysfile)

    target_id = Fragments.SetFragId(*fragtuple)
    for candidate in system.fragments:
        if candidate.id == target_id:
            our_frag = candidate
            break
    else:
        # Without this the code below failed with an unbound local variable.
        raise ValueError("Fragment " + str(fragtuple) + " not found in system")

    distance_array = [Fragments.distance(our_frag, other, cell=sysfile["cell"])
                      for other in system.fragments]

    if number:
        # Indices of the `number` smallest distances, nearest first.
        shell = np.argsort(distance_array)[:number]
    else:
        if threshold is None:
            raise ValueError("GetNearest needs either a number or a threshold")
        shell = np.where(np.array(distance_array) < threshold)[0]

    return [system.fragments[s].id for s in shell]

Merge everything together

In [18]:
def CreateMergeList(neighborid):
    """Convert each id in ``neighborid`` with ``Fragments.GetFragTuple``, keeping order."""
    merge_list = []
    for frag_id in neighborid:
        merge_list.append(Fragments.GetFragTuple(frag_id))
    return merge_list

# For every system, build one merged fragment per merge target.
merged_dicts = {}
for f in namelist:
    # Ids of the merged fragments start at the number of fragments read for
    # this system and increase by one per target.
    base_id = total_frags[f]
    merged_dicts[f] = {}
    fdict = Fragments.CreateFragDict(frag_dicts[f])
    for counter, target in enumerate(merge_targets[f]):
        # Ask for neighbors+1 fragments, presumably because the target itself
        # is among its own nearest fragments (TODO confirm). GetNearest can
        # alternatively select by distance via its threshold argument.
        nearest_ids = GetNearest(target["Fragid"], frag_dicts[f], number=neighbors+1)
        merged = CreateMergeList(nearest_ids)

        # Add A New One: the name is the concatenation of the first element
        # of every merged tuple, the atoms are the concatenation of theirs.
        newname = ''.join(pair[0] for pair in merged)
        newid = base_id + counter
        newindices = [atom
                      for fragname, fragid in merged
                      for atom in fdict[fragname][fragid]]
        merged_dicts[f].setdefault(newname, {})[newid] = newindices

## Write To File

Convert to a list of lists.

In [19]:
# Flatten each system's {name: {id: atom indices}} mapping into a plain list
# of atom-index lists; names and ids are dropped.
frag_lists = {}
for f in namelist:
    frag_lists[f] = [atom_indices
                     for by_id in merged_dicts[f].values()
                     for atom_indices in by_id.values()]


Write.

In [20]:
import os
from os.path import exists
from os import makedirs

# Make sure the output directory for this data set exists.
output_dir = "NewFragment/"+dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Write one file per system. The file name has to be built inside the loop:
# building it beforehand used whatever `f` was left over from an earlier cell,
# so every system overwrote the same single file.
for f in namelist:
    merge_file = output_dir+"/"+f+"-"+str(cutoff)+"-"+str(neighbors)+".yaml"
    with open(merge_file, 'w') as ofile:
        # Each merged fragment becomes one "- [...]" list item.
        for value in frag_lists[f]:
            ofile.write("- "+str(value)+"\n")