## CP strata composition migration
### requires neo4j model v4
### first apply :
*micorr/stratigraphies/neo4jdata/mc-338-periodic-table-elements-v4-model.cql*

In [None]:
# Connect to graph db
from py2neo import neo4j
import os

# The connection URL is read from the NEO4J_URL environment variable; the
# literal below is only the fallback used when that variable is not set.
neo4jUrl = os.environ.get('NEO4J_URL',"http://neo4j:secret@neo4j:7474/db/data/")
#neo4jUrl  = "http://neo4j:secret@neo4j:7474/db/data/"
# py2neo graph handle used to run the Cypher queries of this notebook
graph = neo4j.Graph(neo4jUrl)

# Project DAO; its create_containers method is used to attach the new
# containers to strata and components.
from stratigraphies.neo4jdao import Neo4jDAO
neo = Neo4jDAO()

In [None]:
neo4jUrl

## Retrieve current CP strata composition characteristics

In [None]:
# Fetch every stratum attached to a characteristic of cpCompositionFamily,
# together with the optional first-level (sc) and second-level (ssc)
# sub-characteristics attached to the same stratum.
res=graph.cypher.execute("""
    MATCH (sgpy:Stratigraphy)-[:POSSESSES]->(strata:Strata)-[:IS_CONSTITUTED_BY]->(c:Characteristic)-[:BELONGS_TO]->(f:Family {uid:"cpCompositionFamily"})
    OPTIONAL MATCH (strata)-[:IS_CONSTITUTED_BY]->(sc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(c)
    OPTIONAL MATCH (strata)-[:IS_CONSTITUTED_BY]->(ssc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(sc)
    RETURN sgpy.uid,sgpy.description, strata, c.name,sc.name, ssc.uid,ssc.name
    """)
#print res



### create a dictionary containing corresponding elements lists for each stratum

In [None]:
def create_strata_elements_dicts(res):
    """Build, for each stratum, the element lists of the new composition containers.

    Each record of ``res`` must expose the stratum node as ``r.strata`` and
    the columns ``c.name``, ``sc.name`` and ``ssc.uid`` by key. A ``cpnt``
    attribute is picked up when the record has one.

    Returns a dict keyed by stratum uid. Each value holds:
      - ``strata_node``: the stratum node of the record
      - ``component_node``: ``r.cpnt`` or None when the record has no ``cpnt``
      - ``containers``: dict mapping a container family uid to its list of
        element names (main / secondary) or compound uids

    NOTE(review): entries are keyed by stratum uid only, so when several
    records share a stratum the last one replaces the previous ones.
    """
    strata_updates = {}
    for r in res:
        # SubCharacteristic names are "nearly" comma separated lists of
        # elements in one string, e.g. 'Cu, P, As, Co, Ni'
        # -> turn them into an actual list of secondary element names.
        # Empty tokens (trailing or doubled commas) are dropped so that no
        # element with an empty name is produced.
        raw_names = r['sc.name'].split(',') if r['sc.name'] else []
        secondary_elements = [name.strip() for name in raw_names if name.strip()]
        # "Convert" non-element compositions such as "C steel" or
        # "C grey cast Iron" to "C" only
        secondary_elements = ['C' if 'C ' in e else e for e in secondary_elements]
        strata_updates[r.strata['uid']] = {
            'strata_node': r.strata,
            'component_node': r.cpnt if hasattr(r, 'cpnt') else None,
            'containers': {
                # 'other' is a placeholder, not an element
                'cpCompositionMainElements': [r['c.name']] if r['c.name'] != 'other' else [],
                'cpCompositionSecondaryElements': secondary_elements,
                # compound uid = old sub-characteristic uid with its first 'cp' turned into 'cpd'
                'cpCompositionCompounds': [r['ssc.uid'].replace('cp', 'cpd', 1)] if r['ssc.uid'] else [],
            },
        }
    return strata_updates

strata_updates = create_strata_elements_dicts(res)

In [None]:
strata_updates.items()[:2]

## Retrieve current CP composition Additional elements (cpCompositionExtensionFamily)

In [None]:
# Fetch the characteristics of cpCompositionExtensionFamily attached to each
# stratum; rows are ordered by stratum uid.
res_additional_elements=graph.cypher.execute("""
MATCH (sgpy:Stratigraphy)-[:POSSESSES]->(strata:Strata)-[:IS_CONSTITUTED_BY]->
    (c:Characteristic)-[:BELONGS_TO]->(f:Family {uid:"cpCompositionExtensionFamily"})
    RETURN sgpy.uid,sgpy.description, strata, c.name,c.uid ORDER BY strata.uid
""")

### create a dictionary with additional elements list for each stratum

In [None]:
def create_strata_additional_elements_dicts(res):
    """Gather the additional element names of each stratum.

    Several characteristics of cpCompositionExtensionFamily may be attached
    to the same stratum, so the names of all records sharing a stratum uid
    end up in one list. The name 'other' is never added to that list.

    Returns a dict keyed by stratum uid; each value holds ``strata_node``,
    ``component_node`` (``r.cpnt`` of the first record seen for the stratum,
    or None when that record has no ``cpnt``) and ``containers`` with the
    single key 'cpCompositionAdditionalElements'.
    """
    strata_updates = {}
    for r in res:
        stratum_uid = r.strata['uid']
        entry = strata_updates.get(stratum_uid)
        if entry is None:
            # first record seen for this stratum: start with an empty list
            entry = {
                'strata_node': r.strata,
                'component_node': r.cpnt if hasattr(r, 'cpnt') else None,
                'containers': {'cpCompositionAdditionalElements': []},
            }
            strata_updates[stratum_uid] = entry
        element_name = r['c.name']
        if element_name != 'other':
            entry['containers']['cpCompositionAdditionalElements'].append(element_name)
    return strata_updates

strata_add_updates = create_strata_additional_elements_dicts(res_additional_elements)

In [None]:
strata_add_updates.items()[:2]

In [None]:
# alternative import csv file exported from neo4j browser
# import csv
# with open("/home/bernard/Downloads/metal_strata.csv","r") as f:
#    reader =csv.DictReader(f)
#    for r in reader:
#        print r['c.uid'],r[r['sc.name']

In [None]:
for k,u in strata_updates.iteritems():
    print k,u['containers']['cpCompositionMainElements'],u['containers']['cpCompositionSecondaryElements'],u['containers']['cpCompositionCompounds']

# Secondary components :
## Retrieve current CP composition and Additional elements (cpCompositionExtensionFamily)

In [None]:
# Fetch the components included in each stratum (strata -[:INCLUDES]-> cpnt)
# that are attached to a characteristic of cpCompositionFamily; the component
# node is returned as cpnt next to its stratum.
# NOTE(review): both OPTIONAL MATCH clauses look for sub-characteristics on
# the stratum, not on cpnt - confirm this is intended.
res_cpnt=graph.cypher.execute("""
    MATCH (sgpy:Stratigraphy)-[:POSSESSES]->(strata:Strata)-[:INCLUDES]->(cpnt:Component)-[:IS_CONSTITUTED_BY]->(c:Characteristic)-[:BELONGS_TO]->(f:Family {uid:"cpCompositionFamily"})
    OPTIONAL MATCH (strata)-[:IS_CONSTITUTED_BY]->(sc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(c)
    OPTIONAL MATCH (strata)-[:IS_CONSTITUTED_BY]->(ssc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(sc)
    RETURN sgpy.uid,sgpy.description, strata, cpnt,f.uid, c.name, sc.name, ssc.uid,ssc.name
    """)
print res_cpnt[:2]

### create a dictionary containing elements lists for each stratum

In [None]:
strata_scnd_cpnt_updates = create_strata_elements_dicts(res_cpnt)

In [None]:
strata_scnd_cpnt_updates.items()[:2]

In [None]:
# Fetch the characteristics of cpCompositionExtensionFamily attached to the
# components included in each stratum; rows are ordered by stratum uid.
res_cpnt_additional_elements=graph.cypher.execute("""
MATCH (sgpy:Stratigraphy)-[:POSSESSES]->(strata:Strata)-[:INCLUDES]->(cpnt:Component)-[:IS_CONSTITUTED_BY]->
    (c:Characteristic)-[:BELONGS_TO]->(f:Family {uid:"cpCompositionExtensionFamily"})
    RETURN sgpy.uid,sgpy.description, strata, cpnt, c.name,c.uid ORDER BY strata.uid
""")

In [None]:
strata_scnd_cpnt_add_updates = create_strata_additional_elements_dicts(res_cpnt_additional_elements)

In [None]:
strata_scnd_cpnt_add_updates.items()[:2]

## Create container nodes to hold the new element lists

In [None]:
# Containers are created through the create_containers method of neo4jdao
def create_containers_in_strata(strata_updates):
    """Create the containers described by ``strata_updates`` in the graph.

    ``strata_updates`` maps a stratum uid to a dict with the keys
    ``strata_node``, ``component_node`` and ``containers`` (family uid ->
    list of element names). For every entry the non-empty lists are turned
    into ``[{'name': element}, ...]`` and handed to ``neo.create_containers``
    with the component node when there is one, else with the stratum node.
    The uid, the containers and the chosen target are printed along the way.
    """
    for strata_uid, composition in strata_updates.items():
        # empty element lists are left out so that no empty container is created
        containers = {
            family: [{'name': element} for element in element_list]
            for family, element_list in composition['containers'].items()
            if len(element_list)
        }
        print(strata_uid)
        print(containers)
        component = composition['component_node']
        if not component:
            # no component: the containers belong to the stratum itself
            print("adding to stratum:{}".format(strata_uid))
            neo.create_containers(composition['strata_node'], containers)
            continue
        print("adding to component:{}".format(component))
        neo.create_containers(component, containers)


In [None]:
strata_add_updates.items()[:2]

In [None]:
#create containers for main, secondary elements and compounds 
create_containers_in_strata(strata_updates)

In [None]:
#create containers for additional elements
create_containers_in_strata(strata_add_updates)

In [None]:
strata_scnd_cpnt_updates.items()[:20]

In [None]:
#create secondary Component containers for main, secondary elements and compounds 
create_containers_in_strata(strata_scnd_cpnt_updates)

In [None]:
strata_scnd_cpnt_add_updates.items()[:20]

In [None]:
#create secondary Component containers for additional elements
create_containers_in_strata(strata_scnd_cpnt_add_updates)

In [None]:
# In case we would need to delete all containers just created above
# MATCH (n:Nature {uid:"cp"})-[:HAS_FAMILY]->(f:Family)<-[:BELONGS_TO]-(co:Container)-[:IS_CONSTITUTED_BY]->(ch:Characteristic)
# DETACH DELETE co
# RETURN n,f,co,ch


## verification 
###  (before deleting old characteristics)

Note: this verification only covers strata; it has not been updated for components yet.

In [None]:
# Re-read the old composition (characteristic and sub-characteristic names)
# of every stratum, ordered by stratigraphy description then stratum uid.
# Only strata.uid is returned here, not the stratum node.
res=graph.cypher.execute("""
    MATCH (sgpy:Stratigraphy)-[:POSSESSES]->(strata:Strata)-[:IS_CONSTITUTED_BY]->(c:Characteristic)-[:BELONGS_TO]->(f:Family {uid:"cpCompositionFamily"})
    OPTIONAL MATCH (strata)-[:IS_CONSTITUTED_BY]->(sc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(c)
    OPTIONAL MATCH (strata)-[:IS_CONSTITUTED_BY]->(ssc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(sc)
    RETURN sgpy.uid,sgpy.description, strata.uid, c.name,sc.name, ssc.name ORDER by sgpy.description,strata.uid
    """)
print res

## create a csv/markdown table to compare old and new composition

In [None]:
import csv
#for r in res:
#    print r['sgpy.description'],r['sgpy.uid'],r['strata.uid'][-8:],r['c.name'],r['sc.name']

# to write markdown table using csv writer
class MDTableFile:
    """File-like adapter turning each string written to it into a markdown table line.

    Meant to be handed to a csv writer using '|' as delimiter: every string
    written is wrapped as '| ... |' and terminated with CRLF before being
    passed on to the wrapped file object.
    """

    def __init__(self, f):
        # underlying file object receiving the formatted lines
        self.f = f

    def write(self, s):
        # Two adjacent delimiters get a space in between; repeat until no
        # '||' is left so that longer runs of delimiters are handled too.
        line = s
        while '||' in line:
            line = line.replace('||', '| |')
        self.f.write('| ' + line + ' |\r\n')
        
def export_to(file_name,file_ext):
    with open(file_name+'.'+file_ext, 'w') as csvfile:
        fieldnames=['stratigraphy',
         'strata.uid',
         'c.name',
         'cpCompositionMainElements',
         'sc.name',
         'cpCompositionSecondaryElements',
         'ssc.name',
         'cpCompositionCompounds']
        if file_ext == 'md':
            writer = csv.DictWriter(MDTableFile(csvfile), delimiter='|', lineterminator='', fieldnames=fieldnames)
        else:
            # Default to csv
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for r in res:
            row={f:r[f].encode('UTF-8') if r[f] else r[f] for f in fieldnames if f in r.__dict__}
            if file_ext == 'md':
                row['stratigraphy']='[https://dev.micorr.org//micorr/#/stratigraphy/{} {}]'.format(r['sgpy.uid'],r['sgpy.description'].encode('UTF-8'))
            else:
                row['stratigraphy']='=HYPERLINK("https://dev.micorr.org//micorr/#/stratigraphy/{}";"{}")'.format(r['sgpy.uid'],r['sgpy.description'].encode('UTF-8'))
                #row['stratigraphy']='=HYPERLINK("http://192.168.0.59:8000/micorr/#/stratigraphy/{}";"{}")'.format(r['sgpy.uid'],r['sgpy.description'].encode('UTF-8'))
            row['strata.uid'] = row['strata.uid'][-8:]
            row['cpCompositionMainElements']=''
            row['cpCompositionSecondaryElements']=''  
            row['cpCompositionCompounds']='' 
            elem_res = graph.cypher.execute("""
            MATCH (strata {uid:{strata_uid}})-[:INCLUDES]->(ctn:Container)-[rsc:IS_CONSTITUTED_BY]->(e:Characteristic), (ctn)-[:BELONGS_TO]->(f:Family)
            RETURN f,ctn,e ORDER BY f,rsc.order""",
                    strata_uid=r['strata.uid'])
            for er in elem_res:
                family=er.f['uid']            
                row[family] = er.e['uid'].encode('UTF-8') if not len(row[family]) else u','.join([row[family],er.e['uid']])
            print row
            print
            writer.writerow(row)                



# Export to md table "youtrack flavour"
export_to('cp_strata_verif','md')

# uncomment next line to export as regular csv file
# export_to('cp_strata_verif','csv')
    
    

## Deleting old characteristics

In [None]:
# Delete the IS_CONSTITUTED_BY relationships between each matched stratum and
# its composition characteristic (rc) and first-level sub-characteristic (rsc).
# NOTE(review): this matches family uid "mCompositionFamily", whereas the
# queries earlier in this notebook read "cpCompositionFamily" and
# "cpCompositionExtensionFamily" - confirm which family is meant before
# running this destructive cell.
# NOTE(review): relationships to second-level sub-characteristics are not
# deleted by this query - confirm whether that is intended.
del_res=graph.cypher.execute("""
MATCH (sgpy:Stratigraphy)-[:POSSESSES]->(strata:Strata)-[rc:IS_CONSTITUTED_BY]->(c:Characteristic)-[:BELONGS_TO]->(f:Family {uid:"mCompositionFamily"})
    OPTIONAL MATCH (strata)-[rsc:IS_CONSTITUTED_BY]->(sc:SubCharacteristic)<-[:HAS_SPECIALIZATION]-(c)
    DELETE rc,rsc
    RETURN rc,rsc
    """)
del_res

In [None]:
del_res