## CM strata composition migration
### requires neo4j model v4
### first apply :
*micorr/stratigraphies/neo4jdata/mc-338-periodic-table-elements-v4-model.cql*
(including cm families added by commit 28f3400216d7c3a10fc8354a3f1cb0867814fd98)

In [42]:
# Connect to graph db
from py2neo import neo4j
import os

neo4jUrl = os.environ.get('NEO4J_URL',"http://neo4j:secret@neo4j:7474/db/data/")
#neo4jUrl  = "http://neo4j:secret@neo4j:7474/db/data/"
graph = neo4j.Graph(neo4jUrl)

from stratigraphies.neo4jdao import Neo4jDAO
neo = Neo4jDAO()

In [44]:
ls

#m_strata_verif.md#                     compound_names.csv
Migrate_CM_strata_composition.ipynb     cp_strata_verif.csv
Migrate_CP_strata_composition.ipynb     cp_strata_verif.md
Migrate_metal_strata_composition.ipynb  m_strata_verif.csv
Neo4j_import_elements.ipynb             m_strata_verif.md
Untitled.ipynb                          test.txt


## Retrieve  current CM strata composition characteristics

CM strata composition characteristics are additional elements lists (for our container conversion concern)
that are in fact associated with (soon to be removed) M and CP children strata

M child :
  - Additional elements in M *(uses cpCompositionExtensionFamily)*
  
CP child:
  - Additional elements in CP *(also uses cpCompositionExtensionFamily)*
  - Additional elements in CP aggregates *(uses cmCpAggregatesCompositionFamily)*

so we'll MATCH all children strata, retrieve all their characteristics and create
corresponding new containers for storing elements directly in the parent strata (CM strata)



In [2]:
neo4jUrl

'http://neo4j:secret@neo4j:7474/db/data/'

In [9]:
res=graph.cypher.execute("""
MATCH (sgpy:Stratigraphy)-[POSSES]->(strata:Strata)-[:IS_PARENT_OF]->(cstrata:Strata)
    -[:IS_CONSTITUTED_BY]->(c:Characteristic)-[:BELONGS_TO]->(f:Family)
        WHERE f.uid <> "natureFamily" 
RETURN sgpy.uid, sgpy.description, strata, cstrata.uid,f.uid, c.uid, c.name  ORDER BY f.uid,cstrata.uid
""")
print res

    | sgpy.uid                             | sgpy.description               | strata                                                                                                                                                       | cstrata.uid                                           | f.uid                           | c.uid                                              | c.name                            
----+--------------------------------------+--------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------+---------------------------------+----------------------------------------------------+------------------------------------
  1 | 380b19c4-ce69-11e6-b01d-000c29a72d3e | search                         | (n3920:Strata {date:"2016-12-30",label:"strata",stratigraphy_uid:"380b19c4-ce69-11e6-b01d-0

### create a dictionary containing corresponding elements lists for each stratum

In [40]:
from collections import defaultdict
def create_strata_additional_elements_dicts(res):
    ## Additional elements ( there could be multiple characteristic belonging to cpCompositionExtensionFamily)
    strata_updates = {}
    for r in res:
        if  r.strata['uid'] not in strata_updates:
            strata_updates[r.strata['uid']]={'description':r['sgpy.description'],
                                            'strata_node':r.strata,
                                            'component_node':None,
                                             'containers':defaultdict(list)
                                            # using defaultdict we won't create empty containers
                                            #'containers': {
                                            #    'cmcpCompositionAdditionalElements':[],
                                            #    'cmcpagCompositionAdditionalElements':[],
                                            #    'cmmCompositionAdditionalElements':[],
                                            #}
                                       }
        if r['c.name']!='other':
            containers = strata_updates[r.strata['uid']]['containers']
            if r['f.uid']=='cmCpAggregatesCompositionFamily':
                container_key='cmcpagCompositionAdditionalElements'             
            elif r['f.uid']=='cpCompositionExtensionFamily':
                # cpCompositionExtensionFamily is used in both M and CP children strata
                # to avoid separate query of double loop over M and CP strata we use strata uid suffix
                # to differentiate the Nature ( _childCP / _childM)
                if r['cstrata.uid'][-7:]=='childCP':
                     container_key='cmcpCompositionAdditionalElements'
                else: # is then "childM"
                     container_key='cmmCompositionAdditionalElements'
            else:
                continue
            containers[container_key].append(r['c.name'])                   
    return strata_updates

strata_add_updates = create_strata_additional_elements_dicts(res)

In [39]:
strata_add_updates.items()[:2]

[(u'3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e_Strata3',
  {'component_node': None,
   'containers': defaultdict(list,
               {'cmcpCompositionAdditionalElements': [u'Ca'],
                'cmcpagCompositionAdditionalElements': [u'Sn'],
                'cmmCompositionAdditionalElements': [u'S']}),
   'description': u'search',
   'strata_node': <Node graph=u'http://neo4j:7474/db/data/' ref=u'node/6092' labels=set([u'Strata']) properties={u'date': u'2017-06-10', u'uid': u'3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e_Strata3', u'stratigraphy_uid': u'3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e', u'label': u'strata'}>}),
 (u'd03fa5de-c2ed-11e6-a862-000c29a72d3e_Strata1',
  {'component_node': None,
   'containers': defaultdict(list,
               {'cmmCompositionAdditionalElements': [u'O', u'Cu']}),
   'description': u'search',
   'strata_node': <Node graph=u'http://neo4j:7474/db/data/' ref=u'node/3602' labels=set([u'Strata']) properties={u'date': u'2016-12-15', u'uid': u'd03fa5de-c2ed-11e6-a862-000c29a

In [37]:
# just nicer & filtered strata_add_updates output for verification:
for suid,update in strata_add_updates.items():
    print update['description']
    print suid
    for container,elements in update['containers'].items():
        print '   {}: '.format(container), elements
    print

search
3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e_Strata3
   cmmCompositionAdditionalElements:  [u'S']
   cmcpCompositionAdditionalElements:  [u'Ca']
   cmcpagCompositionAdditionalElements:  [u'Sn']

search
d03fa5de-c2ed-11e6-a862-000c29a72d3e_Strata1
   cmmCompositionAdditionalElements:  [u'O', u'Cu']

Bronze mirror
8cdcf1d4-1937-11e7-962a-000c29a72d3e_Strata3
   cmmCompositionAdditionalElements:  [u'Pb', u'Sn']

Bed structure
5e7030dc-25da-11e6-9e7f-000c29148083_Strata5

search
380b19c4-ce69-11e6-b01d-000c29a72d3e_Strata1
   cmmCompositionAdditionalElements:  [u'Fe']
   cmcpCompositionAdditionalElements:  [u'Ca']
   cmcpagCompositionAdditionalElements:  [u'Other']

Headrest
69f5a9e8-527e-11e6-bdec-000c29148083_Strata4
   cmcpCompositionAdditionalElements:  [u'Sn']

Sword
81810dc6-5316-11e6-bdec-000c29148083_Strata5
   cmcpCompositionAdditionalElements:  [u'Al', u'Si']

search
d992de7c-ca8a-11e6-9ec4-000c29a72d3e_Strata1
   cmmCompositionAdditionalElements:  [u'Sn', u'Cu']
   cmcpCompositio

## Create container nodes in CM strata to hold new element list

In [24]:
# use create_containers method from neo4jdao
def create_containers_in_strata(strata_updates):
    for strata_uid, composition in strata_updates.iteritems():

        containers={}
        for family,element_list in composition['containers'].iteritems():
            if len(element_list): # if we don't want to create empty containers
                containers[family]=[{'name': e} for e in element_list]
        # print strata, containers['cpCompositionMainElements'], containers['cpCompositionSecondaryElements'],  containers['cpCompositionCompounds']
        print strata_uid
        print containers
        # create containers in stratum or component
        if composition['component_node']:
            print "adding to component:{}".format(composition['component_node'])
            neo.create_containers(composition['component_node'], containers)
        else:
            print "adding to stratum:{}".format(strata_uid)
            neo.create_containers(composition['strata_node'], containers)


[(u'3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e_Strata3',
  {'component_node': None,
   'containers': defaultdict(list,
               {'cmcpagCompositionAdditionalElements': [u'Sn'],
                'cmmCompositionAdditionalElements': [u'Ca', u'S']}),
   'strata_node': <Node graph=u'http://neo4j:7474/db/data/' ref=u'node/6092' labels=set([u'Strata']) properties={u'date': u'2017-06-10', u'uid': u'3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e_Strata3', u'stratigraphy_uid': u'3a0dd7e6-4e18-11e7-a0ea-000c29a72d3e', u'label': u'strata'}>}),
 (u'd03fa5de-c2ed-11e6-a862-000c29a72d3e_Strata1',
  {'component_node': None,
   'containers': defaultdict(list,
               {'cmmCompositionAdditionalElements': [u'O', u'Cu']}),
   'strata_node': <Node graph=u'http://neo4j:7474/db/data/' ref=u'node/3602' labels=set([u'Strata']) properties={u'date': u'2016-12-15', u'uid': u'd03fa5de-c2ed-11e6-a862-000c29a72d3e_Strata1', u'stratigraphy_uid': u'd03fa5de-c2ed-11e6-a862-000c29a72d3e', u'label': u'strata'}>})]

In [None]:
#create containers for additional elements
create_containers_in_strata(strata_add_updates)

In [None]:
# In case we would need to delete all containers just created above
# MATCH (n:Nature {uid:"cm"})-[:HAS_FAMILY]->(f:Family)<-[:BELONGS_TO]-(co:Container)-[:IS_CONSTITUTED_BY]->(ch:Characteristic)
# DETACH DELETE co
# RETURN n,f,co,ch

## Deleting old characteristics

In [None]:
# todo