## Initialize the ckan environment and requests session

In [None]:
from os import path, environ
import requests
from dataflows import Flow, load
from datapackage_pipelines_ckanext.helpers import get_plugin_configuration

# Plugin configuration for 'odata_org_il'; data_path is the base directory
# from which this notebook loads its datapackages.
config = get_plugin_configuration('odata_org_il')
data_path = config['data_path']

# CKAN credentials are read from the environment. Validate them with an
# explicit check rather than `assert`, which is skipped under `python -O`
# and would not say which variable is missing.
CKAN_API_KEY = environ.get('CKAN_API_KEY')
CKAN_URL = environ.get('CKAN_URL')
_missing_env_vars = [
    env_name
    for env_name, env_value in (('CKAN_API_KEY', CKAN_API_KEY), ('CKAN_URL', CKAN_URL))
    if not env_value
]
if _missing_env_vars:
    raise RuntimeError(
        'Missing required environment variables: ' + ', '.join(_missing_env_vars)
    )

# Shared HTTP session that sends the API key in the Authorization header
# on every request.
CKAN_AUTH_HEADERS = {'Authorization': CKAN_API_KEY}
session = requests.Session()
session.headers.update(CKAN_AUTH_HEADERS)

## Common imports and functions

In [None]:
from dataflows import Flow, load
import yaml
from load_existing_entities import get_existing_entities_resource, get_existing_entities_resource_descriptor
from collections import defaultdict

def load_odata_resource(name, resources=None):
    """Load rows from the ``name`` datapackage stored under ``data_path``.

    Reads ``{data_path}/{name}/datapackage.json`` through a dataflows ``Flow``
    and returns the first resource of the flow results. Progress messages are
    printed before and after loading.

    :param name: directory name of the datapackage under ``data_path``
    :param resources: forwarded unchanged to ``load`` as its ``resources``
        argument (``None`` by default)
    :return: the first resource from ``Flow(...).results()``
    """
    datapackage_json = f'{data_path}/{name}/datapackage.json'
    print(f'Loading resource from {datapackage_json}')
    flow_results = Flow(load(datapackage_json, resources=resources)).results()
    loaded_rows = flow_results[0][0]
    print(f'Resource loaded: {name} ({len(loaded_rows)} rows)')
    return loaded_rows

def yaml_print(data):
    """Print ``data`` as block-style YAML with unicode characters left unescaped."""
    rendered = yaml.dump(data, allow_unicode=True, default_flow_style=False)
    print(rendered)

def get_existing_entities_flow():
    """Build a flow that loads the existing entities resource.

    :return: a ``(flow, stats)`` tuple; ``stats`` is the ``defaultdict(int)``
        handed to ``get_existing_entities_resource``.
    """
    stats = defaultdict(int)
    # Single-resource datapackage descriptor paired with its row source.
    datapackage = {'resources': [get_existing_entities_resource_descriptor()]}
    resource_sources = [get_existing_entities_resource(stats)]
    flow = Flow(load((datapackage, resource_sources)))
    return flow, stats


## Load resources

In [None]:
# Load the 'new_foi_offices' and 'foi_groups_matching' datapackages from
# data_path; both are reused by the later cells of this notebook.
foi_offices_resource = load_odata_resource('new_foi_offices')
foi_groups_matching_resource = load_odata_resource('foi_groups_matching')

## Load existing entities (groups) from CKAN api

In [None]:
# Build the existing-entities flow together with the stats counter it fills.
existing_entities_flow, existing_entities_stats = get_existing_entities_flow()
# Run the flow and keep the rows of its first resource (the flow loads a single resource).
existing_entities_resource = existing_entities_flow.results()[0][0]
# Show the counters collected while loading, then the number of rows loaded.
yaml_print(dict(existing_entities_stats))
print(f'Loaded existing entities ({len(existing_entities_resource)} rows)')

## Find mismatched foi office groups

In [None]:
# Index the existing entities by group id and by entity id. If an id occurs
# more than once, the last row wins.
existing_entities_group_ids = {}
existing_entities_entity_ids = {}

for existing_entity in existing_entities_resource:
    existing_entities_group_ids[existing_entity['group_id']] = existing_entity
    existing_entities_entity_ids[existing_entity['entity_id']] = existing_entity

# Ids collected here drive the update and restore cells further down.
mismatched_entity_ids = set()
mismatched_group_ids = set()

for foi_group_match in foi_groups_matching_resource:
    # 'Column3' holds the group id in the matching resource.
    match_group_id = foi_group_match['Column3']
    match_entity_id = foi_group_match['entity_id']
    # Rows missing either id cannot be matched; skip them.
    if not (match_group_id and match_entity_id):
        continue
    # The entity already exists, so there is nothing to fix for this row.
    if match_entity_id in existing_entities_entity_ids:
        continue
    matching_entity_by_group_id = existing_entities_group_ids.get(match_group_id)
    if not matching_entity_by_group_id:
        # Neither the group nor the entity is known; stop and say which row
        # is at fault instead of raising an empty exception.
        raise Exception(
            f'foi group match {match_group_id!r} -> {match_entity_id!r}: '
            'neither the group nor the entity was found in the existing entities'
        )
    # The group exists but its matched entity does not: record the mismatch.
    print(f'group {matching_entity_by_group_id["title"]}: no matching entity {match_entity_id}')
    mismatched_entity_ids.add(match_entity_id)
    mismatched_group_ids.add(match_group_id)
        

## Run update_foi_offices dry run

In [None]:
from update_foi_offices_entities import get_foi_offices_resource, get_existing_entities, get_foi_groups_matching
from collections import defaultdict
import yaml

# Dry-run mode: DRY_RUN is passed through to get_foi_offices_resource below.
DRY_RUN = True

# Shared state handed to each step below.
existing_entities = {}
stats = defaultdict(int)

# Each step is iterated to completion and its rows are discarded.
# NOTE(review): presumably the steps fill existing_entities / stats as a
# side effect of iteration - confirm in update_foi_offices_entities.
for row in get_existing_entities(
        existing_entities_resource, existing_entities, stats):
    pass

for row in get_foi_groups_matching(
        foi_groups_matching_resource, existing_entities, stats):
    pass

# Keep only the offices whose 'foi-office-<nid>' id was flagged as mismatched.
foi_offices_resource_only_mismatched = list(filter(
    lambda office: f"foi-office-{office['nid']}" in mismatched_entity_ids,
    foi_offices_resource,
))

for row in get_foi_offices_resource(
        foi_offices_resource_only_mismatched, existing_entities, stats, DRY_RUN):
    pass

## Before updating - save the group datasets, otherwise they will be disconnected from their groups

In [None]:
%%sh
# Run the ./dump_group_datasets pipeline with verbose output; the next cell
# reads the 'group_datasets' resource of the dump_group_datasets datapackage.
dpp run --verbose ./dump_group_datasets

In [None]:
# Count the dumped groups by how many datasets each one holds.
stats = defaultdict(int)
for row in load_odata_resource('dump_group_datasets', resources=['group_datasets']):
    # A falsy dataset_ids value counts as zero datasets.
    num_datasets = len(row['dataset_ids']) if row['dataset_ids'] else 0
    if num_datasets == 0:
        stats['0 | groups without datasets'] += 1
    elif num_datasets == 1:
        stats['1 | groups with 1 dataset'] += 1
    elif num_datasets <= 10:
        stats['2 | groups with 2-10 datasets'] += 1
    else:
        stats['3 | groups with more then 10 datasets'] += 1
yaml_print(dict(stats))

## Do the update

In [None]:
# Leave dry-run mode: the call below is made with DRY_RUN=False.
DRY_RUN = False

# Same mismatched offices and existing_entities as in the dry-run cell.
# NOTE(review): `stats` was rebound by the group-datasets summary cell, so any
# counters from this run are added to that dict - confirm this is intended.
for row in get_foi_offices_resource(foi_offices_resource_only_mismatched, existing_entities, stats, DRY_RUN):
    pass

## Restore the datasets

In [None]:
from os import path, environ
import requests
from dataflows import Flow, load
from datapackage_pipelines_ckanext.helpers import get_plugin_configuration

def restore_group_datasets(row):
    """Re-attach the saved datasets of a mismatched group.

    For a row whose ``group_id`` is in ``mismatched_group_ids``, posts one
    CKAN ``member_create`` request per id in ``row['dataset_ids']``, adding
    the dataset to the group as a ``package`` member. Rows for other groups
    are left untouched.

    :param row: mapping with ``group_id`` and ``dataset_ids`` keys
    :raises RuntimeError: if a ``member_create`` response does not report success
    """
    group_id = row['group_id']
    if group_id not in mismatched_group_ids:
        return
    member_create_url = '{}/api/3/action/member_create'.format(CKAN_URL)
    # dataset_ids may be empty or None for groups without datasets.
    for dataset_id in row['dataset_ids'] or ():
        res = session.post(member_create_url,
                           json=dict(id=group_id,
                                     object=dataset_id,
                                     object_type='package',
                                     capacity='')).json()
        # Explicit check rather than `assert`: it survives `python -O` and
        # reports which membership failed along with the API response.
        if not (res and res.get('success')):
            raise RuntimeError(
                'member_create failed for group {!r}, dataset {!r}: {!r}'.format(
                    group_id, dataset_id, res))

# Feed the saved 'group_datasets' resource of the dump_group_datasets
# datapackage through restore_group_datasets.
group_datasets_loader = load(
    path.join(data_path, 'dump_group_datasets/datapackage.json'),
    resources=['group_datasets'],
)
Flow(group_datasets_loader, restore_group_datasets).process()
