In [119]:
%load_ext autoreload
%autoreload 2

import sys
import json
import re
from datetime import datetime
from gridfs import GridFS, NoFile
from pymongo import MongoClient
from bson.objectid import ObjectId
from schema import *

sys.path.append('../')

from db_interface import addMagresFile, databaseSearch, removeMagresFiles, editMagresFile
from db_schema import magresVersionSchema, magresIndexSchema, magresMetadataSchema

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Operating on the database

This notebook is meant for admin operations on the database and for testing new changes. Important: do *not* use it carelessly, because permanent data loss is a very real possibility.

There are two options for usage, and the default is to use a local copy of the database. This works by having MongoDB installed and simply running a `mongod` instance in another terminal. In that case, testing can be freely done on a simple toy database. If `use_local` is set to `False`, though, the connection is established with the remote database, and all changes will be permanent. Double check all your code before running it!

In [2]:
# Set to False to operate on the remote database instead of a local mongod.
# With the remote database every change made from this notebook is permanent.
use_local = True

# Both targets listen on the same port; only the host differs.
# The remote host is left blank in this file and must be filled in before use.
db_port = 27017
db_url = 'localhost' if use_local else ''

client = MongoClient(host=db_url, port=db_port)

# Handles shared by every cell below: the database, its two collections
# and the GridFS bucket named 'magresFilesFS'.
ccpnc = client.ccpnc
magresIndex = ccpnc.magresIndex
magresMetadata = ccpnc.magresMetadata
magresFilesFS = GridFS(ccpnc, 'magresFilesFS')

In [3]:
def listAllKeys():
    """
    Print the keys of the first record found in the index collection.

    Only a single record is inspected, so keys that appear exclusively in
    other records are not listed. If the collection is empty a short notice
    is printed instead of raising StopIteration.
    """
    first_record = magresIndex.find_one({})
    if first_record is None:
        print('No records found in magresIndex')
        return
    print(first_record.keys())

In [4]:
def listAllFiles(keys=None, validate=False):
    """
    List and print all files in the database.

    | Args:
    |    keys [list]: if present, print only the values of the keys listed here, in tabulated form.
    |                 A record that lacks one of the keys shows '<missing>' in that column.
    |    validate [bool]: if True, also check whether the index entries are valid according to the schema
    |                     and append 'Valid' or 'Not valid (<reason>)' to each line.

    """
    if keys is not None:
        # Header row for the tabulated output
        print('\t'.join(keys))
    for f in magresIndex.find({}):
        if keys is None:
            line = str(f)
        else:
            # Use a placeholder rather than f[k]: records with missing keys are
            # exactly the ones this listing needs to be able to show.
            line = '\t'.join([str(f.get(k, '<missing>')) for k in keys])
        if validate:
            fc = dict(f)
            fc.pop('_id', None)  # Not included in the schema
            try:
                magresIndexSchema.validate(fc)
                val = 'Valid'
            except Exception as e:
                # Deliberately broad: any validation failure is reported inline
                # so the listing carries on with the remaining records.
                val = 'Not valid ({0})'.format(e)
            line += '\t' + val
        print(line)

In [194]:
def _keyFromSchemaError(err, pattern):
    """
    Extract the first offending key name quoted in a schema key error message.

    | Args:
    |    err [Exception]: the SchemaMissingKeyError or SchemaWrongKeyError that was caught
    |    pattern [compiled regex]: pattern whose first group captures the key name

    | Returns:
    |    key [str]: the first key named in the message

    | Raises:
    |    RuntimeError: if the message does not match the pattern
    """
    found = pattern.search(str(err))
    if found is None:
        raise RuntimeError('Could not extract a key name from schema error: {0}'.format(err))
    return found.group(1)


def _repairRecord(collection, schema, record, defaults, reference=None):
    """
    Edit a record, both in memory and in its collection, until it validates against the schema.

    Missing keys are filled from `defaults`, unexpected keys are removed. One key is fixed per
    validation pass, then the record is validated again.

    | Args:
    |    collection: collection holding the record; update_one is called on it
    |    schema: object whose validate() raises SchemaMissingKeyError / SchemaWrongKeyError
    |    record [dict]: the record as read from the collection (must contain '_id'); edited in place
    |    defaults [dict]: values to use for missing keys
    |    reference [dict]: optional companion record; if it holds a value for a key being filled
    |                      in, that value must equal the default

    | Returns:
    |    edited [bool]: True if at least one key was added or removed

    | Raises:
    |    ValueError: if a key is missing and `defaults` has no value for it
    |    RuntimeError: if the default disagrees with the value in `reference`, or if the
    |                  key name cannot be read from the error message
    """
    # Accept both the singular and plural message forms ("Missing key: 'a'" and
    # "Missing keys: 'a', 'b'") and any key characters, e.g. underscores.
    # Only the first key is captured; the loop below handles the rest one by one.
    missing_re = re.compile(r"Missing keys?: u?'([^']+)'")
    wrong_re = re.compile(r"Wrong keys? u?'([^']+)'")

    edited = False
    while True:
        candidate = dict(record)
        del candidate['_id']  # Not included in the schema
        try:
            schema.validate(candidate)
        except SchemaMissingKeyError as e:
            # Add the missing key
            mk = _keyFromSchemaError(e, missing_re)
            if mk not in defaults:
                raise ValueError('No defaults provided for keyword {0}'.format(mk))
            if reference is not None and mk in reference and reference[mk] != defaults[mk]:
                raise RuntimeError('Incompatible values for keyword {0} between Index and Metadata'.format(mk))
            collection.update_one({'_id': record['_id']}, {'$set': {mk: defaults[mk]}})
            record[mk] = defaults[mk]
            edited = True
        except SchemaWrongKeyError as e:
            # Just remove the wrong key, taking its name from this error
            wk = _keyFromSchemaError(e, wrong_re)
            collection.update_one({'_id': record['_id']}, {'$unset': {wk: None}})
            del record[wk]
            edited = True
        else:
            return edited


def updateWithSchema(defaults={}):
    """
    Update any invalid index or metadata by removing excess keys, and filling missing keys with default values,
    following the schema as reference.
    If default values for missing keys are not provided throws an error.

    | Args:
    |    defaults [dict]: values to assign to keys required by the schema but absent from a record.
    |                     It is only read, never modified.

    | Returns:
    |    edited_ids [list]: the index '_id' of every record whose index or metadata entry was edited

    | Raises:
    |    ValueError: if a required key is missing and no default is provided for it
    |    RuntimeError: if a default to be written into the metadata disagrees with the value already
    |                  in the index, or if an index record points to a metadata record that does not exist
    """

    edited_ids = []

    for f in magresIndex.find({}):
        edited = _repairRecord(magresIndex, magresIndexSchema, f, defaults)

        # Same for Metadata
        md_id = ObjectId(f['metadataID'])
        md = magresMetadata.find_one({'_id': md_id})
        if md is None:
            raise RuntimeError('No metadata record {0} found for index record {1}'.format(md_id, f['_id']))
        # Always run the metadata repair, even if the index was already edited
        if _repairRecord(magresMetadata, magresMetadataSchema, md, defaults, reference=f):
            edited = True

        if edited:
            edited_ids.append(f['_id'])

    return edited_ids