In [1]:
# Open a MongoDB client and grab handles to the two collections this notebook works with
import pymongo

mongo = pymongo.MongoClient('mongodb://mongo_local:27017')
db = mongo['mockdata']

# `patents` is read from; `labels` is read from and written back to further down.
patents_col, labels_col = (db[name] for name in ('patents', 'labels'))

In [2]:
# Claim dependency lookup: "<patent_number>_<claim_number>" -> that claim's `dependencies` value
claim_deps = {}
patents = patents_col.find({})
for doc in patents:
    number = doc['patent_number']
    # Fold every claim of this patent into the map in one pass.
    claim_deps.update(
        (f'{number}_{entry["claim_number"]}', entry['dependencies'])
        for entry in doc['claims']
    )
        
def get_parent_claim_numbers(patent_number, claim_number, deps=None):
    """Return the ancestor claim numbers of a claim, nearest parent first.

    Starting at ``claim_number``, repeatedly looks up
    ``"<patent_number>_<claim number>"`` in the dependency map and follows the
    stored value (converted with ``int``) to the next claim.

    Args:
        patent_number: Patent identifier used as the key prefix.
        claim_number: Claim whose ancestors are wanted.
        deps: Optional mapping of ``"<patent_number>_<claim_number>"`` to the
            parent claim number (or ``None`` when there is no parent).
            Defaults to the module-level ``claim_deps``.

    Returns:
        list[int]: Parent, grandparent, ... in that order. Empty when the
        claim is unknown or has no parent.

    Raises:
        ValueError / TypeError: if a stored dependency value is not ``None``
        and cannot be converted with ``int``.
    """
    if deps is None:
        deps = claim_deps

    parents = []
    # Claim numbers already visited; guards against cyclic dependency data
    # (e.g. a claim that depends on itself), which would otherwise loop forever.
    seen = {claim_number}
    c_no = claim_number

    while True:
        value = deps.get(f'{patent_number}_{c_no}')
        if value is None:
            # Unknown claim, or a claim with no parent: end of the chain.
            break
        c_no = int(value)
        if c_no in seen:
            break
        seen.add(c_no)
        parents.append(c_no)
    return parents

In [3]:
from random import random, randrange, seed

# For each section in each label
# 1. Get the patents linked to the label through its application numbers
# 2. Pick up to 5 random claims from each linked patent
# 3. Attach a random score to each picked claim

seed(10)  # fixed seed so the generated mock scores are reproducible

# Materialise the cursor first: the loop below rewrites documents in the same
# collection, and modifying a collection while a cursor over it is still open
# is best avoided.
labels = list(labels_col.find({}))
for label in labels:
    patents = list(patents_col.find({'application_numbers': {'$in': label['application_numbers']}}))
    if not patents:
        # Labels without any linked patent are left untouched.
        continue

    print(label['spl_id'])
    sections = []

    for section in label['sections']:
        scores = []
        for patent in patents:
            claims = patent['claims']
            if not claims:
                # randrange(0) raises ValueError; a patent without claims has nothing to score.
                continue

            # Draw 5 positions with replacement; the set collapses repeats,
            # so between 1 and 5 distinct claims are selected for this section.
            claim_indices = {randrange(len(claims)) for _ in range(5)}

            # Make up scores in the range 0.5-1 for the selected claims
            for idx in claim_indices:
                # Use the claim's own number rather than its list position:
                # the dependency lookup is keyed by `claim_number`.
                claim_no = claims[idx]['claim_number']
                scores.append({
                    'patentNumber': patent['patent_number'],
                    'claimNumber': claim_no,
                    'parentClaimNumbers': get_parent_claim_numbers(patent['patent_number'], claim_no),
                    'score': round(random() / 2 + 0.5, 4)
                })
        section['scores'] = scores
        sections.append(section)

    # Update Mongo: replace the whole label document (insert it if missing).
    # `Collection.update` is deprecated in PyMongo 3 and removed in PyMongo 4.
    label['sections'] = sections
    labels_col.replace_one({'spl_id': label['spl_id']}, label, upsert=True)

4e3fa51f-456d-4863-af4b-1e1ae5e59b72
73a25b94-80b8-4505-b97f-011c06157915
00d3c93f-4eb7-4643-bb24-15ecc707897e
c6b5b158-a814-411c-90f2-7a84d8814145
47b14a6d-d22f-4c22-bc55-861b00502928
eba483a8-ccb1-4147-bea8-bc7a9bc6110a
3fca7a7d-b89b-4143-be19-f2be942986f5
f5e0e912-46d8-4bd5-b0d0-34b1f77d1bc4
ac092db4-b7d2-457e-a757-535422a52128


