In [26]:
import glob
import json
import os
import json
from gensim.models.doc2vec import Doc2Vec, TaggedDocument 
from scipy import spatial
from doc2vec_prep import stem_text

In [15]:
import configparser
from neo4j import GraphDatabase

In [30]:
def get_document_neo4j(driver, doc_id):
    document = None
    with driver.session() as session:        
        query = f"""
            MATCH (ro:PURE:ResearchOutput {{uuid: '{doc_id}'}})
            --> (p:PURE:Person)
            RETURN ro {{
                .title, 
                .uuid, 
                .abstract_value, 
                .keywords,
                .info_portalUrl,
                authors: collect(p {{.uuid, .name_firstName, .name_lastName, .info_portalUrl}})}}
            """
        result = session.run(query)
        for r in result:
            return r['ro']
        return None

In [17]:
config = configparser.ConfigParser()
config.read('.env')
uri = "bolt://roag.is.ed.ac.uk:7687"
driver = GraphDatabase.driver(uri, auth=(config['auth']['USERNAME'], config['auth']['PASSWORD']))

## Test model

In [18]:
#model = Doc2Vec.load(os.path.join("./", 'doc2vec_neo4j_min_c2_alpha.model'), mmap='r')
model = Doc2Vec.load(os.path.join("./", 'doc2vec_neo4j_v1.model'), mmap='r')


In [19]:
doc_tags=list(model.docvecs.doctags)

In [20]:
consistency_check={}

for test_id in doc_tags[:5]:
    doc_test = get_document_neo4j(driver, test_id)
    text = doc_test['title'] + doc_test['abstract_value']
    print("Exploring document: {}" .format(doc_test['title']))
    consistency_check[doc_test['title']]={}
    consistency_check[doc_test['title']]['abstract']=doc_test['abstract_value']
    
    for i in range(3):
        consistency_check[doc_test['title']][i]={}
        print("Iter {}" .format(i))
        vector = model.infer_vector(stem_text(text))
        simdocs = model.docvecs.most_similar([vector])
        j=0
        for doc_id, sim in simdocs:
            try:
                doc = get_document_neo4j(driver, doc_id)
                print("Sim Document: {} -- Sim: {}" .format( doc['title'], sim))
                consistency_check[doc_test['title']][i][j]={}
                consistency_check[doc_test['title']][i][j]["title"]=doc['title']
                consistency_check[doc_test['title']][i][j]["sim"]=sim
                j+=1
            except:
                pass
    

Exploring document: Autistic spectrum disorder in prehistory
Iter 0
Sim Document: Frontotemporal lobar degeneration and social behaviour -- Sim: 0.575931191444397
Sim Document: Cumulative cultural evolution in a non-copying task in children and Guinea baboons -- Sim: 0.5706261396408081
Sim Document: High-fidelity copying is not necessarily the key to cumulative cultural evolution -- Sim: 0.5500689148902893
Sim Document: Comparative assessment of behaviorally derived personality structures in golden handed tamarins (Saguinus midas), cotton-top tamarins (Saguinus oedipus), and common marmosets (Callithrix jacchus) -- Sim: 0.5467841625213623
Sim Document: Abnormal dynamics of activation of object use information in apraxia -- Sim: 0.5392575263977051
Sim Document: A posterior-anterior distinction between scene perception and scene construction in human medial parietal cortex -- Sim: 0.5330343246459961
Sim Document: Evidence for degraded low frequency verbal concepts in left resected tempor

In [11]:
consistency_check

{'Autistic spectrum disorder in prehistory': {'abstract': 'Individuals with ‘extraordinary’ or ‘different’ minds have been suggested to be central to invention and the spread of new ideas in prehistory, shaping modern human behaviour and conferring an evolutionary advantage at population level. In this article the potential for neuropsychiatric conditions such as autistic spectrum disorders to provide this difference is explored, and the ability of the archaeological record to provide evidence of human behaviour is discussed. Specific reference is made to recent advances in the genetics of these conditions, which suggest that neuropsychiatric disorders represent a non-advantageous, pathological extreme of the human mind and are likely a by-product rather than a cause of human cognitive evolution.',
  0: {0: {'title': 'Protecting the unprotected', 'sim': 0.5816113948822021},
   1: {'title': 'Cumulative cultural evolution in a non-copying task in children and Guinea baboons',
    'sim': 

In [22]:
with open('consistency_check.json', 'w') as fp:
    json.dump(consistency_check, fp)

In [23]:
doc_tags[0]

'0385e4c4-2d35-4059-a2eb-d7a34b27fa0d'

In [33]:
vec1=model.docvecs['0385e4c4-2d35-4059-a2eb-d7a34b27fa0d']
doc_test = get_document_neo4j(driver, '0385e4c4-2d35-4059-a2eb-d7a34b27fa0d')
print(doc_test)
text = doc_test['title'] + doc_test['abstract_value']
vec2 = model.infer_vector(stem_text(text))

similairty = spatial.distance.cosine(vec1, vec2)
       

{'info_portalUrl': 'https://www.research.ed.ac.uk/portal/en/publications/autistic-spectrum-disorder-in-prehistory(0385e4c4-2d35-4059-a2eb-d7a34b27fa0d).html', 'keywords': 'ASJC Scopus subject areas', 'title': 'Autistic spectrum disorder in prehistory', 'uuid': '0385e4c4-2d35-4059-a2eb-d7a34b27fa0d', 'abstract_value': 'Individuals with ‘extraordinary’ or ‘different’ minds have been suggested to be central to invention and the spread of new ideas in prehistory, shaping modern human behaviour and conferring an evolutionary advantage at population level. In this article the potential for neuropsychiatric conditions such as autistic spectrum disorders to provide this difference is explored, and the ability of the archaeological record to provide evidence of human behaviour is discussed. Specific reference is made to recent advances in the genetics of these conditions, which suggest that neuropsychiatric disorders represent a non-advantageous, pathological extreme of the human mind and are l

In [29]:
print(similairty)

0.839736670255661
