In [1]:
# Se importan las librerías
import pandas as pd
from neo4j import GraphDatabase
import time

In [2]:
# Se carga el csv con el preprocesado de la parte de Lucas
# El csv es del 13/12/2022
df = pd.read_csv('C:/Users/migue/AplicacionesYTendencias/ProyectoAyT/UEM-Analytics-G22-3/BBDD-Grafos/totalPrueba.csv')
df.head()

Unnamed: 0,id,idioma,titulo,genero,abstract,categorias
0,doi:10.1007/978-3-031-16203-9_8,en,The Comprehensive Model of Using In-Depth Cons...,OriginalPaper,paper describ relev machin learn method name t...,"['Computational Intelligence', 'Artificial Int..."
1,doi:10.1007/978-3-031-08246-7_7,en,Implementation of Reinforcement-Learning Algor...,OriginalPaper,problem autonom robot navig indoor environ mus...,"['Computational Intelligence', 'Artificial Int..."
2,doi:10.1007/978-981-19-4960-9_31,en,Reinforcement Learning for Autonomous Driving ...,OriginalPaper,decisionmak process autonom vehicl come numer ...,"['Computational Intelligence', 'Artificial Int..."
3,doi:10.1007/978-3-031-18461-1_11,en,A Survey of Reinforcement Learning Toolkits fo...,OriginalPaper,game industri becom one excit creativ industri...,"['Computational Intelligence', 'Control, Robot..."
4,doi:10.1007/978-981-19-7648-3_12,en,Priority-Aware Computational Resource Allocation,OriginalPaper,vehicular fog comput vfc expect promis scheme ...,"['Computer Science', 'Communications Engineeri..."


### Conectar a Neo4j Desktop

In [3]:
# Se crea una clase en la que se esctablece la conexión entre la máquina local donde
# se encuentra el código de python y la base de datos de Neo4j Desktop.
# Le indicamos la URL, el usuario y la contraseña para completar la conexión
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response
        
conn = Neo4jConnection(uri="bolt://localhost:7687", 
                       user="usermate",              
                       pwd="glass")

### Creación BBDD

In [4]:
df.head()

Unnamed: 0,id,idioma,titulo,genero,abstract,categorias
0,doi:10.1007/978-3-031-16203-9_8,en,The Comprehensive Model of Using In-Depth Cons...,OriginalPaper,paper describ relev machin learn method name t...,"['Computational Intelligence', 'Artificial Int..."
1,doi:10.1007/978-3-031-08246-7_7,en,Implementation of Reinforcement-Learning Algor...,OriginalPaper,problem autonom robot navig indoor environ mus...,"['Computational Intelligence', 'Artificial Int..."
2,doi:10.1007/978-981-19-4960-9_31,en,Reinforcement Learning for Autonomous Driving ...,OriginalPaper,decisionmak process autonom vehicl come numer ...,"['Computational Intelligence', 'Artificial Int..."
3,doi:10.1007/978-3-031-18461-1_11,en,A Survey of Reinforcement Learning Toolkits fo...,OriginalPaper,game industri becom one excit creativ industri...,"['Computational Intelligence', 'Control, Robot..."
4,doi:10.1007/978-981-19-7648-3_12,en,Priority-Aware Computational Resource Allocation,OriginalPaper,vehicular fog comput vfc expect promis scheme ...,"['Computer Science', 'Communications Engineeri..."


In [5]:
# Llenamos la base de datos.
# Se crean CONSTRAINTS para garantizar que los nodos no estén
# duplicados mientras configuramos algunos índices.
conn.query('CREATE CONSTRAINT papers IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE')
conn.query('CREATE CONSTRAINT categories IF NOT EXISTS FOR (c:Category) REQUIRE c.category IS UNIQUE')

[]

In [6]:
query = '''
WITH 'file:///totalPrueba.csv' AS data 
LOAD CSV WITH HEADERS FROM data AS row
MERGE (c:Category {category: row.categorias})
'''
conn.query(query, db='neo4j2')

[]

In [7]:
query = '''
WITH 'file:///totalPrueba.csv' AS data 
LOAD CSV WITH HEADERS FROM data AS row
MERGE (p:Paper {id:row.id}) ON CREATE SET p.titulo = row.titulo
 
// connect categories
WITH row, p
UNWIND row.categorias AS category_name
MATCH (c:Category {category: category_name})
MERGE (p)-[:IN_CATEGORY]->(c)
'''
conn.query(query, db='neo4j2')

[]

In [8]:
query = '''
MATCH (p:Paper)-[r:IN_CATEGORY]->(c:Category) 
RETURN p,c LIMIT 25
'''
conn.query(query, db='neo4j2')

[<Record p=<Node element_id='4:72029615-e08a-45bb-a9c8-557d244bc950:569' labels=frozenset({'Paper'}) properties={'titulo': 'The Scope and Applications of Nature-Inspired Computing in Bioinformatics\r\n', 'id': 'doi:10.1007/978-981-19-6379-7_1'}> c=<Node element_id='4:72029615-e08a-45bb-a9c8-557d244bc950:0' labels=frozenset({'Category'}) properties={'category': "['Computational Intelligence', 'Artificial Intelligence']"}>>,
 <Record p=<Node element_id='4:72029615-e08a-45bb-a9c8-557d244bc950:4709' labels=frozenset({'Paper'}) properties={'titulo': 'E-commerce Application with Analytics for Pharmaceutical Industry', 'id': 'doi:10.1007/978-981-19-3590-9_22'}> c=<Node element_id='4:72029615-e08a-45bb-a9c8-557d244bc950:0' labels=frozenset({'Category'}) properties={'category': "['Computational Intelligence', 'Artificial Intelligence']"}>>,
 <Record p=<Node element_id='4:72029615-e08a-45bb-a9c8-557d244bc950:4233' labels=frozenset({'Paper'}) properties={'titulo': 'The Investigation of Network Se

In [11]:
from pandas import DataFrame

query = '''
MATCH (p:Paper)
RETURN DISTINCT p.id, p.titulo, p.abstract
'''
dtf_data = DataFrame([dict(_) for _ in conn.query(query, db='neo4j2')])
dtf_data.sample(10)

Unnamed: 0,p.id,p.titulo,p.abstract
12612,doi:10.1007/s10489-022-03451-1,Video summarization with u-shaped transformer,
2596,doi:10.1007/978-3-031-18154-2_1,Baby Fever,
5151,doi:10.1007/978-3-031-15457-7_8,Strategic Communication in a Transnational Pro...,
785,doi:10.1007/978-981-19-3015-7_32,Knowing Your Customers Using Customer Segmenta...,
9120,doi:10.1007/s13201-022-01830-0,River flow prediction based on improved machin...,
17867,doi:10.1007/978-3-031-04721-3_7,"Additive Manufacturing of Ceramics: Materials,...",
5414,doi:10.1007/978-3-031-08954-1_95,Fear and Panic Buying Behavior Associated with...,
5443,doi:10.1007/978-981-19-1610-6_57,A Systematized Literature Review: Internet of ...,
9787,doi:10.1007/s40031-022-00794-8,Structural Seismic Vibration Analysis Using Mu...,
1308,doi:10.1007/978-981-19-3842-9_60,Visual-Inertial Odometry Design Based on Nonli...,


In [6]:
def add_categories(categories):
    # Añade el nodo de categorías a la BBDD de grafos de Neo4j.
    query = '''
            UNWIND $rows AS row
            MERGE (c:Category {category: row.category})
            RETURN count(*) as total
            '''
        
    return conn.query(query, parameters = {'rows':categories.to_dict('records')})
    #return conn.query(query, parameters = {'rows':categories.to_dict('records')})


In [7]:
# def add_authors(rows, batch_size=10000):
#     # Añade a la BBDD de grafos de Neo4j el nodo de autores.
#     # Adds author nodes to the Neo4j graph as a batch job.
#     query = '''
#             UNWIND $rows AS row
#             MERGE (:Author {name: row.author})
#             RETURN count(*) as total
#             '''
#     return insert_data(query, rows, batch_size)



In [7]:
def insert_data(query, rows, batch_size = 10000):
    # Funcición para gestionar las cargas de la BBDD de Neo4j en modo batch.
    # Puede ser útil para cuando se tengan más de 50.000 cargas.
    
    total = 0
    batch = 0
    start = time.time()
    result = None
    
    while batch * batch_size < len(rows):

        res = conn.query(query, 
                         parameters = {'rows': rows[batch*batch_size:(batch+1)*batch_size].to_dict('records')})
        total += res[0]['total']
        batch += 1
        result = {"total":total, 
                  "batches":batch, 
                  "time":time.time()-start}
        print(result)
        
    return result

In [8]:
def add_papers(rows, batch_size=5000):
   # Se Añade a la BBDD de grafos de Neo4j el nodo de los papers.

   # Añade el nodo de papers (:Author)--(:Paper)  
   # (:Paper)--(:Category) y las relaciones a la BBDD de grafos de Neo4j en modo batch.
 
   query = '''
   UNWIND $rows as row
   MERGE (p:Paper {id:row.id}) ON CREATE SET p.titulo = row.titulo
 
   // connect categories
   WITH row, p
   UNWIND row.categorias AS category_name
   MATCH (c:Category {category: category_name})
   MERGE (p)-[:IN_CATEGORY]->(c)
   '''
 
   return insert_data(query, rows, batch_size)

In [8]:
# def add_papers(rows, batch_size=5000):
#    # Se Añade a la BBDD de grafos de Neo4j el nodo de los papers.

#    # Añade el nodo de papers (:Author)--(:Paper)  
#    # (:Paper)--(:Category) y las relaciones a la BBDD de grafos de Neo4j en modo batch.
 
#    query = '''
#    UNWIND $rows as row
#    MERGE (p:Paper {id:row.id}) ON CREATE SET p.title = row.title
 
#    // connect categories
#    WITH row, p
#    UNWIND row.clase_pri AS category_name
#    MATCH (c:Category {category: category_name})
#    MERGE (p)-[:IN_CATEGORY]->(c)
 
#    // connect authors
#    WITH distinct row, p // reduce cardinality
#    UNWIND row.autores AS author
#    MATCH (a:Author {name: author})
#    MERGE (a)-[:AUTHORED]->(p)
#    RETURN count(distinct p) as total
#    '''
 
#    return insert_data(query, rows, batch_size)

In [9]:
df.head()

Unnamed: 0,titulo,autores,abstract,clase_pri,clase_otr,id
0,Reinforcement Learning through Asynchronous Ad...,Mohammad Babaeizadeh,We introduce a hybrid CPU/GPU version of the...,Machine Learning (cs.LG),,0
1,Altruistic Maneuver Planning for Cooperative A...,Behrad Toghi,With the adoption of autonomous vehicles on ...,Robotics (cs.RO),,1
2,Towards Understanding Asynchronous Advantage A...,Han Shen,Asynchronous and parallel implementation of ...,Machine Learning (cs.LG),; Optimization and Control (math.OC),2
3,The Advantage Regret-Matching Actor-Critic,Audrūnas Gruslys,Regret minimization has played a key role in...,Artificial Intelligence (cs.AI),; Machine Learning (cs.LG),3
4,Actor-Critic Sequence Training for Image Capti...,Li Zhang,Generating natural language descriptions of ...,Computer Vision and Pattern Recognition (cs.CV),,4


In [9]:
categories = pd.DataFrame(df[['categorias']])
categories.rename(columns={'categorias':'categoria'},
                  inplace=True)
categories = categories.explode('categoria') \
                       .drop_duplicates(subset=['categoria'])

# authors = pd.DataFrame(df[['autores']])
# authors.rename(columns={'autores':'author'},
#                inplace=True)
# authors=authors.explode('author').drop_duplicates(subset=['author'])

In [10]:
add_categories(categories)

TypeError: query() got an unexpected keyword argument 'parameters'

In [11]:
# add_authors(authors)

TypeError: query() got an unexpected keyword argument 'parameters'

In [11]:
add_papers(df)

TypeError: query() got an unexpected keyword argument 'parameters'