In [1]:
# Se importan las librerías
import pandas as pd
from neo4j import GraphDatabase
import time

In [2]:
# Se carga el csv con el preprocesado de la parte de Lucas
# El csv es del 13/12/2022
df = pd.read_csv('totalPrueba.csv')
df.head()

Unnamed: 0,id,idioma,titulo,genero,abstract,categorias
0,doi:10.1007/978-3-031-16203-9_8,en,The Comprehensive Model of Using In-Depth Cons...,OriginalPaper,paper describ relev machin learn method name t...,"['Computational Intelligence', 'Artificial Int..."
1,doi:10.1007/978-3-031-08246-7_7,en,Implementation of Reinforcement-Learning Algor...,OriginalPaper,problem autonom robot navig indoor environ mus...,"['Computational Intelligence', 'Artificial Int..."
2,doi:10.1007/978-981-19-4960-9_31,en,Reinforcement Learning for Autonomous Driving ...,OriginalPaper,decisionmak process autonom vehicl come numer ...,"['Computational Intelligence', 'Artificial Int..."
3,doi:10.1007/978-3-031-18461-1_11,en,A Survey of Reinforcement Learning Toolkits fo...,OriginalPaper,game industri becom one excit creativ industri...,"['Computational Intelligence', 'Control, Robot..."
4,doi:10.1007/978-981-19-7648-3_12,en,Priority-Aware Computational Resource Allocation,OriginalPaper,vehicular fog comput vfc expect promis scheme ...,"['Computer Science', 'Communications Engineeri..."


In [3]:
df['genero'].value_counts()

OriginalPaper                                                13502
['OriginalPaper', 'Original Article']                          814
['OriginalPaper', 'Article']                                   616
['OriginalPaper', 'Research']                                  329
['OriginalPaper', 'Original Paper']                            312
                                                             ...  
['OriginalPaper', 'Low Vision']                                  1
['OriginalPaper', 'Original Paper - Production Geology ']        1
['OriginalPaper', 'Report']                                      1
['OriginalPaper', 'Gastrointestinal']                            1
['OriginalPaper', 'Symposium/Special Issue']                     1
Name: genero, Length: 497, dtype: int64

In [4]:
df['idioma'].value_counts()

en    17980
de       49
nl       33
Name: idioma, dtype: int64

### Conectar a Neo4j Desktop

In [6]:
# Se crea una clase en la que se esctablece la conexión entre la máquina local donde
# se encuentra el código de python y la base de datos de Neo4j Desktop.
# Le indicamos la URL, el usuario y la contraseña para completar la conexión
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response
        
conn = Neo4jConnection(uri="bolt://localhost:7687", 
                       user="usermate",              
                       pwd="glass")

### Creación BBDD

In [7]:
df.head()

Unnamed: 0,id,idioma,titulo,genero,abstract,categorias
0,doi:10.1007/978-3-031-16203-9_8,en,The Comprehensive Model of Using In-Depth Cons...,OriginalPaper,paper describ relev machin learn method name t...,"['Computational Intelligence', 'Artificial Int..."
1,doi:10.1007/978-3-031-08246-7_7,en,Implementation of Reinforcement-Learning Algor...,OriginalPaper,problem autonom robot navig indoor environ mus...,"['Computational Intelligence', 'Artificial Int..."
2,doi:10.1007/978-981-19-4960-9_31,en,Reinforcement Learning for Autonomous Driving ...,OriginalPaper,decisionmak process autonom vehicl come numer ...,"['Computational Intelligence', 'Artificial Int..."
3,doi:10.1007/978-3-031-18461-1_11,en,A Survey of Reinforcement Learning Toolkits fo...,OriginalPaper,game industri becom one excit creativ industri...,"['Computational Intelligence', 'Control, Robot..."
4,doi:10.1007/978-981-19-7648-3_12,en,Priority-Aware Computational Resource Allocation,OriginalPaper,vehicular fog comput vfc expect promis scheme ...,"['Computer Science', 'Communications Engineeri..."


In [9]:
# Llenamos la base de datos.
# Se crean CONSTRAINTS para garantizar que los nodos no estén
# duplicados mientras configuramos algunos índices.
conn.query('CREATE CONSTRAINT papers IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE')
conn.query('CREATE CONSTRAINT categories IF NOT EXISTS FOR (c:Category) REQUIRE c.category IS UNIQUE')

[]

In [10]:
# query = '''
# WITH 'file:///totalPrueba.csv' AS data 
# LOAD CSV WITH HEADERS FROM data AS row
# MERGE (c:Category {category: row.categorias})
# '''
# conn.query(query, db='neo4j2')

query = '''
//Create and Relation Papers and Categories
LOAD CSV WITH HEADERS FROM 'file:///totalPrueba.csv' AS row
MERGE (p:Paper {id:row.id}) ON CREATE SET p.titulo = row.titulo
WITH p, row
UNWIND split(row.categorias, ', ') AS category_name
MERGE (c:Category {category: category_name})
MERGE (p)-[:IN_CATEGORY]->(c)
'''
conn.query(query,db='neo4j')

[]

In [11]:
# query = '''
# //Create and Relation Papers and Categories
# LOAD CSV WITH HEADERS FROM 'file:///totalPrueba.csv' AS row
# MERGE (p:Paper {id:row.id}) ON CREATE SET p.titulo = row.titulo
# WITH p, row
# UNWIND split(row.categorias, ', ') AS category_name
# MERGE (c:Category {category: category_name})
# MERGE (p)-[:IN_CATEGORY]->(c)
# '''
# conn.query(query,db='neo4j2')

In [12]:
# query = '''
# WITH 'file:///totalPrueba.csv' AS data 
# LOAD CSV WITH HEADERS FROM data AS row
# MERGE (p:Paper {id:row.id}) ON CREATE SET p.titulo = row.titulo
# // connect categories
# WITH row, p
# UNWIND row.categorias AS category_name
# MATCH (p:Category {category: category_name})
# MERGE (p)-[:IN_CATEGORY]->(c)
# '''
# conn.query(query, db='neo4j2')

In [13]:
query = '''
MATCH (p:Paper)-[r:IN_CATEGORY]->(c:Category) 
RETURN p,c LIMIT 25
'''
conn.query(query, db='neo4j')

[<Record p=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:443' labels=frozenset({'Paper'}) properties={'titulo': 'Mixed Martial Arts Bout Prediction Using Artificial Intelligence', 'id': 'doi:10.1007/978-3-031-17697-5_36'}> c=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:33' labels=frozenset({'Category'}) properties={'category': "['Computational Intelligence'"}>>,
 <Record p=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:12462' labels=frozenset({'Paper'}) properties={'titulo': 'Research of the Freight Trains Movement Stability with a Network Effect', 'id': 'doi:10.1007/978-3-031-20141-7_70'}> c=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:33' labels=frozenset({'Category'}) properties={'category': "['Computational Intelligence'"}>>,
 <Record p=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:4698' labels=frozenset({'Paper'}) properties={'titulo': 'Effect of Entrepreneurial Education on Entrepreneurial Intention: Mediating Role of Entrep

In [14]:
query = '''
//Create node for language and relationship for language and paper
WITH 'file:///totalPrueba.csv' AS data 
LOAD CSV WITH HEADERS FROM data AS row
CREATE (l:Language {language:row.idioma})
WITH row, l
MATCH (p:Paper {id: row.id})
MERGE (p)-[:IN_LANGUAGE]->(l)
'''
conn.query(query,db='neo4j')

[]

In [15]:
query = '''
MATCH (p:Paper)-[r:IN_LANGUAGE]->(l:Language) 
RETURN l,p LIMIT 25
'''
conn.query(query, db='neo4j')

[<Record l=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:36238' labels=frozenset({'Language'}) properties={'language': 'en'}> p=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:0' labels=frozenset({'Paper'}) properties={'titulo': 'The Comprehensive Model of\xa0Using In-Depth Consolidated Multimodal Learning to\xa0Study Trading Strategies in\xa0the\xa0Securities Market', 'id': 'doi:10.1007/978-3-031-16203-9_8'}>>,
 <Record l=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:18176' labels=frozenset({'Language'}) properties={'language': 'en'}> p=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:0' labels=frozenset({'Paper'}) properties={'titulo': 'The Comprehensive Model of\xa0Using In-Depth Consolidated Multimodal Learning to\xa0Study Trading Strategies in\xa0the\xa0Securities Market', 'id': 'doi:10.1007/978-3-031-16203-9_8'}>>,
 <Record l=<Node element_id='4:fd98e22d-4acd-4275-91fc-02d5dc9c7d3a:36239' labels=frozenset({'Language'}) properties={'language'

In [16]:
from pandas import DataFrame

query = '''
MATCH (p:Paper)
RETURN DISTINCT p.id, p.titulo, p.abstract
'''
dtf_data = DataFrame([dict(_) for _ in conn.query(query, db='neo4j')])
dtf_data.sample(10)

Unnamed: 0,p.id,p.titulo,p.abstract
2133,doi:10.1007/978-3-031-13433-3_1,What is Intelligent Construction?,
7947,doi:10.1007/978-3-031-16865-9_12,Factors Influencing the Intention to Adopt Big...,
12896,doi:10.1007/s10489-022-03190-3,AP-BERT: enhanced pre-trained model through av...,
11493,doi:10.1038/s41598-022-24567-x,Detecting hierarchical organization of pervasi...,
16238,doi:10.1007/978-3-031-11058-0_43,Simulation Model of Retarder Stopper on the En...,
1761,doi:10.1007/978-981-19-4052-1_72,A Hybrid Gray Wolf Optimizer for Modeling and ...,
419,doi:10.1007/978-3-031-19039-1_6,What Is on the Horizon?,
365,doi:10.1007/978-981-19-3035-5_4,A Study on Reinforcement Learning-Based Traffi...,
6176,doi:10.1007/978-981-19-4606-6_2,Instructions for the Preparation Intervention ...,
2884,doi:10.1007/978-981-19-1457-7_19,Effect of Catalyst in the Pyrolysis of Waste P...,
