### Conectar a Neo4j Desktop

In [1]:
import pandas as pd
from neo4j import GraphDatabase

In [4]:
df = pd.read_csv('C:/Users/migue/AplicacionesYTendencias/ProyectoAyT/UEM-Analytics-G22-3/BBDD-Grafos/data_advantage actor critic.csv')
df.head()

Unnamed: 0,titulo,autores,abstract,clase_pri,clase_otr
0,Reinforcement Learning through Asynchronous Ad...,Mohammad Babaeizadeh,We introduce a hybrid CPU/GPU version of the...,Machine Learning (cs.LG),
1,Altruistic Maneuver Planning for Cooperative A...,Behrad Toghi,With the adoption of autonomous vehicles on ...,Robotics (cs.RO),
2,Towards Understanding Asynchronous Advantage A...,Han Shen,Asynchronous and parallel implementation of ...,Machine Learning (cs.LG),; Optimization and Control (math.OC)
3,The Advantage Regret-Matching Actor-Critic,Audrūnas Gruslys,Regret minimization has played a key role in...,Artificial Intelligence (cs.AI),; Machine Learning (cs.LG)
4,Actor-Critic Sequence Training for Image Capti...,Li Zhang,Generating natural language descriptions of ...,Computer Vision and Pattern Recognition (cs.CV),


In [5]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [6]:
conn = Neo4jConnection(uri="bolt://localhost:7687", 
                       user="usermate",              
                       pwd="glass")

### Creación BBDD

In [7]:
conn.query('CREATE CONSTRAINT papers IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE')
conn.query('CREATE CONSTRAINT authors IF NOT EXISTS FOR (a:Author) REQUIRE a.name IS UNIQUE')
conn.query('CREATE CONSTRAINT categories IF NOT EXISTS FOR (c:Category) REQUIRE c.category IS UNIQUE')

[]

In [8]:
def add_categories(categories):
    # Adds category nodes to the Neo4j graph.
    query = '''
            UNWIND $rows AS row
            MERGE (c:Category {category: row.category})
            RETURN count(*) as total
            '''
    return conn.query(query, parameters = {'rows':categories.to_dict('records')})


def add_authors(rows, batch_size=10000):
    # Adds author nodes to the Neo4j graph as a batch job.
    query = '''
            UNWIND $rows AS row
            MERGE (:Author {name: row.author})
            RETURN count(*) as total
            '''
    return insert_data(query, rows, batch_size)


def insert_data(query, rows, batch_size = 10000):
    # Function to handle the updating the Neo4j database in batch mode.
    
    total = 0
    batch = 0
    start = time.time()
    result = None
    
    while batch * batch_size < len(rows):

        res = conn.query(query, 
                         parameters = {'rows': rows[batch*batch_size:(batch+1)*batch_size].to_dict('records')})
        total += res[0]['total']
        batch += 1
        result = {"total":total, 
                  "batches":batch, 
                  "time":time.time()-start}
        print(result)
        
    return result

In [9]:
def add_papers(rows, batch_size=5000):
   # Adds paper nodes and (:Author)--(:Paper) and 
   # (:Paper)--(:Category) relationships to the Neo4j graph as a 
   # batch job.
 
   query = '''
   UNWIND $rows as row
   MERGE (p:Paper {id:row.id}) ON CREATE SET p.title = row.title
 
   // connect categories
   WITH row, p
   UNWIND row.category_list AS category_name
   MATCH (c:Category {category: category_name})
   MERGE (p)-[:IN_CATEGORY]->(c)
 
   // connect authors
   WITH distinct row, p // reduce cardinality
   UNWIND row.cleaned_authors_list AS author
   MATCH (a:Author {name: author})
   MERGE (a)-[:AUTHORED]->(p)
   RETURN count(distinct p) as total
   '''
 
   return insert_data(query, rows, batch_size)

In [10]:
df.head()

Unnamed: 0,titulo,autores,abstract,clase_pri,clase_otr
0,Reinforcement Learning through Asynchronous Ad...,Mohammad Babaeizadeh,We introduce a hybrid CPU/GPU version of the...,Machine Learning (cs.LG),
1,Altruistic Maneuver Planning for Cooperative A...,Behrad Toghi,With the adoption of autonomous vehicles on ...,Robotics (cs.RO),
2,Towards Understanding Asynchronous Advantage A...,Han Shen,Asynchronous and parallel implementation of ...,Machine Learning (cs.LG),; Optimization and Control (math.OC)
3,The Advantage Regret-Matching Actor-Critic,Audrūnas Gruslys,Regret minimization has played a key role in...,Artificial Intelligence (cs.AI),; Machine Learning (cs.LG)
4,Actor-Critic Sequence Training for Image Capti...,Li Zhang,Generating natural language descriptions of ...,Computer Vision and Pattern Recognition (cs.CV),


In [11]:
categories = pd.DataFrame(df[['clase_pri']])
categories.rename(columns={'clase_pri':'category'},
                  inplace=True)
categories = categories.explode('category') \
                       .drop_duplicates(subset=['category'])

authors = pd.DataFrame(df[['autores']])
authors.rename(columns={'autores':'author'},
               inplace=True)
authors=authors.explode('author').drop_duplicates(subset=['author'])

In [12]:
add_categories(categories)

TypeError: query() got an unexpected keyword argument 'parameters'

In [13]:
add_authors(authors)

NameError: name 'time' is not defined

In [14]:
add_papers(df)

NameError: name 'time' is not defined