In [None]:
# Step 1.1: Install Required Libraries
!pip install kaggle pandas neo4j transformers sentence-transformers

# Step 1.2: Import Necessary Libraries
import ast
import os
from getpass import getpass

import pandas as pd
from neo4j import GraphDatabase
from transformers import pipeline

Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Colle

In [None]:
# Step 2.2: Upload Kaggle API Token
from google.colab import files
files.upload()  # Upload the kaggle.json file

# Step 2.3: Configure Kaggle API
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Step 2.4: Download Dataset
!kaggle datasets download -d nechbamohammed/research-papers-dataset
!unzip research-papers-dataset.zip

Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/nechbamohammed/research-papers-dataset
License(s): unknown
Downloading research-papers-dataset.zip to /content
 99% 583M/590M [00:08<00:00, 126MB/s]
100% 590M/590M [00:08<00:00, 74.2MB/s]
Archive:  research-papers-dataset.zip
  inflating: dblp-v10.csv            


In [None]:
# Step 3.1: Load the Dataset with Proper Handling of Bad Lines
def _parse_list_cell(value):
    """Turn a stringified list such as "['a', 'b']" into a real list of strings.

    The authors/references cells are Python list literals stored as text, so a
    plain comma split leaves brackets and quotes inside every item and breaks
    entries that themselves contain a comma.  Cells that are not valid
    literals fall back to a comma split; non-string cells become ``[]``.
    """
    if not isinstance(value, str):
        return []
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return [part.strip() for part in value.split(',') if part.strip()]
    if isinstance(parsed, (list, tuple)):
        return [str(item).strip() for item in parsed]
    return [str(parsed).strip()]


df = pd.read_csv('dblp-v10.csv', on_bad_lines='skip')

# Step 3.2: Select Only the First 100,000 Rows
df = df.head(100000)

# Step 3.3: Clean the Data
# Drop rows with missing values in critical columns
df = df.dropna(subset=['title', 'abstract', 'authors', 'n_citation', 'references', 'venue', 'year', 'id'])

# Normalize Columns
# assign() builds a new frame, which avoids the SettingWithCopyWarning raised
# when writing into the slice returned by head().
df = df.assign(
    authors=df['authors'].map(_parse_list_cell),
    references=df['references'].map(_parse_list_cell),
)

# Step 3.4: Save the Cleaned Dataset
# Note: CSV stores the list columns as text again; they must be re-parsed on load.
df.to_csv('cleaned_dblp.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['authors'] = df['authors'].apply(lambda x: x.split(',') if isinstance(x, str) else [])


In [None]:
# Reload the cleaned CSV from disk to sanity-check what was written.
data = pd.read_csv('cleaned_dblp.csv')

In [None]:
# (rows, columns) of the reloaded dataset
data.shape

(86638, 8)

In [None]:
# Step 3.1: Custom Error Handler to Log Skipped Rows
skipped_rows = []


def handle_bad_line(line):
    """Remember a malformed row and skip it.

    pandas calls this for every bad line when ``on_bad_lines`` is a callable;
    returning ``None`` (implicitly here) tells it to drop that line.
    """
    skipped_rows.append(line)


# Load the Dataset with Custom Error Handler
df = pd.read_csv(
    'dblp-v10.csv',
    engine='python',
    on_bad_lines=handle_bad_line,
)

# Print Skipped Rows for Debugging
print("Skipped Rows:", skipped_rows)

# Step 3.2: Select Only the First 100,000 Rows
df = df.head(100000)

# Step 3.3: Clean the Data
# Drop rows with missing values in critical columns
df = df.dropna(subset=['title', 'abstract', 'authors', 'n_citation', 'references', 'venue', 'year', 'id'])


def _parse_list_cell(value):
    """Turn a stringified list such as "['a', 'b']" into a real list of strings.

    The authors/references cells are Python list literals stored as text, so a
    plain comma split leaves brackets and quotes inside every item and breaks
    entries that themselves contain a comma.  Cells that are not valid
    literals fall back to a comma split; non-string cells become ``[]``.
    """
    if not isinstance(value, str):
        return []
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return [part.strip() for part in value.split(',') if part.strip()]
    if isinstance(parsed, (list, tuple)):
        return [str(item).strip() for item in parsed]
    return [str(parsed).strip()]


# Normalize Columns
# assign() builds a new frame, which avoids the SettingWithCopyWarning raised
# when writing into the slice returned by head().
df = df.assign(
    authors=df['authors'].map(_parse_list_cell),
    references=df['references'].map(_parse_list_cell),
)

# Step 3.4: Save the Cleaned Dataset
# Note: CSV stores the list columns as text again; they must be re-parsed on load.
df.to_csv('cleaned_dblp.csv', index=False)

Skipped Rows: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['authors'] = df['authors'].apply(lambda x: x.split(',') if isinstance(x, str) else [])


In [None]:
# Step 4.1: Inspect the Dataset
# Print, in order: the first rows, the column index, and the overall shape.
for summary in (df.head(), df.columns, f"Dataset Shape: {df.shape}"):
    print(summary)

                                            abstract  \
0  In this paper, a robust 3D triangular mesh wat...   
1  We studied an autoassociative neural network w...   
2  It is well-known that Sturmian sequences are t...   
3  One of the fundamental challenges of recognizi...   
4  This paper generalizes previous optimal upper ...   

                                             authors  n_citation  \
0          [['S. Ben Jabra',  'Ezzeddine Zagrouba']]          50   
1  [['Joaquín J. Torres',  'Jesús M. Cortés',  'J...          50   
2        [['Genevi eve Paquin',  'Laurent Vuillon']]          50   
3  [['Yaser Sheikh',  'Mumtaz Sheikh',  'Mubarak ...         221   
4  [['Efraim Laksman',  'Håkan Lennerstad',  'Mag...           0   

                                          references  \
0  [['09cb2d7d-47d1-4a85-bfe5-faa8221e644b',  '10...   
1  [['4017c9d2-9845-4ad2-ad5b-ba65523727c5',  'b1...   
2  [['1c655ee2-067d-4bc4-b8cc-bc779e9a7f10',  '2e...   
3  [['056116c1-9e7a-4f9b-a918-

In [None]:
from neo4j import GraphDatabase

# Step 1.1: Connect to Neo4j
# Connection settings are read from the environment so credentials never have
# to be typed into (or saved with) the notebook.  When NEO4J_PASSWORD is not
# set, the password is prompted for without echoing it.
uri = os.environ.get("NEO4J_URI", "neo4j+s://d5e9ec78.databases.neo4j.io")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

driver = GraphDatabase.driver(uri, auth=(username, password))

In [None]:
# Step 2.1: Define a Function to Add Nodes and Relationships
def add_paper(tx, paper_id, title, abstract, authors, n_citation, references, venue, year):
    """Write one paper and its satellite nodes/relationships into the graph.

    Each attribute is stored as its own node linked to the central ``Paper``
    node.  Every statement uses MERGE, so nodes and relationships that
    already exist are reused rather than duplicated.

    Args:
        tx: Object exposing ``run(query, **params)``; the session passes a
            managed transaction here.
        paper_id: Identifier stored on the ``Paper`` node (and the ``ID`` node).
        title: Title text, linked via ``HAS_TITLE``.
        abstract: Abstract text, linked via ``HAS_ABSTRACT``; skipped when falsy.
        authors: Iterable of author-name strings, linked via ``AUTHORED_BY``.
        n_citation: Citation count, linked via ``HAS_CITATION``; only ``None``
            is skipped, so a count of 0 is still recorded.
        references: Iterable of referenced paper-id strings, linked via ``REFERENCES``.
        venue: Venue name, linked via ``PUBLISHED_AT``; skipped when falsy.
        year: Publication year, linked via ``HAS_YEAR``; skipped when falsy.
    """
    # Create Paper Node (Central Node)
    tx.run("""
        MERGE (p:Paper {id: $paper_id})
    """, paper_id=paper_id)

    # Create Title Node and Relationship
    tx.run("""
        MERGE (t:Title {text: $title})
        MERGE (p:Paper {id: $paper_id})
        MERGE (p)-[:HAS_TITLE]->(t)
    """, title=title, paper_id=paper_id)

    # Create Abstract Node and Relationship
    if abstract:
        tx.run("""
            MERGE (a:Abstract {text: $abstract})
            MERGE (p:Paper {id: $paper_id})
            MERGE (p)-[:HAS_ABSTRACT]->(a)
        """, abstract=abstract, paper_id=paper_id)

    # Create Author Nodes and Relationships
    for author in authors:
        author_name = author.strip()
        if not author_name:
            # A blank entry would otherwise create an Author node with an empty name.
            continue
        tx.run("""
            MERGE (a:Author {name: $author})
            MERGE (p:Paper {id: $paper_id})
            MERGE (p)-[:AUTHORED_BY]->(a)
        """, author=author_name, paper_id=paper_id)

    # Create Venue Node and Relationship
    if venue:
        tx.run("""
            MERGE (v:Venue {name: $venue})
            MERGE (p:Paper {id: $paper_id})
            MERGE (p)-[:PUBLISHED_AT]->(v)
        """, venue=venue.strip(), paper_id=paper_id)

    # Create Year Node and Relationship
    if year:
        tx.run("""
            MERGE (y:Year {value: $year})
            MERGE (p:Paper {id: $paper_id})
            MERGE (p)-[:HAS_YEAR]->(y)
        """, year=year, paper_id=paper_id)

    # Create Citation Node and Relationship
    # 0 is a legitimate citation count, so test for None instead of truthiness.
    if n_citation is not None:
        tx.run("""
            MERGE (c:Citation {count: $n_citation})
            MERGE (p:Paper {id: $paper_id})
            MERGE (p)-[:HAS_CITATION]->(c)
        """, n_citation=n_citation, paper_id=paper_id)

    # Create Reference Relationships
    for ref in references:
        ref_id = ref.strip()
        if not ref_id:
            # A blank entry would otherwise create a Paper node with an empty id.
            continue
        tx.run("""
            MERGE (p:Paper {id: $paper_id})
            MERGE (r:Paper {id: $ref})
            MERGE (p)-[:REFERENCES]->(r)
        """, paper_id=paper_id, ref=ref_id)

    # Create ID Node and Relationship
    tx.run("""
        MERGE (i:ID {value: $paper_id})
        MERGE (p:Paper {id: $paper_id})
        MERGE (p)-[:HAS_ID]->(i)
    """, paper_id=paper_id)

# Step 2.2: Populate the Graph
# execute_write is the current managed-transaction API (write_transaction is
# deprecated in the 5.x driver) and is what the embedding-upload loop later in
# this notebook already uses.
with driver.session() as session:
    for _, row in df.iterrows():
        session.execute_write(
            add_paper,
            paper_id=row['id'],
            title=row['title'],
            abstract=row['abstract'],
            authors=row['authors'],
            n_citation=row['n_citation'],
            references=row['references'],
            venue=row['venue'],
            year=row['year']
        )

# Retrieve data from the graph


In [None]:
def run_query(query, parameters=None):
    """Run a Cypher query and return every result record as a list.

    Args:
        query: Cypher text to execute.
        parameters: Optional dict of query parameters (referenced as ``$name``
            in the Cypher text), so values do not have to be spliced into the
            query string.  Defaults to ``None`` (no parameters).

    Returns:
        A list of the records produced by the query, collected while the
        session is still open.
    """
    with driver.session() as session:
        return list(session.run(query, parameters))

# Example Query: Retrieve Papers by Abstract Keyword
# (the WHERE clause filters on the abstract text, not on the title)
query = """

MATCH (p:Paper)-[:HAS_ABSTRACT]->(a:Abstract)
WHERE a.text CONTAINS 'neural network'
RETURN p.id AS paper_id, a.text AS abstract
"""

# Run the query and print each matching paper id followed by its abstract.
results = run_query(query)
for record in results:
    print(record["paper_id"], record["abstract"])

4ab39729-af77-46f7-a662-16984fb9c1db We studied an autoassociative neural network with dynamic synapses which include a facilitating mechanism. We have developed a general mean-field framework to study the relevance of the different parameters defining the dynamics of the synapses and their influence on the collective properties of the network. Depending on these parameters, the network shows different types of behaviour including a retrieval phase, an oscillatory regime, and a non-retrieval phase. In the oscillatory phase, the network activity continously jumps between the stored patterns. Compared with other activity-dependent mechanisms such as synaptic depression, synaptic facilitation enhances the network ability to switch among the stored patterns and, therefore, its adaptation to external stimuli. A detailed analysis of our system reflects an efficient-more rapid and with lesser errors-network access to the stored information with stronger facilitation. We also present a set of 

In [None]:
# Example Query: Retrieve Papers (with Titles) by Abstract Keyword
query = """

MATCH (p:Paper)-[:HAS_ABSTRACT]->(a:Abstract), (p)-[:HAS_TITLE]->(t:Title)
WHERE a.text CONTAINS 'neural network'
RETURN p.id AS paper_id, t.text AS title, a.text AS abstract
"""

results = run_query(query)

# Print one labelled line per returned field, then a separator after each paper.
for record in results:
    for label, key in (("Paper ID", "paper_id"), ("Title", "title"), ("Abstract", "abstract")):
        print(f"{label}: {record[key]}")
    print("-" * 50)

Paper ID: 4ab39729-af77-46f7-a662-16984fb9c1db
Title: Attractor neural networks with activity-dependent synapses: The role of synaptic facilitation
Abstract: We studied an autoassociative neural network with dynamic synapses which include a facilitating mechanism. We have developed a general mean-field framework to study the relevance of the different parameters defining the dynamics of the synapses and their influence on the collective properties of the network. Depending on these parameters, the network shows different types of behaviour including a retrieval phase, an oscillatory regime, and a non-retrieval phase. In the oscillatory phase, the network activity continously jumps between the stored patterns. Compared with other activity-dependent mechanisms such as synaptic depression, synaptic facilitation enhances the network ability to switch among the stored patterns and, therefore, its adaptation to external stimuli. A detailed analysis of our system reflects an efficient-more ra

# Prompt template

In [None]:
from langchain.prompts import ChatPromptTemplate

In [None]:
# Template for the LLM prompt; {question} and {context} are filled in later.
# The stray "?" that used to follow {context} was removed: it was appended to
# the end of the retrieved context text rather than to the question.
PROMPT_TEMPLATE = """
answer as precisely as possible based on the below context
Question: \n {question} \n
Context: \n {context}\n
Answer:"""


In [None]:
# Question that is substituted into the {question} slot of PROMPT_TEMPLATE
prompt="Get 5 papers whose abstract is based on neural network. For every paper give a brief about each paper findings in 5 lines."

In [None]:
# Flatten every retrieved record into one line of context for the LLM.
context_lines = []
for record in results:
    context_lines.append(
        f"Paper ID: {record['paper_id']}, Title: {record['title']}, Abstract: {record['abstract']}"
    )
context_text = "\n".join(context_lines)

# Fill the template with the question and the retrieved context, then show it.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt_ans = prompt_template.format(question=prompt, context=context_text)
print("\n\n", prompt_ans)



 Human: 
answer as precisely as possible based on the below context
Question: 
 Get 5 papers whose abstract is based on neural network. For every paper give a brief about each paper findings in 5 lines. 

Context: 
 Paper ID: 4ab39729-af77-46f7-a662-16984fb9c1db, Title: Attractor neural networks with activity-dependent synapses: The role of synaptic facilitation, Abstract: We studied an autoassociative neural network with dynamic synapses which include a facilitating mechanism. We have developed a general mean-field framework to study the relevance of the different parameters defining the dynamics of the synapses and their influence on the collective properties of the network. Depending on these parameters, the network shows different types of behaviour including a retrieval phase, an oscillatory regime, and a non-retrieval phase. In the oscillatory phase, the network activity continously jumps between the stored patterns. Compared with other activity-dependent mechanisms such as syn

In [None]:
import google.generativeai as genai
# Read the Gemini API key from the environment (or prompt for it without echo)
# instead of embedding it in the notebook.
# NOTE(review): the key that was previously hardcoded on this line has been
# exposed in the notebook and should be revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: "))
model = genai.GenerativeModel("gemini-1.5-flash")

# Send the fully rendered prompt and print the model's text answer.
response = model.generate_content(prompt_ans)
print(response.text)

Here are 5 papers whose abstracts are based on neural networks, along with a brief summary of their findings:


1. **Paper ID: 4ab39729-af77-46f7-a662-16984fb9c1db:** This paper investigates an autoassociative neural network with activity-dependent synapses, focusing on synaptic facilitation.  A mean-field framework reveals different network behaviors (retrieval, oscillatory, non-retrieval) depending on synaptic parameters.  Synaptic facilitation improves pattern switching and information access speed compared to synaptic depression. Monte Carlo simulations support analytical findings.

2. **Paper ID: 4ab5e3f4-9b58-4fbb-9bde-ee2f2185cc61:** This study develops a weighting-delay-based method for analyzing the stability of recurrent neural networks (RNNs) with time-varying delays.  By dividing the delay interval into subintervals, less conservative delay-dependent stability criteria are derived.  Optimal weighting-delay parameters are calculated using optimization methods, enhancing stab

# Example


In [None]:
# Second example question; replaces the previous value of `prompt`
prompt = "What are the key findings about neural networks in the provided papers?"

In [None]:
# Rebuild the context (one line per retrieved record) for the new question.
context_text = "\n".join(
    f"Paper ID: {record['paper_id']}, Title: {record['title']}, Abstract: {record['abstract']}"
    for record in results
)

# Render the prompt from the template and display it.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt_ans = prompt_template.format(question=prompt, context=context_text)
print("\n\n", prompt_ans)



 Human: 
answer as precisely as possible based on the below context
Question: 
 What are the key findings about neural networks in the provided papers? 

Context: 
 Paper ID: 4ab39729-af77-46f7-a662-16984fb9c1db, Title: Attractor neural networks with activity-dependent synapses: The role of synaptic facilitation, Abstract: We studied an autoassociative neural network with dynamic synapses which include a facilitating mechanism. We have developed a general mean-field framework to study the relevance of the different parameters defining the dynamics of the synapses and their influence on the collective properties of the network. Depending on these parameters, the network shows different types of behaviour including a retrieval phase, an oscillatory regime, and a non-retrieval phase. In the oscillatory phase, the network activity continously jumps between the stored patterns. Compared with other activity-dependent mechanisms such as synaptic depression, synaptic facilitation enhances th

In [None]:
# Send the rendered prompt to the generative model configured earlier
response = model.generate_content(prompt_ans)
# Print the text part of the model's reply
print(response.text)

The provided papers explore various aspects of neural networks, including:

* **Dynamic Synapses and Network Behavior:** One study investigated attractor neural networks with activity-dependent synapses, specifically focusing on synaptic facilitation's impact on network behavior.  Key findings indicate that synaptic facilitation enhances the network's ability to switch between stored patterns, leading to more efficient information retrieval. Different parameter settings resulted in retrieval, oscillatory, and non-retrieval phases.

* **Recurrent Neural Network Stability:** Another paper developed a weighting-delay-based method to analyze the stability of recurrent neural networks (RNNs) with time-varying delays. This method yielded less conservative stability criteria compared to previous approaches, demonstrating that stability margins depend on weighting-delay parameters, which can be optimized.

* **Neural Networks in Applications:**  Many papers applied neural networks to diverse a

In [None]:
from sentence_transformers import SentenceTransformer
# NOTE: this rebinds `model`, which earlier cells bound to the Gemini client;
# re-run the Gemini setup cell before calling generate_content again.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode all non-empty abstracts in a single batched call instead of one
# model.encode() call per row; empty abstracts keep a None embedding.
abstract_texts = df['abstract'].tolist()
abstract_vectors = iter(
    model.encode([text for text in abstract_texts if text], show_progress_bar=True)
)
# dtype=object keeps one vector per cell rather than letting pandas try to
# interpret the list of arrays as a 2-D block.
df['abstract_embedding'] = pd.Series(
    [next(abstract_vectors) if text else None for text in abstract_texts],
    index=df.index,
    dtype=object,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Convert NumPy arrays to lists of floats
# Values without a .tolist() method (None, or lists produced by an earlier run
# of this cell) are passed through unchanged, so the cell can safely be re-run.
df['abstract_embedding'] = df['abstract_embedding'].apply(
    lambda x: x.tolist() if hasattr(x, 'tolist') else x
)

In [None]:
def add_embedding(tx, abstract_text, embedding):
    """Store an embedding on the Abstract node whose text matches.

    Args:
        tx: Object exposing ``run(query, **params)``.
        abstract_text: Text used to MATCH the ``Abstract`` node.
        embedding: Value written to the node's ``embedding`` property.
    """
    # MATCH (not MERGE): nothing is written when no Abstract has this text.
    cypher = """
        MATCH (a:Abstract {text: $abstract_text})
        SET a.embedding = $embedding
        """
    params = {"abstract_text": abstract_text, "embedding": embedding}
    tx.run(cypher, **params)

# Iterate over the DataFrame and store embeddings
with driver.session() as session:
    for abstract_text, embedding in zip(df['abstract'], df['abstract_embedding']):
        # Use explicit None/length checks: truth-testing a NumPy array raises
        # ValueError, which would happen if the list-conversion cell was skipped.
        if not abstract_text or embedding is None or len(embedding) == 0:
            continue
        session.execute_write(
            add_embedding,
            abstract_text=abstract_text,
            # Plain Python floats, whether the cell holds a list or an array.
            embedding=[float(value) for value in embedding]
        )