# PART C

In [2]:
from neo4j import GraphDatabase

URI = "neo4j://localhost"
AUTH = ("neo4j", "12345678")

driver = GraphDatabase.driver(URI, auth=AUTH)
driver.verify_connectivity()

STEP1: Create the Database Community Node

In [4]:
# Define a function to create the Database Community node
def create_database_community(tx):
    tx.run("MERGE (:Community {name: 'Database Community'})")

# Execute the function within a session
with driver.session() as session:
    session.execute_write(create_database_community)

STEP2: Match Keywords for Database Community 
1. data management,indexing,data modeling,big data,data processing,data storage,data querying --> Computer Science

In [5]:
def relate_keywords_to_community(tx):
    tx.run("""
    MATCH (kw:Keyword), (dbComm:Community {name: 'Database Community'})
    WHERE kw.name IN ['data management', 'indexing', 'data modeling', 'big data', 'data processing', 'data storage', 'data querying']
      AND kw.domain = 'Computer Science'
    CREATE (kw)-[:DEFINES]->(dbComm)
    """)

# Execute the function within a session
with driver.session() as session:
    session.execute_write(relate_keywords_to_community)

In [6]:
def create_keywords_and_relate_to_community(tx):
    # Ensure the Database Community node exists
    tx.run("MERGE (:Community {name: 'Database Community'})")
    
    # List of keywords related to the Database Community
    keywords = [
        'data management', 'indexing', 'data modeling', 
        'big data', 'data processing', 'data storage', 'data querying'
    ]
    
    for keyword in keywords:
        # Create the keyword if it doesn't exist and relate it to the Database Community
        tx.run("""
            MERGE (kw:Keyword {name: $keyword, domain: 'Computer Science'})
            WITH kw
            MATCH (dbComm:Community {name: 'Database Community'})
            MERGE (kw)-[:DEFINES]->(dbComm)
            """, keyword=keyword)
        print("Keyword:",keyword, "added into Database Community",)

with driver.session() as session:
    session.execute_write(create_keywords_and_relate_to_community)

Keyword: data management added into Database Community
Keyword: indexing added into Database Community
Keyword: data modeling added into Database Community
Keyword: big data added into Database Community
Keyword: data processing added into Database Community
Keyword: data storage added into Database Community
Keyword: data querying added into Database Community


(keyword)--[:DEFINES]--(Database Community)

STEP 3: RELATE database community with computer science keywords 

In [7]:
def create_and_return_cs_keyword_relationships(tx):
    results = tx.run("""
    MATCH (kw:keywords {domain: 'Computer Science'}), (dbComm:Community {name: 'Database Community'})
    MERGE (kw)-[r:RELATED_TO]->(dbComm)
    RETURN kw.name AS keyword, dbComm.name AS community
    """)
    return results.values()

# Execute the function within a session and print the results
with driver.session() as session:
    relationships = session.execute_write(create_and_return_cs_keyword_relationships)
    print("Relationship is sucessfully established")

Relationship is sucessfully established


(keywords {domain:"Computer Science"})-[:RELATED_TO]-(Database Community)

STEP4:Find Conferences and Journals Related to the Database Community
#To consider a conference or journal related to the database community, at least 90% of its published papers must contain one of the keywords defining the community.

In [8]:
def find_related_conferences_and_journals(tx):
    query = """
    MATCH (p:Paper)-[:HAS]->(kw:Keyword)
    WHERE kw.name IN ['data management', 'indexing', 'data modeling', 'big data', 'data processing', 'data storage', 'data querying']
    WITH p, COLLECT(kw.name) AS keywords
    MATCH (p)-[:PUBLISHED_IN]->(pub)
    WITH pub, COUNT(p) AS totalPapers, COUNT(keywords) AS keywordPapers
    WHERE keywordPapers >= 0.9 * totalPapers
    SET pub:RelatedToDatabaseCommunity
    RETURN pub
    """
    return list(tx.run(query))

STEP 5:Finding Conferences and Journals where a high percentage of papers are associated with the keywords defined by the Database Community.  

In [9]:
class DatabaseCommunityRecommender:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()


    def find_related_publications(self):
        with self.driver.session() as session:
            result = session.execute_write(self._find_and_label_publications)
            for record in result:
                print(f"Publication: {record['publication']['name']}, "
                      f"Papers with Keywords: {record['papersWithKeywords']}, "
                      f"Total Papers: {record['totalPapers']}")

    @staticmethod
    def _find_and_label_publications(tx):
        query = """
        MATCH (c:Community {name: "Database Community"})<-[:RELATED_TO]-(kw:keywords)
        WITH c, kw
        MATCH (kw)<-[:HAS]-(p:paper)
        WITH kw, p
        MATCH (p)-[:PUBLISHED_IN]->(publication)
        WITH publication, COUNT(DISTINCT p) AS papersWithKeywords
        MATCH (publication)<-[:PUBLISHED_IN]-(p2:paper)
        WITH publication, papersWithKeywords, COUNT(DISTINCT p2) AS totalPapers
        WHERE papersWithKeywords >= 0.9 * totalPapers
        SET publication:RelatedToDatabaseCommunity
        RETURN publication, papersWithKeywords, totalPapers
        """
        results = tx.run(query)
        return [{"publication": record["publication"], "papersWithKeywords": record["papersWithKeywords"], "totalPapers": record["totalPapers"]} for record in results]

# Usage
uri = "neo4j://localhost:7687"
user = "neo4j"
password = "12345678"
recommender = DatabaseCommunityRecommender(uri, user, password)
recommender.find_related_publications()

Publication: IEEE Transactions on Software Engineering, Papers with Keywords: 7, Total Papers: 7
Publication: IEEE Software, Papers with Keywords: 6, Total Papers: 6
Publication: IEEE Transactions on Knowledge and Data Engineering, Papers with Keywords: 30, Total Papers: 33
Publication: Communications of the ACM, Papers with Keywords: 2, Total Papers: 2
Publication: PeerJ, Papers with Keywords: 1, Total Papers: 1
Publication: Patterns, Papers with Keywords: 1, Total Papers: 1
Publication: IEEE Transactions on Affective Computing, Papers with Keywords: 1, Total Papers: 1
Publication: Protein Science, Papers with Keywords: 2, Total Papers: 2
Publication: Empirical Software Engineering, Papers with Keywords: 13, Total Papers: 13
Publication: Computer, Papers with Keywords: 1, Total Papers: 1
Publication: Cambridge International Law Journal, Papers with Keywords: 1, Total Papers: 1
Publication: GigaScience, Papers with Keywords: 1, Total Papers: 1
Publication: Physical Review Letters, Pape

STEP6: Identifying Top Papers
1. Matches papers published in journals and conferences related to the Database Community.
2. Calculates the number of citations each paper has received.
3. Orders the papers by their citation count in descending order.
4. Limits the results to the top 100 papers and labels them as TopPaper.
5. Returns the top papers and their citation counts.

For Removing Top Papers
- MATCH (paper:TopPaper)
- REMOVE paper:TopPaper
- RETURN count(paper) AS totalUpdated

In [10]:
from neo4j import GraphDatabase

class toppaper:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def find_top_papers(self):
        with self.driver.session() as session:
            result = session.run("""
            MATCH (journal:RelatedToDatabaseCommunity)<-[:PUBLISHED_IN]-(paper:paper)
            WITH paper, SIZE([(paper)<-[:CITES]-(citing:paper) | citing]) AS citations
            ORDER BY citations DESC
            LIMIT 100
            SET paper:TopPaper
            RETURN paper AS TopPaper, citations
            """)
            top_papers = [(record["TopPaper"]["id"], record["citations"]) for record in result]

        for paper_id, citations in top_papers:
            print(f"Top Paper ID: {paper_id} with {citations} citations")

# Usage
uri = "neo4j://localhost:7687"
user = "neo4j"
password = "12345678"
paper_finder = toppaper(uri, user, password)
paper_finder.find_top_papers()
# paper_finder.close()


Top Paper ID: b7599c8ba88e7c93edbce57df513152e8f5693e7 with 50 citations
Top Paper ID: 12748e904f5025ca1757ce49d72cad3878e1be8f with 50 citations
Top Paper ID: ee4fd9cd27836870dd18eb2d81efac596a758fb1 with 49 citations
Top Paper ID: 0c86e8d19d0fc62a5f829ea625ffd3e7fa9551b9 with 49 citations
Top Paper ID: 9579ed0d182ba134ab3ed14ba0defbb324147399 with 49 citations
Top Paper ID: 590ead4aeddbf8fea8414998b2dc3b74576a71cb with 49 citations
Top Paper ID: 1a517c7b56d1d2f2d3e8e209c438e32663a750c9 with 49 citations
Top Paper ID: 564614da76b5d9020c700b78e1fe154bd590c47d with 49 citations
Top Paper ID: cd8156fc9f17146b39dfaf47fcd20f1e3ab70791 with 48 citations
Top Paper ID: 74eb4d6abf1d0236be338c1bd5ee59a498b961b1 with 47 citations
Top Paper ID: 27e57cc2f22c1921d2a1c3954d5062e3fe391553 with 47 citations
Top Paper ID: bcd2c5379a34068040750a751e4fd2710d90c15c with 47 citations
Top Paper ID: d0da9ce3ca989bce2579b64be9aed518265a8994 with 46 citations
Top Paper ID: e6a5bcdb576f2b0d965a9f71f245145539667

STEP7: Identify Potential Reviewers
- Potential reviewers are authors of the top-100 papers. We can find and label these authors using a Cypher query

This query:

1. Matches authors who have written the top papers.
2. Counts the number of top papers each author has written.
3. Labels these authors as PotentialReviewer.
4. Returns the authors and their contribution counts.

In [11]:
def find_potential_reviewers(self):
    with self.driver.session() as session:
        reviewers = session.execute_write(self._find_and_label_potential_reviewers)
        for reviewer in reviewers:
            print(f"Author: {reviewer['Author'].get('name', 'Unnamed')} with {reviewer['contributions']} contributions marked as PotentialReviewer")

@staticmethod
def _find_and_label_potential_reviewers(tx):
    query = """
    MATCH (a:author)-[:WRITES]->(p:TopPaper)
    WITH a, COUNT(p) AS contributions
    SET a:PotentialReviewer
    RETURN a AS Author, contributions
    """
    results = tx.run(query)
    return [{"Author": record["Author"], "contributions": record["contributions"]} for record in results]


DatabaseCommunityRecommender.find_potential_reviewers = find_potential_reviewers
DatabaseCommunityRecommender._find_and_label_potential_reviewers = _find_and_label_potential_reviewers
recommender = DatabaseCommunityRecommender("neo4j://localhost:7687", "neo4j", "12345678")
recommender.find_potential_reviewers()


Author: P. Flach with 1 contributions marked as PotentialReviewer
Author: José Hernández Orallo with 1 contributions marked as PotentialReviewer
Author: C. Ferri with 1 contributions marked as PotentialReviewer
Author: Lidia Contreras-Ochando with 1 contributions marked as PotentialReviewer
Author: N. Lachiche with 1 contributions marked as PotentialReviewer
Author: M. J. R. Quintana with 1 contributions marked as PotentialReviewer
Author: Fernando Martínez-Plumed with 1 contributions marked as PotentialReviewer
Author: Meelis Kull with 1 contributions marked as PotentialReviewer
Author: Iqbal H. Sarker with 1 contributions marked as PotentialReviewer
Author: Dakuo Wang with 1 contributions marked as PotentialReviewer
Author: Erick Oduor with 1 contributions marked as PotentialReviewer
Author: Justin D. Weisz with 1 contributions marked as PotentialReviewer
Author: Josh Andres with 1 contributions marked as PotentialReviewer
Author: Casey Dugan with 1 contributions marked as PotentialR

STEP8: Identifying Gurus

In [12]:
class DatabaseCommunityRecommender:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def find_gurus(self):
        with self.driver.session() as session:
            gurus = session.execute_write(self._find_and_label_gurus)
            for guru in gurus:
                print(f"Guru: {guru['Author'].get('name', 'Unnamed')} with {guru['contributions']} top papers")

    @staticmethod
    def _find_and_label_gurus(tx):
        query = """
        MATCH (a:author)-[:WRITES]->(p:TopPaper)
        WITH a, COUNT(p) AS contributions
        WHERE contributions >= 2
        SET a:Guru
        RETURN a AS Author, contributions
        """
        results = tx.run(query)
        return [{"Author": result["Author"], "contributions": result["contributions"]} for result in results]


uri = "neo4j://localhost:7687"
user = "neo4j"
password = "12345678"
recommender = DatabaseCommunityRecommender(uri, user, password)
recommender.find_gurus()
recommender.close()


Guru: P. Runeson with 2 top papers
Guru: Martin Höst with 2 top papers
Guru: P. Brereton with 3 top papers
Guru: D. Budgen with 2 top papers
Guru: B. Kitchenham with 3 top papers
Guru: Thomas D. LaToza with 2 top papers
Guru: N. Mulder with 2 top papers
Guru: B. Daigle with 2 top papers
Guru: H. Moseley with 2 top papers
Guru: M. Farman with 2 top papers
Guru: N. Moore with 2 top papers
Guru: J. Jaromczyk with 2 top papers
Guru: S. Chiu with 2 top papers
Guru: J. Izatt with 2 top papers
Guru: Sina Farsiu with 2 top papers
Guru: A. Torralba with 2 top papers
