In [2]:
import re
import networkx as nx
import matplotlib.pyplot as plt
from pybtex.database import parse_file

# Load and parse the .bib file
def parse_bib_file(file_path):
    """Parse a BibTeX file into a mapping of entry key -> title and cited keys.

    Args:
        file_path: Path of the ``.bib`` file; handed to pybtex's ``parse_file``.

    Returns:
        dict: ``{entry_key: {'title': str, 'citations': list[str]}}``.
        ``title`` is the lower-cased ``title`` field (``'unknown title'`` when
        the field is absent) and ``citations`` holds the tokens found in the
        entry's ``note`` field (empty list when there is no ``note``).
    """
    bib_data = parse_file(file_path)
    entries = {}

    for key, entry in bib_data.entries.items():
        title = entry.fields.get('title', 'Unknown Title').lower()
        # Use 'note' or custom fields for citations.
        citations = entry.fields.get('note', '')
        # Tokenise on whitespace, commas and semicolons only.  BibTeX keys
        # routinely contain '-', ':' or '.', and a word-character pattern
        # would break such keys into fragments that never match an entry key.
        cited_keys = re.findall(r'[^\s,;]+', citations)
        entries[key] = {
            'title': title,
            'citations': cited_keys,
        }

    return entries

# Find the top N cited papers
def get_top_cited_papers(entries, top_n=5):
    """Return the keys that are cited most often across all entries.

    Args:
        entries: Mapping whose values are dicts with a ``'citations'`` list.
        top_n: Maximum number of keys to return (applied as a slice bound).

    Returns:
        list: Cited keys ordered by descending citation count; keys with the
        same count keep the order in which they were first encountered.
    """
    tally = {}
    for record in entries.values():
        for cited in record['citations']:
            tally.setdefault(cited, 0)
            tally[cited] += 1

    # A stable ascending sort on the negated count yields the same order as
    # a stable descending sort on the count itself.
    ranked = sorted(tally, key=lambda cited: -tally[cited])
    return ranked[:top_n]

# Build a citation graph
def build_citation_graph(entries, top_papers):
    """Build a directed graph of the citations among the top papers.

    Args:
        entries: Mapping ``key -> {'title': str, 'citations': list}``.
        top_papers: Iterable of keys to keep in the graph.

    Returns:
        networkx.DiGraph: One node per top paper that is either an entry key
        or is cited by a top entry.  Every node carries a ``title`` attribute.
        Edges point from the cited paper to the citing paper.
    """
    graph = nx.DiGraph()
    wanted = set(top_papers)

    for key, data in entries.items():
        if key not in wanted:
            continue
        graph.add_node(key, title=data['title'])

        for citation in data['citations']:
            if citation not in wanted:
                continue
            if citation not in graph:
                # A cited key may have no entry of its own in ``entries``.
                # Give the node a title anyway (falling back to the key) so
                # consumers reading the ``title`` attribute never hit a
                # KeyError on nodes that add_edge would create bare.
                cited_entry = entries.get(citation)
                fallback = citation if cited_entry is None else cited_entry['title']
                graph.add_node(citation, title=fallback)
            graph.add_edge(citation, key)

    return graph

# Plot the citation graph
def plot_citation_graph(graph, title="Top 5 BERT Papers Citation Graph"):
    """Draw the citation graph with each node labelled by its paper title.

    Args:
        graph: networkx graph whose nodes may carry a ``title`` attribute.
        title: Heading shown above the figure; the default keeps the
            original hard-coded caption.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    pos = nx.spring_layout(graph)
    plt.figure(figsize=(10, 8))

    nx.draw(graph, pos, with_labels=False, node_color='lightblue', node_size=3000, edge_color='gray')

    # Draw labels manually so long titles can wrap inside the node.
    for node, (x, y) in pos.items():
        # Nodes created implicitly by add_edge have no 'title' attribute;
        # fall back to the node key instead of raising KeyError.
        label = graph.nodes[node].get('title', str(node))
        plt.text(x, y, label, fontsize=8, ha='center', va='center', wrap=True)

    plt.title(title)
    plt.show()

# Main execution
if __name__ == "__main__":
    # Location of the bibliography to analyse -- replace with your .bib file path.
    bib_file_path = "path_to_your_file.bib"

    # Step 1: read every entry (title + cited keys) from the .bib file.
    entries = parse_bib_file(file_path=bib_file_path)

    # Step 2: pick the five most frequently cited keys.
    top_papers = get_top_cited_papers(entries=entries, top_n=5)

    # Step 3: connect those papers by their citations, then display the result.
    citation_graph = build_citation_graph(entries=entries, top_papers=top_papers)
    plot_citation_graph(graph=citation_graph)


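# Output of the run above: ModuleNotFoundError: No module named 'pybtex'
# (the pybtex package is not installed; install it, e.g. `pip install pybtex`, and re-run).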