In [None]:
from collections import Counter
from itertools import combinations

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import requests
from tqdm.notebook import tqdm

# Function to query the GBIF Literature API
def query_gbif_literature(doi, timeout=30):
    """Search the GBIF Literature API for a single DOI.

    Parameters
    ----------
    doi : str
        DOI to look up. It is passed as the ``doi`` query parameter so that
        ``requests`` URL-encodes it; DOIs may contain reserved characters
        (``&``, ``#``, ``+``, ``<``, ``>`` ...) that would otherwise corrupt
        the query string.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a single stalled connection blocks the whole run.

    Returns
    -------
    dict or None
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` when the request fails, the status is not 200, or the body
        is not valid JSON; a message is printed in each of those cases.
    """
    url = "https://api.gbif.org/v1/literature/search"
    try:
        response = requests.get(url, params={"doi": doi}, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. must not abort a long batch run.
        print(f"Error querying DOI {doi}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error querying DOI {doi}: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # requests' JSON decode error is a ValueError subclass.
        print(f"Error querying DOI {doi}: invalid JSON in response ({exc})")
        return None

# Function to process the API response and extract topics
def extract_topics(data):
    """Collect the topics of every result in a literature-search response.

    Parameters
    ----------
    data : dict or None
        Decoded API response; results are read from its ``'results'`` key.

    Returns
    -------
    list
        The ``'topics'`` entries of all results, concatenated in result
        order. Empty when ``data`` is falsy, has no ``'results'`` key, or
        its result list is empty. A message is printed for a missing/empty
        response and for each result that has no topics.
    """
    # Guard clause: nothing to scan.
    if not data or 'results' not in data or len(data['results']) == 0:
        print("No results found or data is None")
        return []

    collected = []
    for entry in data['results']:
        entry_topics = entry['topics'] if 'topics' in entry else None
        if entry_topics:
            collected.extend(entry_topics)
        else:
            print(f"No topics found in result: {entry}")
    return collected

# Path to the CSV file
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
file_path = r'D:\gbif_outputs\allDOIs.csv'

# Read the CSV file
df = pd.read_csv(file_path)

# Extract DOIs as a list of clean strings.
# Blank cells are read as NaN and would otherwise be queried as the literal
# DOI "nan"; surrounding whitespace would be sent to the API verbatim.
# NOTE(review): duplicate DOIs are kept and therefore queried/counted once per
# row; confirm whether the input should be de-duplicated.
dois = (
    df['DOI']
    .dropna()
    .astype(str)
    .str.strip()
    .loc[lambda s: s != ""]
    .tolist()
)


In [None]:
# Tally, over all DOIs, how many DOIs mention each topic and how many DOIs
# mention each unordered pair of topics.
# Counter is a dict subclass, so `.items()` iteration on these works as before.
topic_counts = Counter()
topic_cooccurrences = Counter()

# Process each DOI with a progress bar
for doi in tqdm(dois, desc="Processing DOIs"):
    data = query_gbif_literature(doi)
    topics = extract_topics(data)

    if not topics:
        print(f"No topics extracted for DOI: {doi}")
        continue

    # Normalise case and collapse duplicates within this DOI. Without this, a
    # topic repeated across results (or differing only in case) is counted
    # more than once and forms an (X, X) pair, i.e. a self-loop in the graph.
    # Sorting makes every pair come out in a canonical (a < b) order.
    doi_topics = sorted({topic.upper() for topic in topics})

    # Update topic counts: +1 per topic present for this DOI.
    topic_counts.update(doi_topics)

    # Update topic co-occurrences: +1 per unordered pair of distinct topics.
    # A single-topic DOI yields no pairs, so no special case is needed.
    topic_cooccurrences.update(combinations(doi_topics, 2))


In [None]:
# Step 3: Create the Network Graph
G = nx.Graph()

# One node per topic; its count is stored under both the 'size' and 'count' attributes.
G.add_nodes_from(
    (topic, {"size": count, "count": count})
    for topic, count in topic_counts.items()
)

# One edge per co-occurring topic pair; the co-occurrence count becomes the edge 'weight'.
G.add_weighted_edges_from(
    (first, second, weight)
    for (first, second), weight in topic_cooccurrences.items()
)

In [None]:
# Step 4: Export the network to GraphML.
# The file name is a relative path, so it is resolved against the current
# working directory of the running kernel.
nx.write_graphml(G, "topic_network.graphml")

In [None]:
# Step 5: Visualize the Network (Optional)
LAYOUT_SEED = 42       # fixed seed so the spring layout is identical on every run
NODE_SIZE_SCALE = 100  # Adjust multiplier as needed for better visualization

fig, ax = plt.subplots(figsize=(12, 12))

# spring_layout starts from random positions; without a seed the figure
# changes each time the cell is executed.
pos = nx.spring_layout(G, k=0.1, seed=LAYOUT_SEED)

# Node area scales with the per-topic count stored in the 'size' attribute.
sizes = [G.nodes[node]['size'] * NODE_SIZE_SCALE for node in G.nodes]

nx.draw(
    G,
    pos,
    ax=ax,
    with_labels=True,
    node_size=sizes,
    font_size=10,
    node_color="skyblue",
    edge_color="gray",
)
ax.set_title("Topic Network")
plt.show()