In [9]:
import pandas as pd
from scholarly import scholarly
import json

# Load member IDs from the CSV file
members_df = pd.read_csv('_data/member_id.csv')
scholar_ids = members_df['id'].dropna().unique()

all_publications = []
seen_titles = set()  # lower-cased titles already collected, used for de-duplication


def _publication_year(bib):
    """Return the publication year of a bib record as an int, or 0 if unavailable.

    Looks at 'pub_year' first and then 'year'. A value that is missing, None,
    or not convertible with int() is skipped, so that a single malformed
    record cannot make the final sort raise.
    """
    for key in ('pub_year', 'year'):
        try:
            return int(bib[key])
        except (KeyError, TypeError, ValueError):
            continue
    return 0


for scholar_id in scholar_ids:
    try:
        # Search for the author by ID and fetch their publication list
        author = scholarly.search_author_id(scholar_id)
        author = scholarly.fill(author, sections=['publications'])
        publications = author['publications']
    except Exception as e:
        # Best effort: report the failing author and carry on with the next one.
        print(f"Could not fetch data for ID {scholar_id}: {e}")
        continue

    for pub in publications:
        try:
            # Fill the publication object to retrieve detailed metadata (venue, full authors)
            pub_filled = scholarly.fill(pub)
            bib = pub_filled.get('bib') or {}
            # `or ''` guards against an explicit None title.
            title = (bib.get('title') or '').strip()
        except Exception as e:
            # A single bad publication must not discard the rest of this author's list.
            print(f"Could not fetch a publication for ID {scholar_id}: {e}")
            continue

        title_lower = title.lower()
        # Keep only the first occurrence of each title (case-insensitive);
        # co-authored papers show up once per member otherwise.
        if title and title_lower not in seen_titles:
            seen_titles.add(title_lower)
            all_publications.append(bib)

# Sort publications by year (latest first); records without a usable year sort last.
all_publications.sort(key=_publication_year, reverse=True)



In [10]:
# Preview only the first few records instead of dumping the whole list:
# each record can carry a long 'abstract', which floods the cell output.
all_publications[:5]

[{'title': 'A Protocol-Aware P4 Pipeline for MQTT Security and Anomaly Mitigation in Edge IoT Systems',
  'pub_year': 2026,
  'citation': 'arXiv preprint arXiv:2601.07536, 2026',
  'author': 'Bui Ngoc Thanh Binh and Pham Hoai Luan and Le Vu Trung Duong and Vu Tuan Hai and Yasuhiko Nakashima',
  'journal': 'arXiv preprint arXiv:2601.07536',
  'abstract': 'MQTT is the dominant lightweight publish--subscribe protocol for IoT deployments, yet edge security remains inadequate. Cloud-based intrusion detection systems add latency that is unsuitable for real-time control, while CPU-bound firewalls and generic SDN controllers lack MQTT awareness to enforce session validation, topic-based authorization, and behavioral anomaly detection. We propose a P4-based data-plane enforcement scheme for protocol-aware MQTT security and anomaly detection at the network edge. The design combines parser-safe MQTT header extraction with session-order validation, byte-level topic-prefix authorization with per-cl

In [11]:
def _display_title(record):
    """Compose the display string for one bib record.

    The result is "<title>. <venue>. <authors>. <year>", with 'N/A' standing
    in for any part whose key is absent from the record.
    """
    # Venue lookup order: 'journal', then 'conference', then 'venue'.
    where = record.get('journal', record.get('conference', record.get('venue', 'N/A')))
    what = record.get('title', 'N/A')
    who = record.get('author', 'N/A')
    when = record.get('pub_year', record.get('year', 'N/A'))
    return f"{what}. {where}. {who}. {when}"


# One entry per publication; "order" is the 1-based position in all_publications.
formatted_publications = [
    {"title": _display_title(record), "order": position}
    for position, record in enumerate(all_publications, start=1)
]

# Save the papers in JSON format
with open('_data/journals.json', 'w', encoding='utf-8') as out_file:
    json.dump(formatted_publications, out_file, indent=4, ensure_ascii=False)