In [1]:
import json
import os
import re
import pandas as pd

from jinja2 import Environment, FileSystemLoader
import requests  # to scrape papers
import mammoth # conversion of docx to html

import requests_openalex  # to scrape papers
from linkify import linkify_names # add links when names are mentioned
from requests_openalex import get_author_works

# Jinja2 environment used to render the site pages; its templates are loaded
# from the 'templates' directory (relative to the working directory).
file_loader = FileSystemLoader('templates')
env = Environment(loader=file_loader)

# Data

In [4]:
# Read the people table, discard its first column, and pass the result
# through linkify_names (replaces mentions of names with links).
people = linkify_names(pd.read_csv('data/people.csv').iloc[:, 1:])

# The 'background' column of data/people.csv (its 7th field) holds a
# comma-separated listing of the person's background.
# This mapping matches each known background label to an emoji.
background_map = {
    'Bioinf': '🧬',
    'Biologist': '🦠',
    'Math': '🧮',
    'CS': '💻'
}

def parse_background(val):
    """Turn a comma-separated background string into (label, emoji) pairs.

    Parameters
    ----------
    val : object
        Raw cell value. Only strings are parsed; anything else (for example
        the float NaN pandas uses for an empty cell) yields an empty list.

    Returns
    -------
    list of (str, str)
        One ``(label, emoji)`` tuple per non-blank label, in input order.
        Labels missing from ``background_map`` are paired with '❓'.
    """
    if not isinstance(val, str):
        return []
    # Strip each piece, then skip blank ones so that a stray or trailing comma
    # (or an empty/whitespace-only cell) does not create an ('', '❓') entry.
    labels = [piece.strip() for piece in val.split(',')]
    return [(label, background_map.get(label, '❓')) for label in labels if label]

# Replace each raw background string with its parsed list of (label, emoji) pairs
people["background"] = people["background"].map(parse_background)


# Parse publications

In [5]:
# Some of us have homonyms; the people listed below do not (and have publications)
authors = ['Vincent Detours', 'Maxime Tarabichi', 'Ruben Lattuca', 'Valeriia Gulaia', 'Oier Azurmendi Senar']

# Fetch every author's works, stack them into one table, then keep a single row
# per (title, person) pair -- the most cited one, since rows are sorted first.
papers = (
    pd.concat([get_author_works(person) for person in authors])
      .sort_values('cited_by_count', ascending=False)
      .drop_duplicates(subset=['title', 'person'], keep='first')
)

Found Author: Vincent Detours : https://openalex.org/A5009797000
Found Author: Maxime Tarabichi : https://openalex.org/A5045412483
Found Author: Ruben Lattuca : https://openalex.org/A5049254340
Found Author: Valeriia Gulaia : https://openalex.org/A5007022413
Found Author: Oier Azurmendi Senar : https://openalex.org/A5091982381


# Individual person page generation

In [6]:
env = Environment(loader=FileSystemLoader("templates"))
person_template = env.get_template("person_template.html")

# Write one HTML page per row of `people`, named after the row's 'abrev' value
# and placed in the parent directory.
for _, row in people.iterrows():
    page_path = f"../{row['abrev']}.html"

    # Publications whose 'person' field equals this person's name
    own_papers = papers.loc[papers["person"] == row['name']]

    page = person_template.render(
        person=row.to_dict(),
        publications=own_papers.to_dict(orient='records'),
    )

    with open(page_path, "w", encoding="utf-8") as out_file:
        out_file.write(page)


# People listing 

In [7]:
# Render people.html from one record (dict) per row of `people`
people_records = people.to_dict(orient='records')
people_page = env.get_template('people.html').render(person=people_records)

with open('../people.html', 'w', encoding='utf-8') as out_file:
    out_file.write(people_page)


# Homepage

In [8]:
# Render index.html; the template is rendered without any variables
homepage = env.get_template('index.html').render()

with open('../index.html', 'w', encoding='utf-8') as out_file:
    out_file.write(homepage)

# Guidelines

In [9]:
# Generate guidelines.html
guidelines_template = env.get_template('guidelines.html')

# Convert the Word document (opened in binary mode) to HTML with mammoth;
# `.value` of the conversion result is the HTML string.
with open("data/SAFE.docx", "rb") as docx_file:
    doc_html = mammoth.convert_to_html(docx_file).value

guidelines_page = guidelines_template.render(doc_html=doc_html)

with open('../guidelines.html', 'w', encoding='utf-8') as out_file:
    out_file.write(guidelines_page)

# Publications

In [10]:
# Generate publications.html
# Most cited first: this order is what breaks ties within a year further down.
papers_sorted = papers.sort_values('cited_by_count', ascending=False)

# Collapse the rows of a paper shared by several people into a single record
# whose 'person' field lists all of them (sorted, comma-separated).
#   sort=False   keeps groups in order of first appearance (the citation order
#                above) instead of re-sorting them by the grouping keys.
#   dropna=False keeps papers with a missing journal/year/doi; with the default
#                dropna=True those rows are silently left out of the page.
grouped = papers_sorted.groupby(
    ['title', 'journal', 'year', 'doi'], as_index=False, sort=False, dropna=False
).agg({
    'person': lambda authors: ', '.join(sorted(set(authors)))
})

# Newest year first. mergesort is stable, so papers of the same year stay in
# citation order (the default quicksort gives no such guarantee).
grouped_sorted = grouped.sort_values('year', ascending=False, kind='mergesort')

# Pass missing fields to the template as None rather than NaN: NaN is truthy
# and prints as "nan", whereas None can be tested with a plain `if`.
publications = [
    {field: (None if pd.isna(value) else value) for field, value in record.items()}
    for record in grouped_sorted.to_dict(orient='records')
]

template = env.get_template('publications.html')

html_content = template.render(publications=publications)

with open('../publications.html', 'w', encoding='utf-8') as f:
    f.write(html_content)