In [1]:
import pandas as pd
import json

# Data Preprocessing

In [2]:
with open("data/program.json", "r") as f:
    program = json.load(f)

In [3]:
program.keys()

dict_keys(['A1 - GenAI-Enhanced Communication', 'B1 - Healthcare and Human Wellbeing', 'A2 - Interactive Systems and Data Visualization', 'B2 - New Media and Research Inspirations', 'A3 - Technology-Enhanced Learning and Heritage', 'B3 - Interaction in VR/AR'])

In [4]:
program_df = pd.DataFrame([
    {"session": key, "title": value}
    for key, values in program.items()
    for value in values
])
program_df.head()

Unnamed: 0,session,title
0,A1 - GenAI-Enhanced Communication,[CHI25] Rambler in the Wild: A Diary Study of ...
1,A1 - GenAI-Enhanced Communication,[CHI25] Scaffolded Turns and Logical Conversat...
2,A1 - GenAI-Enhanced Communication,"[CHI25] ""Ronaldo's a poser!"": How the Use of G..."
3,A1 - GenAI-Enhanced Communication,[CHI25] JournalAIde: Empowering Older Adults i...
4,A1 - GenAI-Enhanced Communication,[CHI25] HarmonyCut: Supporting Creative Chines...


In [5]:
program_df["conference"] = program_df["title"].str.split("] ").map(lambda x: x[0][1:])
program_df["title"] = program_df["title"].str.split("] ").map(lambda x: x[1])

program_df.head()

Unnamed: 0,session,title,conference
0,A1 - GenAI-Enhanced Communication,Rambler in the Wild: A Diary Study of LLM-Assi...,CHI25
1,A1 - GenAI-Enhanced Communication,Scaffolded Turns and Logical Conversations: De...,CHI25
2,A1 - GenAI-Enhanced Communication,"""Ronaldo's a poser!"": How the Use of Generativ...",CHI25
3,A1 - GenAI-Enhanced Communication,JournalAIde: Empowering Older Adults in Digita...,CHI25
4,A1 - GenAI-Enhanced Communication,HarmonyCut: Supporting Creative Chinese Paper-...,CHI25


In [6]:
talk_df = pd.read_excel("data/Talk and Poster.xlsx", sheet_name="Talk")
registrant_df = pd.read_excel("data/PreCHI Participants.xlsx", sheet_name="Registrant")
sv_df = pd.read_excel("data/PreCHI Participants.xlsx", sheet_name="Student Volunteer")
oc_df = pd.read_excel("data/PreCHI Participants.xlsx", sheet_name="Organizing Committee")
guest_talk_df = pd.read_excel("data/Talk and Poster.xlsx", sheet_name="Guest Talk")
guest_df = pd.read_excel("data/PreCHI Participants.xlsx", sheet_name="Guest")

In [7]:
# Step 1: 合并所有 Talk 数据
all_talks = pd.concat([talk_df, guest_talk_df], ignore_index=True)

In [None]:
all_talks

Unnamed: 0,ID,Name,Email,Affiliation,Title,Abstract
0,1.0,Zhida Sun,zhida.sun@connect.ust.hk,Shenzhen University,[CHI25] Creative Blends of Visual Concepts,Visual blends combine elements from two distin...
1,2.0,Runze Cai,runze.cai@u.nus.edu,National University of Singapore,[CHI25] AiGet: Transforming Everyday Moments i...,"Unlike the free exploration of childhood, the ..."
2,3.0,Xi Zheng,zheng.xi@my.cityu.edu.hk,City University of Hong Kong,[CHI25] Customizing Emotional Support: How Do ...,Personalized support is essential to fulfill i...
3,4.0,Hanfang Lyu ¦ 吕涵放,hanfang.lyu@connect.ust.hk,The Hong Kong University of Science and Techno...,[CHI25] Signaling Human Intentions to Service ...,"As service robots become commonplace, it is es..."
4,5.0,Runhua ZHANG ¦ 张润花,runhua.zhang@connect.ust.hk,The Hong Kong University of Science and Techno...,[CHI 25] Walk in Their Shoes to Navigate Your ...,"Procrastination, the voluntary delay of tasks ..."
5,6.0,Xiang (Nathan) Qi,nathanxiang.qi@connect.polyu.hk,The Hong Kong Polytechnic University,[CHI25] Participatory Design in Human-Computer...,Participatory Design (PD) has become increasin...
6,7.0,Fan Zhang,zfan1218@gmail.com,City University Of Hong Kong,"[CHI25] ""Becoming My Own Audience"": How Dancer...",The use of motion capture in live dance perfor...
7,8.0,Yuhao Sun,yuhao.sun@ed.ac.uk,University of Edinburgh,[CHI25] Human-Precision Medicine Interaction: ...,Precision Medicine (PM) transforms the traditi...
8,9.0,Yuanhao ZHANG,yzhangiy@connect.ust.hk,HKUST,[CHI25] CoKnowledge: Supporting Assimilation o...,"Danmaku, a system of scene-aligned, time-synce..."
9,10.0,Yanna Lin,cseyanna@ust.hk,HKUST,[CHI25] InterLink: Linking Text with Code and ...,"Computational notebooks, widely used for ad-ho..."


In [9]:
# Step 2: 合并 Registrant 和 Guest 数据，建立全局映射
registrant_guest = pd.concat([guest_df, sv_df, oc_df, registrant_df], ignore_index=True)
email_to_name = registrant_guest.set_index("Email")["Name"].to_dict()
email_to_org = registrant_guest.set_index("Email")["Organization"].to_dict()

In [10]:
# Step 3: 通过 Email 匹配 Name 和 Organization
all_talks["speaker"] = all_talks["Email"].map(email_to_name)
all_talks["affiliation"] = all_talks["Email"].map(email_to_org)

In [11]:

def process_speakers(title):
    """基于标题匹配演讲者信息"""
    # 优先在registrant_df中查找
    all_talks_match = all_talks[all_talks["Title"].str.contains(title, na=False, regex=False)].fillna("TBD")
    if not all_talks_match.empty:
        return pd.Series({
            "speaker": all_talks_match.iloc[0]["speaker"],  
            "affiliation": all_talks_match.iloc[0]["affiliation"],
            "abstract": all_talks_match.iloc[0]["Abstract"],
        })
    
    # 未找到的case
    return pd.Series({
        "speaker": None,
        "affiliation": None,
        "abstract": None,
    })

In [12]:
# Step 2: 找到对应的speaker信息**
program_df[["speaker", "affiliation", "abstract"]] = program_df["title"].apply(process_speakers)

In [13]:
program_df[program_df["affiliation"].isnull()]

Unnamed: 0,session,title,conference,speaker,affiliation,abstract


In [14]:
print(program_df.head(10))

                               session  \
0    A1 - GenAI-Enhanced Communication   
1    A1 - GenAI-Enhanced Communication   
2    A1 - GenAI-Enhanced Communication   
3    A1 - GenAI-Enhanced Communication   
4    A1 - GenAI-Enhanced Communication   
5    A1 - GenAI-Enhanced Communication   
6    A1 - GenAI-Enhanced Communication   
7    A1 - GenAI-Enhanced Communication   
8  B1 - Healthcare and Human Wellbeing   
9  B1 - Healthcare and Human Wellbeing   

                                               title  conference  \
0  Rambler in the Wild: A Diary Study of LLM-Assi...       CHI25   
1  Scaffolded Turns and Logical Conversations: De...       CHI25   
2  "Ronaldo's a poser!": How the Use of Generativ...       CHI25   
3  JournalAIde: Empowering Older Adults in Digita...       CHI25   
4  HarmonyCut: Supporting Creative Chinese Paper-...       CHI25   
5  ACKnowledge: A Computational Framework for Hum...       CHI25   
6  Exploring the Design of LLM-based Agent in Enh...       CH

黄泽宇 [LT-17 Session 1]
王梁炜 [LT-18 Session 1] 

Xi Zheng [LT-17 Session 2]
Hanfang Lyu [LT-18 Session 2]

陈育安 [LT-17 Session 3]
许愿 [LT-18 Session 3]

徐懿 [LT-15 Panel 1]
黄庄桐[LT-15 Panel 2]


In [15]:
session_host_dict = dict(
zip(program.keys(), ["Zeyu Huang", "Liangwei Wang", "Xi Zheng", "Hanfang Lyu", "Yu'an Chen", "Yuan Xu"])
)
session_host_dict

{'A1 - GenAI-Enhanced Communication': 'Zeyu Huang',
 'B1 - Healthcare and Human Wellbeing': 'Liangwei Wang',
 'A2 - Interactive Systems and Data Visualization': 'Xi Zheng',
 'B2 - New Media and Research Inspirations': 'Hanfang Lyu',
 'A3 - Technology-Enhanced Learning and Heritage': "Yu'an Chen",
 'B3 - Interaction in VR/AR': 'Yuan Xu'}

# Generate YAML

In [16]:
import re
import yaml

def generate_id(title, conference):
    """根据标题和会议类型生成带后缀的ID"""
    # 基础处理
    id_str = title.lower()
    id_str = re.sub(r'[^\w\s-]', '', id_str)
    id_str = re.sub(r'[\s_]+', '-', id_str)
    id_str = re.sub(r'-+', '-', id_str).strip('-')
    
    # 添加会议类型后缀
    conference_suffix = ""
    if conference.lower() == "guest talk":
        conference_suffix = "-guest-talk"
    elif conference.lower() == "rep":
        conference_suffix = "-rep"
    
    return f"#{id_str}{conference_suffix}"

def generate_session_metadata(session_name_full):
    """根据Session名称生成name、location和link"""
    # 提取Session编号（如"A1"或"B1"）
    session_name = session_name_full.split(" - ")[-1].strip()  # 提取 "GenAI-Enhanced Communication"
    session_code = session_name_full.split(" - ")[0].strip()
    # 分配地点
    location = "Yeung LT-17" if session_code.startswith("A") else "Yeung LT-18"
    return {
        "name": f"Session {session_code}",
        "location": location,
        "topic": session_name,
        "host": session_host_dict.get(session_name_full, "TBD"),  # 从session_host_dict中获取主持人
        "link": f"program/session_{session_code.lower()}"
    }
    
def format_speaker_name(name):
    """统一姓名格式：每个部分首字母大写，其余小写"""
    return ' '.join([part.strip().capitalize() for part in name.split()])

# 生成YAML结构
sessions = []
for session_group in program_df.groupby("session"):
    session_data = generate_session_metadata(session_group[0])
    session_data["talks"] = []
    for _, row in session_group[1].iterrows():
        full_title = f"[{row['conference']}] {row['title']}"
        session_data["talks"].append({
            "id": generate_id(row["title"], row["conference"]),
            "title": full_title,
            "speaker": format_speaker_name(row["speaker"])
        })
    
    sessions.append(session_data)

# 导出YAML
with open("program.yml", "w") as f:
    yaml.dump(sessions, f, sort_keys=False, allow_unicode=True)

In [17]:
sessions

[{'name': 'Session A1',
  'location': 'Yeung LT-17',
  'topic': 'GenAI-Enhanced Communication',
  'host': 'Zeyu Huang',
  'link': 'program/session_a1',
  'talks': [{'id': '#rambler-in-the-wild-a-diary-study-of-llm-assisted-writing-with-speech',
    'title': '[CHI25] Rambler in the Wild: A Diary Study of LLM-Assisted Writing With Speech',
    'speaker': 'Xuyu Yang'},
   {'id': '#scaffolded-turns-and-logical-conversations-designing-humanized-llm-powered-conversational-agents-for-hospital-admission-interviews',
    'title': '[CHI25] Scaffolded Turns and Logical Conversations: Designing Humanized LLM-Powered Conversational Agents for Hospital Admission Interviews',
    'speaker': 'Dingdong Liu'},
   {'id': '#ronaldos-a-poser-how-the-use-of-generative-ai-shapes-debates-in-online-forums',
    'title': '[CHI25] "Ronaldo\'s a poser!": How the Use of Generative AI Shapes Debates in Online Forums',
    'speaker': 'Yuhan Zeng'},
   {'id': '#journalaide-empowering-older-adults-in-digital-journal-w

# Generate Markdown

In [19]:
import os
import re
from pathlib import Path

# 创建输出目录
output_dir = Path("sessions")
output_dir.mkdir(exist_ok=True)

def format_talk_title(title, conference):
    """处理标题格式：移除原始会议标记，并根据类型添加后缀"""
    # 移除原始会议标记（如 [CHI25]）
    clean_title = re.sub(r'^$$.*?$$\s*', '', title)
    
    # 根据会议类型添加后缀
    if conference == "Guest Talk":
        return f"{clean_title} [Guest Talk]"
    elif conference == "REP":
        return f"{clean_title} [REP]"
    else:
        return clean_title

def generate_md(session_data):
    """生成单个session的Markdown内容"""
    content = f"## {session_data['topic']}\n\n"
    content += f"​**​Session Host**​: {session_data.get('host', 'TBD')}\n\n"
    
    for talk in session_data['talks']:
        # 标题部分
        title_line = f"### {format_talk_title(talk['title'], talk['conference'])}"
        content += f"{title_line}\n\n"
        
        # 演讲者信息
        content += f"​**​Speaker​**​: {talk['speaker']}"
        if talk.get('affiliation'):
            content += f", *{talk['affiliation']}*"
        content += "\n\n"
        
        # 摘要部分
        content += f"​**​Abstract​**​:\n {talk['abstract']}\n\n\n"  # 留空或添加占位符
        
    return content


sessions = []
for session_group in program_df.groupby("session"):
    session_data = generate_session_metadata(session_group[0])
    session_data["talks"] = []
    for _, row in session_group[1].iterrows():
        session_data["talks"].append({
            "title": row['title'],
            "conference": row['conference'],
            "speaker": format_speaker_name(row["speaker"]),
            "affiliation": row["affiliation"],
            "abstract": row['abstract'] if row['abstract'] != "nan" else "TBD",
        })
    
    sessions.append(session_data)
    

# 遍历每个session生成文件
for session in sessions:
    # 生成安全文件名
    filename = re.sub(r'[^\w\s-]', '', session['name']).strip().lower()
    filename = re.sub(r'[-\s]+', '-', filename) + ".md"
    
    # 写入文件
    with open(output_dir / filename, 'w', encoding='utf-8') as f:
        f.write(generate_md(session))