## Manual

In [1]:
import pandas as pd
import json

In [2]:
# Load the CSV file
csv_file_path = "assets/data/Lab Member Info Survey Responses.csv"  # Update with correct path
df_csv = pd.read_csv(csv_file_path)

# Display the first few rows to understand its structure
df_csv.head()

Unnamed: 0,時間戳記,姓名（中英文皆可，中文為優先，例如：王小明）,系級（例如：112）,主要聯絡方式（請放常用Email，例如：nchu@gmail.com）,個人照片檔案名稱（例如：112王小明.png），另外請將照片寄給該屆網管,次要聯絡方式（請放FB或Linkedin連結）（若無，請留空白）,YouTube連結（若無，請留空白）,個人Github連結（若無，請留空白）,其他連結（例如：個人網站等）（若無，請留空白）,一段關於自己的描述（約30字左右，可以放研究領域等）,"關於你的關鍵字（約兩到三個，請用逗號隔開，例如：wsn, algorithm）",年級或職業稱謂\n1. 在校生：年級（例如：碩二）\n2. 已畢業：頭銜（例如：聯發科副理）,文憑
0,2025/3/27 下午 2:00:05,陳昱丞,112,scdev0210@gmail.com,陳昱丞.png,https://www.linkedin.com/in/yu-cheng-chen-460a...,https://www.youtube.com/@scdev-0210,https://github.com/sc0210,https://sc0210.netlify.app/,Focused on developing Metaheuristic algorithm ...,"wsn,algorithm",碩二,碩士
1,2025/3/27 下午 2:25:56,劉冠伯,113,g113056116@smail.nchu.edu.tw,113劉冠伯.png,https://www.linkedin.com/in/aidenliu,,https://github.com/waterbear-tw,,Engaged in reinforcement learning and resource...,,碩一,碩士
2,2025/3/27 下午 2:52:16,羅泓喆,112,g112056002@mail.nchu.edu.tw,能沿用嗎?,,,,,,"lidar,點雲分割",碩二,碩士
3,2025/3/27 下午 3:51:30,陳信宇,113,andy0413@hotmail.com.tw,113陳信宇.png,,,,,,wsn,碩一,碩士
4,2025/3/27 下午 4:02:10,陳珮禎,112,penny5462254@gmail.com,已有,,,,,Combining federated learning with Bayesian opt...,"NILM,FL",在校生碩二,碩士


In [3]:
# Rename columns if necessary (Ensure they match the JSON keys)
df = df_csv.rename(columns={
    "時間戳記": "timestamp",
    "姓名（中英文皆可，中文為優先，例如：王小明）": "name",
    "系級（例如：112）": "grade",
    "": "thesis",
    "主要聯絡方式（請放常用Email，例如：nchu@gmail.com）": "email",
    "個人照片檔案名稱（例如：112王小明.png），另外請將照片寄給該屆網管": "img",
    "次要聯絡方式（請放FB或Linkedin連結）（若無，請留空白）": "SocialLink",
    "個人Github連結（若無，請留空白）": "GithubLink",
    "其他連結（例如：個人網站等）（若無，請留空白）": "blinks",
    "一段關於自己的描述（約30字左右，可以放研究領域等）": "description",
    "關於你的關鍵字（約兩到三個，請用逗號隔開，例如：wsn, algorithm）": "keyword",
    "3. 年級或職業稱謂\n1. 在校生：年級（例如：碩二）\n2. 已畢業：頭銜（例如：聯發科副理）": "title",
    "4. 文憑": "degree"
})

# Fill NaN values with empty string for string fields
string_fields = ["name", "email", "img", "SocialLink", "ytlinks", "GithubLink", "blinks", "description", "title", "degree"]

df[string_fields] = df[string_fields].fillna("")

# Process degree
degree_map = {"碩士": "master", "博士": "phd", "學士": "bachelor"}
df["degree"] = df["degree"].map(degree_map).fillna(df["degree"])

# Process 'keyword' field: split by comma and strip spaces, keep empty list if no data
df["keyword"] = df["keyword"].apply(lambda x: [kw.strip() for kw in str(x).split(",") if kw.strip()] if pd.notna(x) else [])

# Ensure 'grade' is an integer where applicable, default to 0 if missing
df["grade"] = pd.to_numeric(df["grade"], errors="coerce").fillna(0).astype(int)

# Add image path dynamically
df["img"] = df["grade"].astype(str) + df["name"] + ".png"
df["img"] = "assets/img/member/" + df["img"]


KeyError: "['title', 'degree'] not in index"

In [None]:
# Convert DataFrame to a list of dictionaries
json_data = df.to_dict(orient="records")

# Remove empty fields dynamically from each entry
cleaned_json_data = [{k: v for k, v in entry.items() if v not in ["NaN", None]} for entry in json_data]

# Sort the JSON output by 'grade' in descending order
sorted_json_data = sorted(cleaned_json_data, key=lambda x: x["grade"], reverse=True)

# Export to JSON file
output_file = "assets/data/lab_members.json"
with open(output_file, "w", encoding="utf-8") as file:
    json.dump(sorted_json_data, file, ensure_ascii=False, indent=2)

print(f"JSON file exported successfully as {output_file}")

JSON file exported successfully as assets/data/lab_members.json


## Auto

In [2]:
import pandas as pd

# The URL you provided needs to be modified to download as CSV
url = "https://docs.google.com/spreadsheets/d/1tVpYkEHWnYz4qwDHcnqeAISTS7Xh2xKJHJEREa0_L7k/export?format=csv"

# Read the CSV directly into a pandas DataFrame
df_csv = pd.read_csv(url)

In [None]:
CURRENT_GRADE = 112 
# n8n 更新期間
# 當年度9~12更新memebr資訊 2025-1912=113

In [4]:
df_csv.columns

Index(['時間戳記', '1. 姓名（中英文皆可，中文為優先，例如：王小明）', '2. 系級（例如：112）',
       '6. 主要聯絡方式\n(請放常用Email，例如：nchu@gmail.com)',
       '7. 個人照片檔案名稱（例如：112王小明.png），另外請將照片寄給該屆網管',
       '8. 次要聯絡方式（請放FB或Linkedin連結）（若無，請留空白）', '9. 個人Github連結（若無，請留空白）',
       '10. 其他連結（例如：個人網站等）（若無，請留空白）', '11. 一段關於自己的描述（約30字左右，可以放研究領域等）',
       '12. 關於你的關鍵字（約兩到三個，請用逗號隔開，例如：wsn, algorithm）',
       '3. 年級或職業稱謂\n- 在校生：年級\n- 已畢業：頭銜（例如：聯發科副理）', '4. 文憑',
       '5. 論文永久網址（請查閱台灣碩博士論文知識加值系統）（注意：若在學同學請留空白）'],
      dtype='object')

In [5]:
import pandas as pd
import numpy as np
import json
import os,re


def process_lab_members_data(df_csv, current_grade=CURRENT_GRADE, output_file="assets/data/lab_members_new.json"):
    # Rename columns with error handling in case column names don't match exactly
    column_mapping = {
        "時間戳記" : "timestamp",
        "1. 姓名（中英文皆可，中文為優先，例如：王小明）": "name",
        "2. 系級（例如：112）": "grade",
        "3. 年級或職業稱謂\n- 在校生：年級\n- 已畢業：頭銜（例如：聯發科副理）": "title",
        "4. 文憑": "degree",
        "5. 論文永久網址（請查閱台灣碩博士論文知識加值系統）（注意：若在學同學請留空白）":"thesis",
        "6. 主要聯絡方式\n(請放常用Email，例如：nchu@gmail.com)": "email",
        "7. 個人照片檔案名稱（例如：112王小明.png），另外請將照片寄給該屆網管": "img",
        "8. 次要聯絡方式（請放FB或Linkedin連結）（若無，請留空白）": "SocialLink",
        "9. 個人Github連結（若無，請留空白）": "GithubLink",
        "10. 其他連結（例如：個人網站等）（若無，請留空白）": "blinks",
        "11. 一段關於自己的描述（約30字左右，可以放研究領域等）": "description",
        "12. 關於你的關鍵字（約兩到三個，請用逗號隔開，例如：wsn, algorithm）": "keyword"
    }
    
    # Print available columns for debugging
    print("Available columns in DataFrame:", df_csv.columns.tolist())
    
    

    # Create a dictionary of simplified column names to handle variations
    simplified_mapping = {}
    for full_name, short_name in column_mapping.items():
        if '姓名' in full_name:
            simplified_mapping['姓名'] = short_name
        elif '系級' in full_name:
            simplified_mapping['系級'] = short_name
        elif 'Email' in full_name.lower() or '聯絡方式' in full_name:
            simplified_mapping['聯絡'] = short_name
    
    # Try to rename columns, fall back to simplified mapping if exact match fails
    renamed_cols = {}
    for col in df_csv.columns:
        if col in column_mapping:
            renamed_cols[col] = column_mapping[col]
        else:
            # Try to find a match using simplified keys
            for simple_key, mapped_value in simplified_mapping.items():
                if simple_key in col:
                    renamed_cols[col] = mapped_value
                    break
    
    # Rename the columns we could match
    df = df_csv.rename(columns=renamed_cols)
    
    # Print renamed columns
    print("Columns after renaming:", df.columns.tolist())
    
    # ================ After Renaming the column ===================
    # Convert timestamp to datetime for sorting
    if "timestamp" in df_csv.columns:
        df_csv["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    # Drop duplicates, keeping the latest submission per name
    df = df.sort_values("timestamp", ascending=False).drop_duplicates(subset=["name", "email"], keep="first")
    
    # Make sure all required columns exist, create empty ones if not
    required_columns = ["timestamp", "grade", "degree", "title", "name" ,"thesis", "email", "img", "SocialLink","GithubLink", "blinks", "description", "keyword"]

    for col in required_columns:
        if col not in df.columns:
            df[col] = ""
    
    # Fill NaN values with appropriate defaults - FIXED to handle columns one by one
    string_fields = ["timestamp", "grade", "degree", "title", "name" ,"thesis", "email", "img", "SocialLink","GithubLink", "blinks", "description", "keyword"]

    # Fix: Fill NaN values column by column
    for col in string_fields:
        if col in df.columns:
            df[col] = df[col].fillna("")
    
    # Process degree with robust handling if column exists
    if "degree" in df.columns:
        degree_map = {
            "碩士": "master", 
            "博士": "phd", 
            "學士": "bachelor",
            "master": "master",
            "phd": "phd",
            "bachelor": "bachelor",
            "PhD": "phd",
            "Master": "master",
            "Bachelor": "bachelor"
        }
        
        df["degree"] = df["degree"].apply(
            lambda x: next((v for k, v in degree_map.items() if k in str(x)), str(x)) if pd.notna(x) else ""
        )
    
    # Process keywords if column exists
    if "keyword" in df.columns:
        def process_keywords(kw_str):
            if pd.isna(kw_str) or not kw_str:
                return []
            
            separators = [',', ';', '、']
            for sep in separators:
                if sep in str(kw_str):
                    return [k.strip() for k in str(kw_str).split(sep) if k.strip()]
            
            return [str(kw_str).strip()] if str(kw_str).strip() else []
        
        df["keyword"] = df["keyword"].apply(process_keywords)
    else:
        df["keyword"] = [[] for _ in range(len(df))]  # Fixed: use list comprehension instead of multiplication
    
    # Handle grade conversion if column exists
    if "grade" in df.columns:
        def convert_grade(grade_val):
            try:
                return int(float(grade_val))
            except (ValueError, TypeError):
                if isinstance(grade_val, str):
                    import re
                    nums = re.findall(r'\d+', grade_val)
                    if nums:
                        return int(nums[0])
                return 0
        
        df["grade"] = df["grade"].apply(convert_grade)
    else:
        df["grade"] = 0
    
    # Generate image paths
    if "name" in df.columns:
        def generate_img_path(row):
            if "img" in row and row["img"] and isinstance(row["img"], str) and len(row["img"]) > 5:
                img_path = row["img"]

                # Normalize path
                if not img_path.startswith("assets/"):
                    img_path = f"assets/img/member/{img_path}"
                
                # Change only .jpg or .jpeg extensions to .png
                base, ext = os.path.splitext(img_path)
                if ext.lower() in [".jpg", ".jpeg"]:
                    img_path = base + ".png"

                return img_path

            grade = str(row.get("grade", "")) if pd.notna(row.get("grade", "")) else ""
            name = str(row.get("name", "")) if pd.notna(row.get("name", "")) else "unknown"
            name = re.sub(r'[\\/*?:"<>|]', "", name)

            return f"assets/img/member/{grade}{name}.png"

        df["img"] = df.apply(generate_img_path, axis=1)
    
        required_output_fields = ["timestamp", "grade", "degree", "title", "name" ,"thesis", "email", "img", "SocialLink","GithubLink", "blinks", "description", "keyword"]

        def clean_entry(entry):
            cleaned = {}
            for field in required_output_fields:
                val = entry.get(field, "")
                if isinstance(val, float) and val.is_integer():
                    val = int(val)
                elif isinstance(val, (np.ndarray, pd.Series)):
                    val = val.tolist()
                cleaned[field] = val if val is not np.nan else ""
            return cleaned

    
    # Convert to records first, then clean each entry
    records = df.to_dict(orient="records")
    json_data = []
        # Categorize entries into studying and alumni
    studying, alumni = [], []
    for entry in records:
        try:
            cleaned = clean_entry(entry)
            grade = cleaned.get("grade", 0)
            if isinstance(grade, str):
                grade = int(re.findall(r'\d+', grade)[0]) if re.findall(r'\d+', grade) else 0
            if grade >= current_grade:
                studying.append(cleaned)
            else:
                alumni.append(cleaned)
        except Exception as e:
            print(f"Error processing entry: {e}")
            print(f"Problematic entry: {entry}")
            alumni.append({"name": entry.get("name", "Unknown"), "grade": entry.get("grade", 0)})

    # Sort each category by grade descending
    studying = sorted(studying, key=lambda x: x.get("grade", 0), reverse=True)
    alumni = sorted(alumni, key=lambda x: x.get("grade", 0), reverse=True)

    final_json = {"studying": studying, "alumni": alumni}
    
    # Ensure output directory exists
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    
    # Export to JSON with error handling
    try:
        with open(output_file, "w", encoding="utf-8") as file:
            json.dump(final_json, file, ensure_ascii=False, indent=2)
        print(f"JSON file exported successfully as {output_file}")
        return final_json
    except Exception as e:
        print(f"Error saving JSON file: {e}")
        fallback_file = "lab_members.json"
        with open(fallback_file, "w", encoding="utf-8") as file:
            json.dump(final_json, file, ensure_ascii=False, indent=2)
        print(f"JSON saved to fallback location: {fallback_file}")
        return final_json

In [5]:
process_lab_members_data(df_csv)

Available columns in DataFrame: ['時間戳記', '1. 姓名（中英文皆可，中文為優先，例如：王小明）', '2. 系級（例如：112）', '6. 主要聯絡方式\n(請放常用Email，例如：nchu@gmail.com)', '7. 個人照片檔案名稱（例如：112王小明.png），另外請將照片寄給該屆網管', '8. 次要聯絡方式（請放FB或Linkedin連結）（若無，請留空白）', '9. 個人Github連結（若無，請留空白）', '10. 其他連結（例如：個人網站等）（若無，請留空白）', '11. 一段關於自己的描述（約30字左右，可以放研究領域等）', '12. 關於你的關鍵字（約兩到三個，請用逗號隔開，例如：wsn, algorithm）', '3. 年級或職業稱謂\n- 在校生：年級\n- 已畢業：頭銜（例如：聯發科副理）', '4. 文憑', '5. 論文永久網址（請查閱台灣碩博士論文知識加值系統）（注意：若在學同學請留空白）']
Columns after renaming: ['timestamp', 'name', 'grade', 'email', 'img', 'SocialLink', 'GithubLink', 'blinks', 'description', 'keyword', 'title', 'degree', 'thesis']
JSON file exported successfully as assets/data/lab_members_new.json


{'studying': [{'timestamp': '2025/3/30 下午 10:30:02',
   'grade': 113,
   'degree': 'master',
   'title': '碩一',
   'name': '楊啟弘',
   'thesis': '',
   'email': 'young900221@gmail.com',
   'img': 'assets/img/member/113楊啟弘',
   'SocialLink': 'www.linkedin.com/in/yangchihung',
   'GithubLink': 'https://github.com/YANGCHIHUNG',
   'blinks': '',
   'description': '研究強化式學習與知識蒸餾的整合應用，同時學習嵌入式系統並應用，測試模型效能。',
   'keyword': ['Reinforcement Learning', 'Knowledge distillation']},
  {'timestamp': '2025/3/29 下午 7:14:14',
   'grade': 113,
   'degree': 'master',
   'title': '在校生',
   'name': '簡紹鈞',
   'thesis': '',
   'email': 'chienshaochun0421@gmail.com',
   'img': 'assets/img/member/113簡紹鈞.png',
   'SocialLink': '',
   'GithubLink': 'https://github.com/chienshaochun/',
   'blinks': '',
   'description': '目前在做diffusion model在NILM上模擬數據配合federated learning的研究',
   'keyword': ['NILM', 'LA']},
  {'timestamp': '2025/3/28 下午 1:14:52',
   'grade': 113,
   'degree': 'master',
   'title': '碩一',
   'name': '吳楚熙'

## Refactor

In [60]:
import pandas as pd
import numpy as np
import json
import os
import re


def get_column_mapping():
    return {
        "時間戳記" : "timestamp",
        "1. 姓名（中英文皆可，中文為優先，例如：王小明）": "name",
        "2. 系級（例如：112）": "grade",
        "3. 年級或職業稱謂\n- 在校生：年級\n- 已畢業：頭銜（例如：聯發科副理）": "title",
        "4. 文憑": "degree",
        "5. 論文永久網址（請查閱台灣碩博士論文知識加值系統）（注意：若在學同學請留空白）":"thesis",
        "6. 主要聯絡方式\n(請放常用Email，例如：nchu@gmail.com)": "email",
        "7. 個人照片檔案名稱（例如：112王小明.png），另外請將照片寄給該屆網管": "img",
        "8. 次要聯絡方式（請放FB或Linkedin連結）（若無，請留空白）": "SocialLink",
        "9. 個人Github連結（若無，請留空白）": "GithubLink",
        "10. 其他連結（例如：個人網站等）（若無，請留空白）": "blinks",
        "11. 一段關於自己的描述（約30字左右，可以放研究領域等）": "description",
        "12. 關於你的關鍵字（約兩到三個，請用逗號隔開，例如：wsn, algorithm）": "keyword"
    }

def rename_columns(df, column_mapping):
    simplified_mapping = {
        '姓名': 'name', '系級': 'grade', '聯絡': 'email'
    }
    renamed_cols = {}
    for col in df.columns:
        if col in column_mapping:
            renamed_cols[col] = column_mapping[col]
        else:
            for simple_key, mapped_value in simplified_mapping.items():
                if simple_key in col:
                    renamed_cols[col] = mapped_value
                    break
    return df.rename(columns=renamed_cols)

def preprocess_dataframe(df):
    if "timestamp" in df.columns:
        # Replace '下午' with 'PM' and '上午' with 'AM' for correct parsing
        df["timestamp"] = df["timestamp"].str.replace("下午", "PM").str.replace("上午", "AM")

        # Convert to datetime with a custom format
        df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y/%m/%d %p %I:%M:%S", errors="coerce")
        
        # Debugging: Check if any valid timestamps exist
        print("Valid timestamps:", df["timestamp"].notna().sum())  # This will print the count of valid timestamps
        
        # For NaT values, replace them with a placeholder like "N/A"
        df["timestamp"] = df["timestamp"].fillna("N/A")
    
    df = df.sort_values("timestamp", ascending=False).drop_duplicates(subset=["name", "email"], keep="first")
    
    required_columns = ["timestamp","name", "title", "degree", "grade", "thesis", "img", "email", "GithubLink","SocialLink", "blinks","keyword", "description"]
    for col in required_columns:
        if col not in df.columns:
            df[col] = ""
        else:
            df[col] = df[col].fillna("")
    return df

def normalize_degree(degree):
    mapping = {
        "碩士": "master", "博士": "phd", "學士": "bachelor",
        "master": "master", "phd": "phd", "bachelor": "bachelor",
        "PhD": "phd", "Master": "master", "Bachelor": "bachelor"
    }
    for k, v in mapping.items():
        if k in str(degree):
            return v
    return str(degree)

def split_keywords(kw_str):
    if pd.isna(kw_str) or not kw_str:
        return []
    for sep in [',', ';', '、']:
        if sep in kw_str:
            return [k.strip() for k in kw_str.split(sep) if k.strip()]
    return [kw_str.strip()] if kw_str.strip() else []

def convert_grade(grade_val):
    try:
        return int(float(grade_val))
    except (ValueError, TypeError):
        nums = re.findall(r'\d+', str(grade_val))
        return int(nums[0]) if nums else 0

import os
import re

def generate_img_path(row):
    img_base_path = "assets/img/member/"
    
    # Step 1: Check for explicitly given img field
    img_field = row.get("img", "")
    if isinstance(img_field, str) and len(img_field.strip()) > 5:
        # Ensure correct base path
        img_path = img_field if img_field.startswith("assets/") else os.path.join(img_base_path, img_field)
        
        # If no extension, try appending common ones
        if not os.path.splitext(img_path)[1]:
            for ext in [".png", ".jpg", ".jpeg"]:
                full_path = img_path + ext
                if os.path.isfile(full_path):
                    return full_path
        else:
            # If file exists with given extension
            if os.path.isfile(img_path):
                return img_path
            # Try .png fallback if extension is .jpg/.jpeg
            base, ext = os.path.splitext(img_path)
            if ext.lower() in [".jpg", ".jpeg"]:
                png_path = base + ".png"
                if os.path.isfile(png_path):
                    return png_path

    # Step 2: Try building path from grade and name
    grade = str(row.get("grade", ""))
    name = re.sub(r'[\\/*?:"<>|]', "", str(row.get("name", "unknown")))
    base_path = os.path.join(img_base_path, f"{grade}{name}")
    
    for ext in [".png", ".jpg", ".jpeg"]:
        full_path = base_path + ext
        if os.path.isfile(full_path):
            return full_path

    # Step 3: Nothing valid found, return blank
    return ""


def clean_entry(entry, fields):
    cleaned = {}
    for field in fields:
        val = entry.get(field, "")
        if isinstance(val, float) and val.is_integer():
            val = int(val)
        elif isinstance(val, (np.ndarray, pd.Series)):
            val = val.tolist()
        cleaned[field] = val if val is not np.nan else ""
    return cleaned

def categorize_members(records, current_grade, fields):
    studying, alumni = [], []
    for entry in records:
        try:
            cleaned = clean_entry(entry, fields)
            grade = cleaned.get("grade", 0)
            if isinstance(grade, str):
                nums = re.findall(r'\d+', grade)
                grade = int(nums[0]) if nums else 0
            if grade >= current_grade:
                studying.append(cleaned)
            else:
                alumni.append(cleaned)
        except Exception as e:
            print(f"Error processing entry: {e}")
            alumni.append({"name": entry.get("name", "Unknown"), "grade": entry.get("grade", 0)})
    return (
        sorted(studying, key=lambda x: x.get("grade", 0), reverse=True),
        sorted(alumni, key=lambda x: x.get("grade", 0), reverse=True)
    )



In [63]:
def save_to_json(data, output_file):
    # Step 1: Format timestamps
    for category in ['studying', 'alumni']:
        if category in data:
            for record in data[category]:
                if isinstance(record.get("timestamp"), pd.Timestamp):
                    record["timestamp"] = record["timestamp"].strftime('%Y-%m-%d %H:%M:%S')

    # Step 2: Load existing data or initialize
    if os.path.isfile(output_file):
        with open(output_file, "r", encoding="utf-8") as f:
            try:
                existing_data = json.load(f)
            except json.JSONDecodeError:
                existing_data = {"studying": [], "alumni": []}
    else:
        existing_data = {"studying": [], "alumni": []}

    # Step 3: Replace based on name key
    final_data = {}
    for category in ['studying', 'alumni']:
        # Convert existing and new lists to dicts keyed by 'name'
        existing_dict = {m['name']: m for m in existing_data.get(category, [])}
        new_dict = {m['name']: m for m in data.get(category, [])}
        
        # Update existing with new (replace if name matches)
        existing_dict.update(new_dict)
        
        # Store merged list back
        final_data[category] = list(existing_dict.values())

    # Step 4: Save merged result
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    try:
        with open(output_file, "w", encoding="utf-8") as file:
            json.dump(final_data, file, ensure_ascii=False, indent=2)
        print(f"JSON exported to {output_file}")
    except Exception as e:
        print(f"Error saving JSON: {e}")
        fallback = "lab_members.json"
        with open(fallback, "w", encoding="utf-8") as file:
            json.dump(final_data, file, ensure_ascii=False, indent=2)
        print(f"Fallback JSON saved to {fallback}")

In [68]:
def process_lab_members_data(df_csv, current_grade, output_file="assets/data/lab_members_new.json"):
    column_mapping = get_column_mapping()
    df = rename_columns(df_csv, column_mapping)
    df = preprocess_dataframe(df)
    df = df[df["degree"] != "在職碩士"] # 在職 require manually append

    df["degree"] = df["degree"].apply(normalize_degree)
    df["keyword"] = df["keyword"].apply(split_keywords)
    df["grade"] = df["grade"].apply(convert_grade)
    df["img"] = df.apply(generate_img_path, axis=1)

    fields = ["timestamp","name", "title", "degree", "grade", "thesis", "img", "email", "GithubLink","SocialLink", "blinks","keyword", "description"]

    records = df.to_dict(orient="records")
    studying, alumni = categorize_members(records, current_grade, fields)
    final_data = {"studying": studying, "alumni": alumni}
    save_to_json(final_data, output_file)
    return final_data


In [None]:
process_lab_members_data(df_csv, 112, "test.josn")

Valid timestamps: 13
JSON exported to assets/data/lab_members_new.json


{'studying': [{'timestamp': '2025-03-30 22:30:02',
   'name': '楊啟弘',
   'title': '碩一',
   'degree': 'master',
   'grade': 113,
   'thesis': '',
   'img': 'assets/img/member/113楊啟弘.png',
   'email': 'young900221@gmail.com',
   'GithubLink': 'https://github.com/YANGCHIHUNG',
   'SocialLink': 'www.linkedin.com/in/yangchihung',
   'blinks': '',
   'keyword': ['Reinforcement Learning', 'Knowledge distillation'],
   'description': '研究強化式學習與知識蒸餾的整合應用，同時學習嵌入式系統並應用，測試模型效能。'},
  {'timestamp': '2025-03-29 19:14:14',
   'name': '簡紹鈞',
   'title': '在校生',
   'degree': 'master',
   'grade': 113,
   'thesis': '',
   'img': 'assets/img/member/113簡紹鈞.png',
   'email': 'chienshaochun0421@gmail.com',
   'GithubLink': 'https://github.com/chienshaochun/',
   'SocialLink': '',
   'blinks': '',
   'keyword': ['NILM', 'LA'],
   'description': '目前在做diffusion model在NILM上模擬數據配合federated learning的研究'},
  {'timestamp': '2025-03-28 13:14:52',
   'name': '吳楚熙',
   'title': '碩一',
   'degree': 'master',
   'grade': 113