In [10]:
import pandas as pd
import json

CSV_file = 'Pruefungsplan_PO19_2025'
# --- Step 1: Read the raw CSV with semicolon delimiter ---
with open(CSV_file+'.csv', encoding='utf-8') as f:
    lines = f.readlines()

# Extract metadata from the first 3 rows
metadata = {
    "program_info": {
        "exam_plan": lines[1].split(';')[0],           # "Prüfungsplan für das WiSe 2025/2026 des Fachbereichs AING"
        "bachelor": lines[2].split(';')[0],           # "alle Bachelor-Studiengänge (PO 2019)"
        "master": lines[3].split(';')[0],             # "Master-Studiengang ET/IT (PO 2021)"
        "last_update": lines[1].split(';')[-2] if len(lines[1].split(';')) > 1 else ""
    }
}

# --- Step 2: Read the table starting at row 4 ---
df = pd.read_csv(
    CSV_file+'.csv',
    sep=';',              # semicolon delimiter
    skiprows=4,           # skip metadata
    engine='python',      # Python engine handles irregular rows better
    dtype=str             # read everything as string
)

# Strip spaces in column names
df.columns = df.columns.str.strip()

# Keep only the first 10 columns (your real table)
df = df.iloc[:, :10]

# Strip spaces from all string cells
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

# --- Step 3: Build exams list ---
import math

exams = []
for _, row in df.iterrows():
    if pd.isna(row.iloc[0]) or row.iloc[0] == "":
        continue
    
    # Ensure each row has exactly 10 columns (fill missing with None)
    row_data = list(row) + [None] * (10 - len(row))
    
    # Convert NaN strings to None
    row_data = [None if (x is None or (isinstance(x, float) and math.isnan(x))) else x for x in row_data]

    exam = {
        "course": row_data[0],
        "pnr": row_data[1],
        "exam_date": row_data[2],
        "duration": row_data[3],
        "start_time": row_data[4],
        "end_time": row_data[5],
        "examiner": row_data[6],
        "second_examiner": row_data[7],
        "registration_deadline": row_data[8],
        "withdrawal_deadline": row_data[9]
    }
    exams.append(exam)


# --- Step 4: Save as JSON ---
result = {
    "metadata": metadata,
    "exams": exams
}

with open(f'{CSV_file}.json', 'w', encoding='utf-8') as f:
    json.dump(result, f, ensure_ascii=False, indent=4)

print(f'JSON file created successfully on {CSV_file}.json !')


JSON file created successfully on Pruefungsplan_PO19_2025.json !


  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
