In [1]:
import datetime
import json
import os

import numpy as np
import pandas as pd

def read_excel_files(folder_path):
    """Convert every Excel workbook in *folder_path* into JSON-ready records.

    Each file name ending in ``.xlsx`` or ``.xls`` (compared
    case-insensitively) is read with ``pd.read_excel``, passed through
    ``clean_excel`` and ``df_to_json``, and stored under the key returned
    by ``extract_year_from_filename``.

    Args:
        folder_path: Directory that is listed for Excel files.

    Returns:
        dict mapping the key derived from each file name to the list of
        row dictionaries produced for that file. If two files yield the
        same key, the one that sorts last by name wins.
    """
    data = {}

    # os.listdir() returns names in arbitrary order; sorting makes the result
    # (and the winner of a key clash) reproducible.
    for filename in sorted(os.listdir(folder_path)):
        # Excel creates "~$<name>" lock files next to an open workbook; they
        # carry the workbook's extension but are not readable spreadsheets.
        if filename.startswith("~$"):
            continue
        if not filename.lower().endswith((".xlsx", ".xls")):
            continue

        year = extract_year_from_filename(filename)
        df = pd.read_excel(os.path.join(folder_path, filename))
        data[year] = df_to_json(clean_excel(df))

    return data

'''
def extract_year_from_filename(filename):
    if "SoSe" in filename:
        year = filename.split("SoSe")[1].split(".")[0].strip()
    elif "WiSe" in filename:
        year = filename.split("WiSe")[1].split(".")[0].strip()
    else:
        year = filename.split(".")[0].strip()
    return year
'''

def extract_year_from_filename(filename):
    """Derive the dictionary key (semester label) from an Excel file name.

    Only the final extension is removed, so dots inside the name itself
    are kept. If the name contains the phrase
    "Immatrikulierte Studierende", the text following its first occurrence
    is returned; otherwise the whole name without extension is returned.
    Surrounding whitespace is stripped in both cases.

    Args:
        filename: Base name of the file, including its extension.

    Returns:
        The extracted label as a string.
    """
    marker = "Immatrikulierte Studierende"

    # splitext drops only the last suffix, unlike split(".")[0], which would
    # cut the name at its first dot.
    stem = os.path.splitext(filename)[0]

    if marker in stem:
        # Keep what follows the complete marker phrase.
        stem = stem.partition(marker)[2]
    return stem.strip()


def _find_table_start_position(df, header_marker="St-Ort"):
    """Return the 0-based position of the first row containing *header_marker*, or None."""
    for position, (_, row) in enumerate(df.iterrows()):
        if header_marker in row.values:
            return position
    return None


def clean_excel(df, header_marker="St-Ort"):
    """Strip everything above the real table header from a raw sheet.

    The first row that contains *header_marker* is treated as the header:
    its values become the column labels and only the rows below it are
    returned. The input frame is left unmodified.

    Args:
        df: Frame as read from the Excel sheet.
        header_marker: Cell value that identifies the header row.

    Returns:
        A new DataFrame with the header row as column labels and the rows
        below it as data, or *df* itself if no header row is found.
    """
    position = _find_table_start_position(df, header_marker)
    if position is None:
        return df

    # Work on a copy and address rows by position, so the caller's frame is
    # not relabelled and a non-default index cannot select the wrong row.
    cleaned = df.iloc[position + 1:].copy()
    cleaned.columns = df.iloc[position]  # use the header row as column labels
    return cleaned


def find_table_start(df, header_marker="St-Ort"):
    """Return the index label of the first row containing *header_marker*.

    Args:
        df: Frame to search row by row.
        header_marker: Cell value that identifies the header row.

    Returns:
        The index label of the first matching row, or None if no row
        contains the marker.
    """
    position = _find_table_start_position(df, header_marker)
    if position is None:
        return None
    return df.index[position]

def _to_json_value(value):
    """Convert one cell value into something ``json.dumps`` can encode as strict JSON."""
    # Missing markers (NaN, NaT, None, pd.NA) have no JSON number/date form;
    # emit null instead of the non-standard NaN token or a TypeError.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    # pd.Timestamp is a datetime subclass, so this covers both.
    if isinstance(value, datetime.datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")
    # NumPy scalars (e.g. int64 from a single-dtype frame) are not accepted
    # by the json encoder; unwrap them to the matching Python type.
    if isinstance(value, np.generic):
        return value.item()
    return value


def df_to_json(df):
    """Turn a DataFrame into a list of JSON-serialisable row dictionaries.

    Column labels are converted to ``str`` and used as keys. Date/time
    values are formatted as ``"%Y-%m-%d %H:%M:%S"``, missing values become
    ``None`` and NumPy scalars become plain Python values; everything else
    is passed through unchanged.

    Args:
        df: Frame whose rows are exported.

    Returns:
        list with one dict per row, in row order.
    """
    return [
        {str(column): _to_json_value(value) for column, value in row.items()}
        for _, row in df.iterrows()
    ]

# Example run: convert every workbook in the input folder and print the result.
folder_path = "Studierendendaten_HSB-gesamt/"
data = read_excel_files(folder_path)
print(json.dumps(data, indent=4))


# Destination of the JSON export.
output_file_path = "Fakultätsdaten.json"

# Serialise once, then write the finished text to the output file.
with open(output_file_path, "w") as output_file:
    output_file.write(json.dumps(data, indent=4))

print(f"Daten erfolgreich in {output_file_path} gespeichert.")

{
    "SoSe 2014": [
        {
            "St-Ort": "IG",
            "FAK": "1 ",
            "Stg-Nr": "713",
            "Studiengang-Drucktext": "East Asian Management MBA",
            "Abschluss": "Master",
            "Stg-Kurztext": "EAM MBA   ",
            "HSB-Gesamt": 6,
            "D-Gesamt": 4,
            "M\u00e4nnlich-D": 3,
            "Weiblich-D": 1,
            "A-Gesamt": 2,
            "M\u00e4nnlich-A": 2,
            "Weiblich-A": 0,
            "BeurlaubteGesamt": 0,
            "Beurlaubte-M": 0,
            "Beurlaubte-W": 0,
            "StdAnfGesamt": 0,
            "StdAnf-M": 0,
            "StdAnf-W": 0,
            "FAKsort": "1"
        },
        {
            "St-Ort": "WS",
            "FAK": "1 ",
            "Stg-Nr": "714",
            "Studiengang-Drucktext": "DS Betriebswirtschaft BA ",
            "Abschluss": "Bachelor",
            "Stg-Kurztext": "DSBW BA   ",
            "HSB-Gesamt": 82,
            "D-Gesamt": 81,
            "M\u00e4