In [6]:
import pandas as pd

# --- CONFIG ---
INPUT_FILE = "nodes_and_links.ods"      # source spreadsheet; change path if needed
SHEET_NAME = "Artifacts"                # sheet whose rows get expanded
DELIM = ";"                             # separator between items inside one cell
OUTPUT_FILE = "artifacts_expanded.ods"  # destination of the expanded sheet

# Load the sheet. The CONFIG constants are used here (instead of repeating the
# literals) so that editing INPUT_FILE / SHEET_NAME actually takes effect.
df = pd.read_excel(INPUT_FILE, sheet_name=SHEET_NAME, engine="odf")

# Assume the ID is column 0 and the multi-valued column is column 1.
id_col = df.columns[0]
col2 = df.columns[1]
# A second multi-valued column (column 2) is currently disabled:
#col3 = df.columns[2]

# --- prepare split lists: handle NaN, split, and strip whitespace ---
def safe_split(series, delim):
    """Split every cell of *series* on *delim* into a list of trimmed strings.

    Missing values are replaced by an empty string and every cell is
    converted to ``str`` before splitting, so a missing or empty cell
    yields ``[""]`` and a numeric cell yields its string form.

    Args:
        series: pandas Series holding the delimited cell values.
        delim: separator string passed to ``str.split``.

    Returns:
        A Series (same index as *series*) whose values are lists of strings.
    """
    def _tokenise(cell):
        # Split first, then trim the whitespace around each piece.
        return [piece.strip() for piece in cell.split(delim)]

    as_text = series.fillna("").astype(str)
    return as_text.apply(_tokenise)

# Store the per-cell item lists next to the original column.
df[col2 + "_split"] = safe_split(series=df[col2], delim=DELIM)
#df[col3 + "_split"] = safe_split(df[col3], DELIM)

# --- function to expand a single row into a DataFrame ---
def expand_row(row):
    """Turn one input row into a DataFrame with one line per split item.

    Reads the module-level names ``id_col`` and ``col2``: the item list is
    taken from ``row[col2 + "_split"]`` and the row's ``id_col`` value is
    repeated once per item.

    Args:
        row: a row of the loaded sheet (as yielded by ``df.iterrows()``).

    Returns:
        A DataFrame with the columns ``id_col`` and ``col2``.
    """
    items = row[col2 + "_split"]
    # A non-list value is treated as a single item.
    if not isinstance(items, list):
        items = [items]

    # Only one multi-valued column is active (col3 is disabled), so the row
    # count is simply the number of items and no padding is required.
    count = len(items)
    repeated_ids = [row[id_col] for _ in range(count)]
    return pd.DataFrame({id_col: repeated_ids, col2: list(items)})

# --- APPLY row-by-row and collect into a LIST (important: list, not Series) ---
expanded_dfs = [expand_row(row) for _, row in df.iterrows()]   # list comprehension

# If you prefer apply(), ensure you convert to list:
# expanded_dfs = df.apply(expand_row, axis=1).tolist()

# --- CONCAT the list of DataFrames ---
# pd.concat raises ValueError on an empty list, which happens when the sheet
# has no data rows; fall back to an empty frame with the expected headers.
if expanded_dfs:
    expanded_df = pd.concat(expanded_dfs, ignore_index=True)
else:
    expanded_df = pd.DataFrame(columns=[id_col, col2])

# --- OPTIONAL: show rows where original split lengths differed (warning) ---
#mismatch_mask = df[col2 + "_split"].apply(len) != df[col3 + "_split"].apply(len)
#if mismatch_mask.any():
#    print("Warning: some rows had differing number of items in Col2")
#    print("Rows with mismatches (index, len_col2):")
#    for i in df[mismatch_mask].index:
#        print(i, len(df.at[i, col2 + "_split"]))

# --- SAVE result ---
# Write the expanded table without the DataFrame index, then confirm on stdout.
expanded_df.to_excel(excel_writer=OUTPUT_FILE, index=False)
print("Expanded sheet written to:", OUTPUT_FILE)


Expanded sheet written to: artifacts_expanded.ods
