In [None]:
import pandas as pd
import re
from google.colab import files

# 1) Upload your CSV
# files.upload() opens Colab's file picker and returns a dict keyed by the
# uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Nothing came back (e.g. the picker was cancelled). Fail with a clear
    # message instead of the bare IndexError that indexing an empty list gives.
    raise ValueError(
        "No file was uploaded; re-run this cell and choose a CSV file."
    )
# Only the first uploaded file is processed; any additional files are ignored.
file_name = next(iter(uploaded))
# The CSV is loaded by name — presumably Colab has saved the upload into the
# working directory under that name (verify if uploads are renamed).
df = pd.read_csv(file_name)

# 2) Class-mapping (Membership tasks)
# Each ontology class name maps to the opaque label "Class<k>", where k is the
# 1-based position of the name in the tuple below. Append new names at the end
# so existing labels keep their numbers.
class_replacements = {
    class_name: f"Class{position}"
    for position, class_name in enumerate(
        (
            "Person",
            "Sex",
            "Man",
            "Woman",
            "Ancestor",
            "Male",
            "Female",
            "Marriage",
            "DomainEntity",
        ),
        start=1,
    )
}
# Separate copy of the mapping; this is the name the abstraction functions
# read when they look classes up.
class_lookup = dict(class_replacements)

# 3) Property-mapping (Property Assertion tasks)
# Each ontology property name maps to the opaque label "Property<suffix>".
# Suffixes are handed out in order: A..Z for the first 26 names, then 1..9.
# zip() stops at the shorter input, so the suffix string must stay at least as
# long as the name tuple or trailing names would be dropped silently.
prop_replacements = {
    prop_name: f"Property{suffix}"
    for prop_name, suffix in zip(
        (
            "hasBrother",
            "hasChild",
            "hasDaughter",
            "hasSon",
            "hasPartner",
            "hasFemalePartner",
            "hasMalePartner",
            "hasRelation",
            "hasAncestor",
            "hasParent",
            "hasFather",
            "hasMother",
            "isBloodrelationOf",
            "isSiblingOf",
            "isBrotherOf",
            "isSisterOf",
            "hasSex",
            "hasSister",
            "hasSpouse",
            "hasHusband",
            "hasWife",
            "isAncestorOf",
            "isChildOf",
            "isDaughterOf",
            "isSonOf",
            "isFatherOf",
            "isFemalePartnerIn",
            "isHusbandOf",
            "isMalePartnerIn",
            "isMotherOf",
            "isParentOf",
            "isPartnerIn",
            "isSpouseOf",
            "isUncleOf",
            "isWifeOf",
        ),
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ123456789",
    )
}

# 4) Helper: Extract class from URI
def extract_class(uri: str) -> str:
    """Return the first capitalised ``#Fragment`` name found in *uri*.

    The name must start with an uppercase ASCII letter, consist of ASCII
    letters only, and end at a word boundary, so a fragment continued by a
    digit or underscore is skipped. Returns "" when nothing qualifies.
    """
    found = re.search(r"#([A-Z][a-zA-Z]*)\b", uri)
    if found is None:
        return ""
    return found[1]

# 5) Abstract SPARQL Query
def make_abstract_sparql(row):
    """Return the row's SPARQL query with ontology names replaced by labels.

    Args:
        row: A mapping-like record (``pandas.Series`` or ``dict``) read via
            ``row.get`` for the keys "Task Type", "SPARQL Query",
            "Predicate" and "Answer Type".

    Returns:
        str:
          * "" when the query is missing (None/NaN) or blank, or when the
            task type / answer type combination is not handled.
          * "Property Assertion": the query with ``#<Predicate>`` rewritten
            to its ``prop_replacements`` label; the query is returned
            unchanged when the predicate is not in that mapping.
          * "Membership" + "Binary": the query with every ``#<ClassName>``
            listed in ``class_lookup`` rewritten to its label.
          * "Membership" + "Multi Choice": the stripped query, unchanged.
    """
    raw_query = row.get("SPARQL Query", "")
    # Test for a missing value BEFORE str(): str(NaN) is the text "nan", which
    # is neither empty nor NaN and would otherwise be emitted as a query.
    if pd.isna(raw_query):
        return ""
    sparql = str(raw_query).strip()
    if not sparql:
        return ""

    ttype = row.get("Task Type", "")

    if ttype == "Property Assertion":
        pred = str(row.get("Predicate", "")).strip()
        replacement = prop_replacements.get(pred)
        if replacement is None:
            return sparql
        # Only names directly after '#' are rewritten; the trailing \b keeps
        # e.g. "hasChild" from matching inside a longer name.
        pattern = rf"#\b{re.escape(pred)}\b"
        return re.sub(pattern, f"#{replacement}", sparql)

    if ttype == "Membership":
        atype = row.get("Answer Type", "")
        if atype == "Binary":
            for original, replacement in class_lookup.items():
                pattern = rf"#\b{re.escape(original)}\b"
                sparql = re.sub(pattern, f"#{replacement}", sparql)
            return sparql
        if atype == "Multi Choice":
            return sparql

    return ""

# 6) Abstract Predicate
def make_abstract_predicate(row):
    """Return the abstract label for the predicate a row is about.

    * "Property Assertion": the ``prop_replacements`` label of the row's
      "Predicate" value, or "" if that predicate is not in the mapping.
    * "Membership" + "Binary": the ``class_lookup`` label of the class name
      that ``extract_class`` pulls from the row's "SPARQL Query", or "" if
      no known class is found.
    * "Membership" + "Multi Choice": the literal string "type".
    * Anything else: "".
    """
    task = row.get("Task Type", "")

    if task == "Property Assertion":
        predicate = str(row.get("Predicate", "")).strip()
        return prop_replacements.get(predicate, "")

    if task != "Membership":
        return ""

    answer_kind = row.get("Answer Type", "")
    if answer_kind == "Multi Choice":
        return "type"
    if answer_kind == "Binary":
        query = str(row.get("SPARQL Query", "")).strip()
        return class_lookup.get(extract_class(query), "")
    return ""

# 7) Abstract Answer
def make_abstract_answer(row):
    """Return the abstracted form of the row's answer.

    Args:
        row: A mapping-like record (``pandas.Series`` or ``dict``) read via
            ``row.get`` for the keys "Task Type", "Answer Type" and "Answer".

    Returns:
        str:
          * "" when the answer is missing (None/NaN) or blank.
          * "Membership" + "Multi Choice": the comma-separated answer with
            each item found in ``class_lookup`` replaced by its label
            (unknown items are kept as they are), re-joined with ", ".
          * Any "Binary" answer: the stripped original answer.
          * Anything else: "".
    """
    raw_answer = row.get("Answer", "")
    # Test for a missing value BEFORE str(): str(NaN) is the text "nan", which
    # would otherwise be returned as if it were a real Binary answer.
    if pd.isna(raw_answer):
        return ""
    answer = str(raw_answer).strip()
    if not answer:
        return ""

    ttype = row.get("Task Type", "")
    atype = row.get("Answer Type", "")

    if ttype == "Membership" and atype == "Multi Choice":
        parts = (part.strip() for part in answer.split(","))
        return ", ".join(class_lookup.get(part, part) for part in parts)

    # Binary answers of any task type are copied through unchanged.
    if atype == "Binary":
        return answer

    return ""

# 8) Apply to DataFrame
# Build each abstracted column by running its row-wise builder over the frame.
abstraction_steps = {
    "Abstracted SPARQL Query": make_abstract_sparql,
    "Abstracted Predicate": make_abstract_predicate,
    "Abstracted Answer": make_abstract_answer,
}
for column, builder in abstraction_steps.items():
    df[column] = df.apply(builder, axis=1)


# 9) Fill missing Abstracted Answers with original Answer
def _fill_blank_answer(row):
    """Return the row's original Answer when its abstracted answer is NaN or blank."""
    abstracted = row["Abstracted Answer"]
    if pd.isna(abstracted) or str(abstracted).strip() == "":
        return row["Answer"]
    return abstracted


df["Abstracted Answer"] = df.apply(_fill_blank_answer, axis=1)

# 10) Save and download
output_file = "abstracted_combined_output.csv"
# Write without the index column, then hand the file to Colab for download.
df.to_csv(output_file, index=False)
files.download(output_file)


Saving Names-ABOXDAtrial - ABOXDAtrial.csv to Names-ABOXDAtrial - ABOXDAtrial.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>