In [1]:
import os

import numpy as np
import pandas as pd

# Load the "Primary level" sheet of the KC workbook.
# The relative path is assembled with os.path.join so it resolves on any OS
# (the former backslash-separated literal only worked on Windows).
primary_df = pd.read_excel(os.path.join("Data", "Excel", "KC revised v5.xlsx"), sheet_name="Primary level")
# Normalise the headers: trim surrounding whitespace and turn inner spaces into underscores
# (e.g. "KC code" -> "KC_code") so that later cells can index columns by a stable name.
primary_df.columns = primary_df.columns.str.strip().str.replace(" ", "_")
# Keep the original row position as an explicit column.
primary_df["Row_Index"] = primary_df.index
primary_df.head()

Unnamed: 0,Grade,Grade_id,Level_0,Level_1,Level_1_id,KC_code,Level_2,KC_type,KC_name,Row_Index
0,Primary 1,P1,WHOLE NUMBERS,Comparison and ordering,Comparison and ordering,WNCmp,comparing and ordering whole numbers,Secondary,WHOLE NUMBERS | Comparison and ordering | comp...,0
1,Primary 1,P1,WHOLE NUMBERS,Addition,Addition,WNAdd2nd,adding whole numbers,Secondary,WHOLE NUMBERS | Addition | adding whole numbers,1
2,Primary 1,P1,WHOLE NUMBERS,Subtraction,Subtraction,WNSub2nd,subtracting whole numbers,Secondary,WHOLE NUMBERS | Subtraction | subtracting whol...,2
3,Primary 1,P1,WHOLE NUMBERS,Multiplication,Multiplication,WNMul2nd,multiplying whole numbers,Secondary,WHOLE NUMBERS | Multiplication | multiplying w...,3
4,Primary 1,P1,WHOLE NUMBERS,Division,Division,WNDiv2nd,dividing whole numbers,Secondary,WHOLE NUMBERS | Division | dividing whole numbers,4


In [2]:
import json
# (primary_code, secondary_code) tuples already emitted; consulted further down
# so that a pair is not emitted again in the reverse direction.
seen_code_pairs = set()

# Number-type hierarchy: Level_0 name -> tier (a larger number is a higher tier).
number_type_rank = {
    level_0: tier
    for tier, level_0_names in (
        (7, ("MATRICES", "SET", "ALGEBRA", "STATISTICS AND PROBABILITY")),
        (6, ("BASE AND POWER",)),
        (5, ("RATIO", "RATE", "PERCENTAGE")),
        (4, ("FRACTIONS AND DECIMALS",)),
        (3, ("DECIMALS",)),
        (2, ("FRACTIONS",)),
        (1, ("WHOLE NUMBERS",)),
    )
    for level_0 in level_0_names
}

# Operation hierarchy: Level_1_id name -> tier (a larger number is a higher tier).
operation_rank = {
    level_1: tier
    for tier, level_1_names in (
        (8, ("Solving",)),
        (7, ("Division",)),
        (6, ("Multiplication",)),
        (5, ("Subtraction",)),
        (4, ("Addition", "Operations")),
        (3, ("Evaluation", "Conversion", "Simplifying", "Finding", "Rounding")),
        (2, ("Comparison and ordering",)),
        (1, ("Representation and concept",)),
    )
    for level_1 in level_1_names
}

def grade_less_equal(g1, g2):
    """Return True when grade id ``g1`` is the same grade as, or lower than, ``g2``.

    A grade id is one level letter followed by a number, e.g. ``"P3"`` or ``"O1"``.
    The level letter is taken into account, matching the definition used later in
    this notebook for the combined sheet: any ``"P"`` grade is lower than any
    ``"O"`` grade. Within the same level (the only case the primary sheet
    produces) the numeric parts are compared, exactly as before.

    Raises:
        ValueError: if the part after the first character is not an integer.
    """
    levels = (g1[0], g2[0])
    if levels == ("P", "O"):
        return True
    if levels == ("O", "P"):
        return False
    return int(g1[1:]) <= int(g2[1:])

# Build the two KC pools used by the pairing loop.
## Primary pool: rows typed "Primary", excluding grades P1 and P2.
primary_kcs = primary_df.loc[
    primary_df["KC_type"].eq("Primary") & ~primary_df["Grade_id"].isin(["P1", "P2"])
].copy()
## Secondary pool: rows typed either "Primary" or "Secondary".
secondary_kcs = primary_df.loc[primary_df["KC_type"].isin(["Primary", "Secondary"])].copy()

# Result container: pair key -> descriptive fields of both KCs.
kc_pairs = {}

# ---------------------------------------------------------------------------
# Rule tables. None of them depends on the KC being processed, so they are
# built once here instead of once per candidate pair inside the loops.
# ---------------------------------------------------------------------------
basic_ops = ["Addition", "Subtraction", "Multiplication", "Division"]
excluded_ops = list(basic_ops)  # same four operations; both names are kept

# Primary number types whose secondary KC must share the same Level_0 (Rules 2, 6 and 8).
same_number_type_only = ("FRACTIONS", "FRACTIONS AND DECIMALS", "MATRICES")

# KC codes that the generic rules never select as a secondary KC
# (they can still be attached through `forced_pairs` below).
non_skc_list = [
    "FrRep12", "FrRepMixIm", "FrRepSet", "FrCnvEq",
    "RoFndDvqWN", "RoFndRoWN", "RoFndTmWN",
    "RoRepFr", "RoRepDc", "RoRepDP", "RoRepIvP",
    "PcRep2q", "PcRepRvs",
    "SPFndmdn", "SPFndQtl", "SPFndmean", "SPFndstd",
    "SPAddProb", "SPMulProb",
    "STOprUn",
    "AgRepLrEx", "AgSlvLrN", "AgRepExSq", "AgRepnth",
    "AgSlvFrLr", "AgSlvSq1v", "AgSlvLr2v", "AgSlvIneq",
    "AgRepEq", "AgRepIneq", "AgEvlEx",
    "RtFndR", "DcCnvN2D", "DcCnvD2N", "RtFndT", "RtFndU",
    "RoSmpWN", "RoSmpFr", "RoSmpDc",
    "PcCnv2Fr", "PcCnv2Dc",
    "SPFndmode", "SPFndPctl", "SPFndrng",
    "SPRepPrSE", "SPFndPrCE",
    "STOprIns",
    "AgSmpLrEx", "AgEvlLrEx"
]

# Primary KC code -> the only Level_0 a rule-selected secondary KC may have.
# The hand-written table listed exactly the codes of `non_skc_list` (one of them
# twice), so it is derived from that list. A value of None is not expected to
# equal any Level_0 in the sheet, i.e. such a primary KC only receives the
# secondary KCs named in `forced_pairs`.
fixed_rule_map = dict.fromkeys(non_skc_list)
fixed_rule_map.update({
    "RoFndDvqWN": "WHOLE NUMBERS",
    "RoFndRoWN": "WHOLE NUMBERS",
    "RoFndTmWN": "WHOLE NUMBERS",
    "RoRepFr": "FRACTIONS",
    "RoRepDc": "DECIMALS",
    "RoRepDP": "WHOLE NUMBERS",
    "RoRepIvP": "WHOLE NUMBERS",
})
# Primary KC code -> operations its rule-selected secondary KC may have
# (a code without an entry accepts no operation).
fixed_rule_operations = {
    "RoFndDvqWN": ["Addition", "Subtraction"],
    "RoFndTmWN": ["Addition", "Subtraction"],
    "RoFndRoWN": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepFr": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepDc": ["Addition", "Subtraction"],
    "RoRepDP": ["Multiplication", "Division"],
    "RoRepIvP": ["Multiplication", "Division"]
}
# Forced characteristic pairing rules: primary KC code -> secondary KC codes
# that are always attached, bypassing every filter and rule.
forced_pairs = {
    "FrRep12": ["FrCnvEq"], "FrRepMixIm": ["FrCnvEq"], "FrRepSet": ["FrCnvEq"],
    "RtFndR": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndT": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndU": ["DcCnvN2D", "DcCnvD2N"],
    "RoFndRoWN": ["RoSmpWN"],
    "RoRepFr": ["RoSmpFr"],
    "RoRepDc": ["RoSmpDc"],
    "PcRep2q": ["PcCnv2Fr", "PcCnv2Dc"], "PcRepRvs": ["PcCnv2Fr", "PcCnv2Dc"],
    "SPFndmdn": ["SPFndmode", "SPFndPctl", "SPFndrng"],
    "SPFndQtl": ["SPFndIQR"],
    "SPFndmean": ["BPRepSN"],
    "SPFndstd": ["SPFndmean"],
    "SPAddProb": ["SPRepPrSE", "SPFndPrCE"],
    "SPMulProb": ["SPRepPrSE", "SPFndPrCE"],
    "STOprUn": ["STOprIns"],
    "AgRepLrEx": ["AgSmpLrEx", "AgEvlLrEx"],
    "AgSlvLrN": ["AgRepLrEx"],
    "AgRepExSq": ["AgEvlEx"],
    "AgRepnth": ["AgEvlEx"],
    "AgSlvFrLr": ["AgRepEq"],
    "AgSlvSq1v": ["AgRepEq"],
    "AgSlvLr2v": ["AgRepEq"],
    "AgSlvIneq": ["AgRepIneq"],
    "MXSub": ["MXAdd"],
    "MXMulSM": ["MXAdd", "MXSub"],
    "MXMul": ["MXAdd", "MXSub", "MXMulSM"],
    "FDSub": ["FDAdd"],
    "FDMul": ["FDAdd", "FDSub"],
    "FDDiv": ["FDAdd", "FDSub", "FDMul"]
}

# Ranks of every candidate secondary KC (-1 for names missing from the rank tables).
# They depend on `secondary_kcs` only, so they are computed once.
skc_num_rank = secondary_kcs["Level_0"].map(number_type_rank).fillna(-1)
skc_op_rank = secondary_kcs["Level_1_id"].map(operation_rank).fillna(-1)

for _, pkc in primary_kcs.iterrows():
    pkc_code = pkc["KC_code"]
    pkc_num = pkc["Level_0"]
    pkc_op = pkc["Level_1_id"]
    pkc_num_rank = number_type_rank.get(pkc_num, -1)
    pkc_op_rank = operation_rank.get(pkc_op, -1)

    # --- Vectorised pre-filter of the candidate pool ---
    ## Secondary KC must be of the same or lower grade than Primary KC
    grade_ok = secondary_kcs["Grade_id"].apply(lambda g: grade_less_equal(g, pkc["Grade_id"]))
    ## Secondary KC must: Either have a lower Number rank than Primary KC
    lower_number = skc_num_rank < pkc_num_rank
    ## Or have the same Number type, but with a lower Operation rank/the same Operation type compared to Primary KC
    same_number = secondary_kcs["Level_0"] == pkc_num
    operation_ok = (skc_op_rank < pkc_op_rank) | (secondary_kcs["Level_1_id"] == pkc_op)
    valid_skcs = secondary_kcs[grade_ok & (lower_number | (same_number & operation_ok))].copy()

    # Exclude Primary KCs with specific operations
    primary_basic_op = (
        (valid_skcs["KC_type"] == "Primary")
        & valid_skcs["Level_0"].isin(["WHOLE NUMBERS", "FRACTIONS", "DECIMALS"])
        & valid_skcs["Level_1_id"].isin(excluded_ops)
    )
    valid_skcs = valid_skcs[~primary_basic_op]
    valid_skcs = valid_skcs[valid_skcs["KC_code"] != pkc_code]  # Exclude the same KC code

    # --- Row-level rules: a candidate is kept only if no rule rejects it ---
    selected_skcs = []
    for _, skc in valid_skcs.iterrows():
        skc_num = skc["Level_0"]
        skc_op = skc["Level_1_id"]

        # Rule 1: Secondary KC marked as "Secondary" must match the primary KC's Number Type
        # except with operations: Addition, Subtraction, Multiplication, and Division
        if skc["KC_type"] == "Secondary" and skc_num != pkc_num and skc_op not in basic_ops:
            continue

        # Rules 2, 6, 8: Fr / FD / MT primary KCs only pair within their own number type
        if pkc_num in same_number_type_only and skc_num != pkc_num:
            continue

        # Rule 3: Dc pairs only with Dc, and never with Dc multiplication/division
        if pkc_num == "DECIMALS" and (skc_num != "DECIMALS" or skc_op in ["Multiplication", "Division"]):
            continue

        # Rule 4: Pc pairs only with whole-number basic operations.
        # NOTE(review): the original had a separate branch for PERCENTAGE/"Primary"
        # candidates, but it and the fallback branch both skipped the candidate, so
        # every non-WHOLE-NUMBERS candidate is rejected. Confirm that PERCENTAGE
        # "Secondary" candidates are really meant to be rejected as well.
        if pkc_num == "PERCENTAGE" and (skc_num != "WHOLE NUMBERS" or skc_op not in basic_ops):
            continue

        # Rule 5: Rt pairs only with decimal addition/subtraction.
        # NOTE(review): same collapsed-branch situation as Rule 4, here for RATE candidates.
        if pkc_num == "RATE" and (skc_num != "DECIMALS" or skc_op not in ["Addition", "Subtraction"]):
            continue

        # Rule 7: BP pairs only with BP, and never with BPRepSN
        if pkc_num == "BASE AND POWER" and (skc_num != "BASE AND POWER" or skc["KC_code"] == "BPRepSN"):
            continue

        # Rule 9: a Division primary KC is never paired with another Division KC
        if pkc_op == "Division" and skc_op == "Division":
            continue

        # Rule 10: skip Secondary KCs of the same Number Type and Operation as the Primary KC
        if (pkc["KC_type"] == "Primary" and skc["KC_type"] == "Secondary"
                and pkc_num == skc_num and pkc_op == skc_op):
            continue

        # Skip these KCs as they are not to be paired
        if skc["KC_code"] in non_skc_list:
            continue

        # Fixed rules: restrict the candidate's number type and operation for listed primary KCs
        if pkc_code in fixed_rule_map:
            allowed_type = fixed_rule_map[pkc_code]
            if skc_num != allowed_type or skc_op not in fixed_rule_operations.get(pkc_code, []):
                continue

        selected_skcs.append(skc)

    # Forced pairs are looked up in the full sheet (first matching row), regardless of the filters above.
    for skc_code in forced_pairs.get(pkc_code, []):
        skc_rows = primary_df[primary_df["KC_code"] == skc_code]
        if not skc_rows.empty:
            selected_skcs.append(skc_rows.iloc[0])

    # --- Emit the pairs ---
    for skc in selected_skcs:
        if pkc_code == skc["KC_code"]:  ## Skip if primary and secondary KCs are the same
            continue
        forward = (pkc_code, skc["KC_code"])
        reverse = (skc["KC_code"], pkc_code)

        if reverse in seen_code_pairs:
            continue  # Skip if reverse direction already added

        seen_code_pairs.add(forward)
        key = f"{pkc['Grade_id']}-{pkc['KC_code']}_{skc['Grade_id']}-{skc['KC_code']}"
        value = {
            "primary_kc_name": pkc.get("KC_name", ""),
            "primary_kc_grade": pkc.get("Grade", ""),
            "primary_kc_number": pkc.get("Level_0", ""),
            "primary_kc_operation": pkc.get("Level_1_id", ""),
            "secondary_kc_name": skc.get("KC_name", ""),
            "secondary_kc_grade": skc.get("Grade", ""),
            "secondary_kc_number": skc.get("Level_0", ""),
            "secondary_kc_operation": skc.get("Level_1_id", "")
        }
        kc_pairs[key] = value

# Save output
# os.path.join keeps the relative path valid on any OS; on Windows it yields the
# same backslash-separated string as before.
output_path = os.path.join("Data", "Final_kc_pairs", "primary_kc_pairs_final.json")
# Create the target folder when it is missing instead of failing in open().
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII characters as-is rather than as \u escapes.
    json.dump(kc_pairs, f, indent=4, ensure_ascii=False)

print(f"KC pairings saved to: {output_path}", "len(kc_pairs):", len(kc_pairs))


KC pairings saved to: Data\Final_kc_pairs\primary_kc_pairs_final.json len(kc_pairs): 130


In [3]:
import json
# Shared across all grades so that a pair emitted for one grade is not emitted
# again in the reverse direction for another grade.
seen_code_pairs = set()

# ---------------------------------------------------------------------------
# Everything below up to the grade loop is independent of the grade being
# processed, so it is defined once instead of once per grade / per candidate.
# ---------------------------------------------------------------------------

# Define hierarchy mappings (a larger number is a higher tier)
number_type_rank = {
    "MATRICES": 7, "SET": 7, "ALGEBRA": 7, "STATISTICS AND PROBABILITY": 7,
    "BASE AND POWER": 6,
    "RATIO": 5, "RATE": 5, "PERCENTAGE": 5,
    "FRACTIONS AND DECIMALS": 4,
    "DECIMALS": 3,
    "FRACTIONS": 2,
    "WHOLE NUMBERS": 1
}
operation_rank = {
    "Solving": 8,
    "Division": 7,
    "Multiplication": 6,
    "Subtraction": 5,
    "Addition": 4, "Operations": 4,
    "Evaluation": 3, "Conversion": 3, "Simplifying": 3, "Finding": 3, "Rounding": 3,
    "Comparison and ordering": 2,
    "Representation and concept": 1
}


def grade_less_equal(g1, g2):
    """Return True when grade id ``g1`` is the same grade as, or lower than, ``g2``.

    A grade id is one level letter followed by a number, e.g. ``"P3"`` or ``"O1"``.
    Any ``"P"`` grade counts as lower than any ``"O"`` grade (same convention as
    the definition used later for the combined sheet); within one level the
    numeric parts are compared.
    """
    levels = (g1[0], g2[0])
    if levels == ("P", "O"):
        return True
    if levels == ("O", "P"):
        return False
    return int(g1[1:]) <= int(g2[1:])


basic_ops = ["Addition", "Subtraction", "Multiplication", "Division"]
excluded_ops = list(basic_ops)  # same four operations; both names are kept

# Primary number types whose secondary KC must share the same Level_0 (Rules 2, 6 and 8).
same_number_type_only = ("FRACTIONS", "FRACTIONS AND DECIMALS", "MATRICES")

# KC codes that the generic rules never select as a secondary KC
# (they can still be attached through `forced_pairs` below).
non_skc_list = [
    "FrRep12", "FrRepMixIm", "FrRepSet", "FrCnvEq",
    "RoFndDvqWN", "RoFndRoWN", "RoFndTmWN",
    "RoRepFr", "RoRepDc", "RoRepDP", "RoRepIvP",
    "PcRep2q", "PcRepRvs",
    "SPFndmdn", "SPFndQtl", "SPFndmean", "SPFndstd",
    "SPAddProb", "SPMulProb",
    "STOprUn",
    "AgRepLrEx", "AgSlvLrN", "AgRepExSq", "AgRepnth",
    "AgSlvFrLr", "AgSlvSq1v", "AgSlvLr2v", "AgSlvIneq",
    "AgRepEq", "AgRepIneq", "AgEvlEx",
    "RtFndR", "DcCnvN2D", "DcCnvD2N", "RtFndT", "RtFndU",
    "RoSmpWN", "RoSmpFr", "RoSmpDc",
    "PcCnv2Fr", "PcCnv2Dc",
    "SPFndmode", "SPFndPctl", "SPFndrng",
    "SPRepPrSE", "SPFndPrCE",
    "STOprIns",
    "AgSmpLrEx", "AgEvlLrEx"
]

# Primary KC code -> the only Level_0 a rule-selected secondary KC may have.
# The hand-written table listed exactly the codes of `non_skc_list` (one of them
# twice), so it is derived from that list. A value of None is not expected to
# equal any Level_0 in the sheet, i.e. such a primary KC only receives the
# secondary KCs named in `forced_pairs`.
fixed_rule_map = dict.fromkeys(non_skc_list)
fixed_rule_map.update({
    "RoFndDvqWN": "WHOLE NUMBERS",
    "RoFndRoWN": "WHOLE NUMBERS",
    "RoFndTmWN": "WHOLE NUMBERS",
    "RoRepFr": "FRACTIONS",
    "RoRepDc": "DECIMALS",
    "RoRepDP": "WHOLE NUMBERS",
    "RoRepIvP": "WHOLE NUMBERS",
})
# Primary KC code -> operations its rule-selected secondary KC may have
# (a code without an entry accepts no operation).
fixed_rule_operations = {
    "RoFndDvqWN": ["Addition", "Subtraction"],
    "RoFndTmWN": ["Addition", "Subtraction"],
    "RoFndRoWN": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepFr": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepDc": ["Addition", "Subtraction"],
    "RoRepDP": ["Multiplication", "Division"],
    "RoRepIvP": ["Multiplication", "Division"]
}
# Forced characteristic pairing rules: primary KC code -> secondary KC codes
# that are always attached, bypassing every filter and rule.
forced_pairs = {
    "FrRep12": ["FrCnvEq"], "FrRepMixIm": ["FrCnvEq"], "FrRepSet": ["FrCnvEq"],
    "RtFndR": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndT": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndU": ["DcCnvN2D", "DcCnvD2N"],
    "RoFndRoWN": ["RoSmpWN"],
    "RoRepFr": ["RoSmpFr"],
    "RoRepDc": ["RoSmpDc"],
    "PcRep2q": ["PcCnv2Fr", "PcCnv2Dc"], "PcRepRvs": ["PcCnv2Fr", "PcCnv2Dc"],
    "SPFndmdn": ["SPFndmode", "SPFndPctl", "SPFndrng"],
    "SPFndQtl": ["SPFndIQR"],
    "SPFndmean": ["BPRepSN"],
    "SPFndstd": ["SPFndmean"],
    "SPAddProb": ["SPRepPrSE", "SPFndPrCE"],
    "SPMulProb": ["SPRepPrSE", "SPFndPrCE"],
    "STOprUn": ["STOprIns"],
    "AgRepLrEx": ["AgSmpLrEx", "AgEvlLrEx"],
    "AgSlvLrN": ["AgRepLrEx"],
    "AgRepExSq": ["AgEvlEx"],
    "AgRepnth": ["AgEvlEx"],
    "AgSlvFrLr": ["AgRepEq"],
    "AgSlvSq1v": ["AgRepEq"],
    "AgSlvLr2v": ["AgRepEq"],
    "AgSlvIneq": ["AgRepIneq"],
    "MXSub": ["MXAdd"],
    "MXMulSM": ["MXAdd", "MXSub"],
    "MXMul": ["MXAdd", "MXSub", "MXMulSM"],
    "FDSub": ["FDAdd"],
    "FDMul": ["FDAdd", "FDSub"],
    "FDDiv": ["FDAdd", "FDSub", "FDMul"]
}

# Candidate pool (rows typed "Primary" or "Secondary") and its ranks; identical for every grade.
secondary_kcs = primary_df[primary_df["KC_type"].isin(["Primary", "Secondary"])].copy()
skc_num_rank = secondary_kcs["Level_0"].map(number_type_rank).fillna(-1)
skc_op_rank = secondary_kcs["Level_1_id"].map(operation_rank).fillna(-1)

for PRIMARY_GRADE in ["P3", "P4", "P5", "P6"]:
    # --------------- CONFIGURATION ---------------
    # os.path.join keeps the relative path valid on any OS; on Windows it yields
    # the same backslash-separated string as before.
    OUTPUT_PATH = os.path.join("Data", "Final_kc_pairs", f"{PRIMARY_GRADE}_kc_pairs_final.json")
    # ---------------------------------------------

    # Primary KCs of this grade only
    primary_kcs = primary_df[(primary_df["Grade_id"] == PRIMARY_GRADE) & (primary_df["KC_type"] == "Primary")].copy()

    # Pair generation
    kc_pairs = {}

    for _, pkc in primary_kcs.iterrows():
        pkc_code = pkc["KC_code"]
        pkc_num = pkc["Level_0"]
        pkc_op = pkc["Level_1_id"]
        pkc_num_rank = number_type_rank.get(pkc_num, -1)
        pkc_op_rank = operation_rank.get(pkc_op, -1)

        # --- Vectorised pre-filter of the candidate pool ---
        ## Secondary KC must be of the same or lower grade than Primary KC
        grade_ok = secondary_kcs["Grade_id"].apply(lambda g: grade_less_equal(g, pkc["Grade_id"]))
        ## Secondary KC must: Either have a lower Number rank than Primary KC
        lower_number = skc_num_rank < pkc_num_rank
        ## Or have the same Number type, but with a lower Operation rank/the same Operation type compared to Primary KC
        same_number = secondary_kcs["Level_0"] == pkc_num
        operation_ok = (skc_op_rank < pkc_op_rank) | (secondary_kcs["Level_1_id"] == pkc_op)
        valid_skcs = secondary_kcs[grade_ok & (lower_number | (same_number & operation_ok))].copy()

        # Exclude Primary KCs with specific operations
        primary_basic_op = (
            (valid_skcs["KC_type"] == "Primary")
            & valid_skcs["Level_0"].isin(["WHOLE NUMBERS", "FRACTIONS", "DECIMALS"])
            & valid_skcs["Level_1_id"].isin(excluded_ops)
        )
        valid_skcs = valid_skcs[~primary_basic_op]
        valid_skcs = valid_skcs[valid_skcs["KC_code"] != pkc_code]  # Exclude the same KC code

        # --- Row-level rules: a candidate is kept only if no rule rejects it ---
        selected_skcs = []
        for _, skc in valid_skcs.iterrows():
            skc_num = skc["Level_0"]
            skc_op = skc["Level_1_id"]

            # Rule 1: Secondary KC marked as "Secondary" must match the primary KC's Number Type
            # except with operations: Addition, Subtraction, Multiplication, and Division
            if skc["KC_type"] == "Secondary" and skc_num != pkc_num and skc_op not in basic_ops:
                continue

            # Rules 2, 6, 8: Fr / FD / MT primary KCs only pair within their own number type
            if pkc_num in same_number_type_only and skc_num != pkc_num:
                continue

            # Rule 3: Dc pairs only with Dc, and never with Dc multiplication/division
            if pkc_num == "DECIMALS" and (skc_num != "DECIMALS" or skc_op in ["Multiplication", "Division"]):
                continue

            # Rule 4: Pc pairs only with whole-number basic operations.
            # NOTE(review): the original had a separate branch for PERCENTAGE/"Primary"
            # candidates, but it and the fallback branch both skipped the candidate, so
            # every non-WHOLE-NUMBERS candidate is rejected. Confirm that PERCENTAGE
            # "Secondary" candidates are really meant to be rejected as well.
            if pkc_num == "PERCENTAGE" and (skc_num != "WHOLE NUMBERS" or skc_op not in basic_ops):
                continue

            # Rule 5: Rt pairs only with decimal addition/subtraction.
            # NOTE(review): same collapsed-branch situation as Rule 4, here for RATE candidates.
            if pkc_num == "RATE" and (skc_num != "DECIMALS" or skc_op not in ["Addition", "Subtraction"]):
                continue

            # Rule 7: BP pairs only with BP, and never with BPRepSN
            if pkc_num == "BASE AND POWER" and (skc_num != "BASE AND POWER" or skc["KC_code"] == "BPRepSN"):
                continue

            # Rule 9: a Division primary KC is never paired with another Division KC
            if pkc_op == "Division" and skc_op == "Division":
                continue

            # Rule 10: skip Secondary KCs of the same Number Type and Operation as the Primary KC
            if (pkc["KC_type"] == "Primary" and skc["KC_type"] == "Secondary"
                    and pkc_num == skc_num and pkc_op == skc_op):
                continue

            # Skip these KCs as they are not to be paired
            if skc["KC_code"] in non_skc_list:
                continue

            # Fixed rules: restrict the candidate's number type and operation for listed primary KCs
            if pkc_code in fixed_rule_map:
                allowed_type = fixed_rule_map[pkc_code]
                if skc_num != allowed_type or skc_op not in fixed_rule_operations.get(pkc_code, []):
                    continue

            selected_skcs.append(skc)

        # Forced pairs are looked up in the full sheet (first matching row), regardless of the filters above.
        for skc_code in forced_pairs.get(pkc_code, []):
            skc_rows = primary_df[primary_df["KC_code"] == skc_code]
            if not skc_rows.empty:
                selected_skcs.append(skc_rows.iloc[0])

        # --- Emit the pairs ---
        for skc in selected_skcs:
            if pkc_code == skc["KC_code"]:  ## Skip if primary and secondary KCs are the same
                continue
            forward = (pkc_code, skc["KC_code"])
            reverse = (skc["KC_code"], pkc_code)

            if reverse in seen_code_pairs:
                continue  # Skip if reverse direction already added

            seen_code_pairs.add(forward)
            key = f"{pkc['Grade_id']}-{pkc['KC_code']}_{skc['Grade_id']}-{skc['KC_code']}"
            value = {
                "primary_kc_name": pkc.get("KC_name", ""),
                "primary_kc_grade": pkc.get("Grade", ""),
                "primary_kc_number": pkc.get("Level_0", ""),
                "primary_kc_operation": pkc.get("Level_1_id", ""),
                "secondary_kc_name": skc.get("KC_name", ""),
                "secondary_kc_grade": skc.get("Grade", ""),
                "secondary_kc_number": skc.get("Level_0", ""),
                "secondary_kc_operation": skc.get("Level_1_id", "")
            }
            kc_pairs[key] = value

    # Save to JSON (create the target folder when it is missing instead of failing in open())
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(kc_pairs, f, indent=4, ensure_ascii=False)

    print(f"KC pairs saved to: {OUTPUT_PATH}", len(kc_pairs))


KC pairs saved to: Data\Final_kc_pairs\P3_kc_pairs_final.json 19
KC pairs saved to: Data\Final_kc_pairs\P4_kc_pairs_final.json 32
KC pairs saved to: Data\Final_kc_pairs\P5_kc_pairs_final.json 47
KC pairs saved to: Data\Final_kc_pairs\P6_kc_pairs_final.json 32


In [4]:
import numpy as np
import pandas as pd

# Load the "O level" sheet of the same workbook.
# The relative path is assembled with os.path.join so it resolves on any OS
# (the former backslash-separated literal only worked on Windows).
secondary_df = pd.read_excel(os.path.join("Data", "Excel", "KC revised v5.xlsx"), sheet_name="O level")
# Same header normalisation as for the primary sheet: trim, then spaces -> underscores.
secondary_df.columns = secondary_df.columns.str.strip().str.replace(" ", "_")
# From here on `secondary_df` holds the primary rows followed by the O-level rows,
# re-indexed from 0, so O-level KCs can be paired with primary-level ones.
secondary_df =pd.concat([primary_df, secondary_df], ignore_index=True)
# Row_Index is rewritten so it matches the position in the combined frame.
secondary_df["Row_Index"] = secondary_df.index
print(secondary_df['Level_0'].unique())

['WHOLE NUMBERS' 'FRACTIONS' 'DECIMALS' 'PERCENTAGE' 'RATE' 'RATIO'
 'ALGEBRA' 'STATISTICS AND PROBABILITY' 'BASE AND POWER' 'SET' 'MATRICES']


In [5]:
import json
# Fresh registry of (primary_code, secondary_code) tuples for this run.
seen_code_pairs = set()

# Number-type hierarchy: Level_0 name -> tier (a larger number is a higher tier).
number_type_rank = {
    "MATRICES": 7,
    "SET": 7,
    "ALGEBRA": 7,
    "STATISTICS AND PROBABILITY": 7,
    "BASE AND POWER": 6,
    "RATIO": 5,
    "RATE": 5,
    "PERCENTAGE": 5,
    "FRACTIONS AND DECIMALS": 4,
    "DECIMALS": 3,
    "FRACTIONS": 2,
    "WHOLE NUMBERS": 1,
}

# Operation hierarchy: Level_1_id name -> tier (a larger number is a higher tier).
operation_rank = {
    "Solving": 8,
    "Division": 7,
    "Multiplication": 6,
    "Subtraction": 5,
    "Addition": 4,
    "Operations": 4,
    "Evaluation": 3,
    "Conversion": 3,
    "Simplifying": 3,
    "Finding": 3,
    "Rounding": 3,
    "Comparison and ordering": 2,
    "Representation and concept": 1,
}

def grade_less_equal(g1, g2):
    """Return True when grade id ``g1`` is the same as or lower than ``g2``.

    Grade ids are strings made of a one-letter level prefix followed by a
    number (e.g. "P3", "O1"). Any "P" grade is treated as lower than any
    "O" grade; otherwise only the numeric parts are compared.
    """
    if g1[0] == "P":
        if g2[0] == "O":
            return True
    elif g1[0] == "O":
        if g2[0] == "P":
            return False
    # Same level (or an unrecognised prefix): fall back to the numeric suffix
    return int(g1[1:]) <= int(g2[1:])

# Filter primary and secondary KCs
## Primary KCs must be marked as "Primary" and belong to grade O1, O2 or O3
primary_kcs = secondary_df[(secondary_df["KC_type"] == "Primary") & (secondary_df["Grade_id"].isin(["O1", "O2", "O3"]))].copy()
## Secondary KCs can be either "Primary" or "Secondary"
secondary_kcs = secondary_df[secondary_df["KC_type"].isin(["Primary", "Secondary"])].copy()

# Rank of every secondary candidate (labels missing from the rank tables become -1).
# These do not depend on the primary KC, so they are computed once instead of per iteration.
skc_num_ranks = secondary_kcs["Level_0"].map(number_type_rank).fillna(-1)
skc_op_ranks = secondary_kcs["Level_1_id"].map(operation_rank).fillna(-1)

# ---------------------------------------------------------------------------
# Rule tables. They are constant for the whole run, so they are defined once
# here rather than being rebuilt inside the loops.
# ---------------------------------------------------------------------------
basic_ops = ["Addition", "Subtraction", "Multiplication", "Division"]
excluded_ops = list(basic_ops)

# KC codes that are never accepted as a secondary KC by the generic rules
# (they can still be attached through forced_pairs below).
non_skc_list = [
    "FrRep12", "FrRepMixIm", "FrRepSet", "FrCnvEq",
    "RoFndDvqWN", "RoFndRoWN", "RoFndTmWN",
    "RoRepFr", "RoRepDc", "RoRepDP", "RoRepIvP",
    "PcRep2q", "PcRepRvs",
    "SPFndmdn", "SPFndQtl", "SPFndmean", "SPFndstd",
    "SPAddProb", "SPMulProb",
    "STOprUn",
    "AgRepLrEx", "AgSlvLrN", "AgRepExSq", "AgRepnth",
    "AgSlvFrLr", "AgSlvSq1v", "AgSlvLr2v", "AgSlvIneq",
    "AgRepEq", "AgRepIneq", "AgEvlEx",
    "RtFndR", "DcCnvN2D", "DcCnvD2N", "RtFndT", "RtFndU",
    "RoSmpWN", "RoSmpFr", "RoSmpDc",
    "PcCnv2Fr", "PcCnv2Dc",
    "SPFndmode", "SPFndPctl", "SPFndrng",
    "SPRepPrSE", "SPFndPrCE",
    "STOprIns",
    "AgSmpLrEx", "AgEvlLrEx",
]

# Primary KC code -> the only Level_0 its generic secondaries may have.
# A value of None matches no candidate, so those primaries receive partners
# exclusively through forced_pairs.
# (The original listed "AgRepLrEx" twice; the duplicate key has been dropped.)
fixed_rule_map = {
    "FrRep12": None, "FrRepMixIm": None, "FrRepSet": None, "FrCnvEq": None,
    "RoFndDvqWN": "WHOLE NUMBERS",
    "RoFndRoWN": "WHOLE NUMBERS",
    "RoFndTmWN": "WHOLE NUMBERS",
    "RoRepFr": "FRACTIONS",
    "RoRepDc": "DECIMALS",
    "RoRepDP": "WHOLE NUMBERS",
    "RoRepIvP": "WHOLE NUMBERS",
    "PcRep2q": None, "PcRepRvs": None,
    "SPFndmdn": None, "SPFndQtl": None, "SPFndmean": None, "SPFndstd": None,
    "SPAddProb": None, "SPMulProb": None,
    "STOprUn": None,
    "AgRepLrEx": None, "AgSlvLrN": None, "AgRepExSq": None, "AgRepnth": None,
    "AgSlvFrLr": None, "AgSlvSq1v": None, "AgSlvLr2v": None, "AgSlvIneq": None,
    "AgRepEq": None, "AgRepIneq": None, "AgEvlEx": None,
    "RtFndR": None, "DcCnvN2D": None, "DcCnvD2N": None, "RtFndT": None, "RtFndU": None,
    "RoSmpWN": None, "RoSmpFr": None, "RoSmpDc": None,
    "PcCnv2Fr": None, "PcCnv2Dc": None,
    "SPFndmode": None, "SPFndPctl": None, "SPFndrng": None,
    "SPRepPrSE": None, "SPFndPrCE": None,
    "STOprIns": None,
    "AgSmpLrEx": None, "AgEvlLrEx": None,
}
# Primary KC code -> operations (Level_1_id) its generic secondaries may have.
# Codes absent from this table accept no operation at all.
fixed_rule_operations = {
    "RoFndDvqWN": ["Addition", "Subtraction"],
    "RoFndTmWN": ["Addition", "Subtraction"],
    "RoFndRoWN": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepFr": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepDc": ["Addition", "Subtraction"],
    "RoRepDP": ["Multiplication", "Division"],
    "RoRepIvP": ["Multiplication", "Division"],
}

# Forced characteristic pairing rules for some KC_codes:
# primary KC code -> secondary KC codes that are always attached.
forced_pairs = {
    "FrRep12": ["FrCnvEq"], "FrRepMixIm": ["FrCnvEq"], "FrRepSet": ["FrCnvEq"],
    "RtFndR": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndT": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndU": ["DcCnvN2D", "DcCnvD2N"],
    "RoFndRoWN": ["RoSmpWN"],
    "RoRepFr": ["RoSmpFr"],
    "RoRepDc": ["RoSmpDc"],
    "PcRep2q": ["PcCnv2Fr", "PcCnv2Dc"], "PcRepRvs": ["PcCnv2Fr", "PcCnv2Dc"],
    "SPFndmdn": ["SPFndmode", "SPFndPctl", "SPFndrng"],
    "SPFndQtl": ["SPFndIQR"],
    "SPFndmean": ["BPRepSN"],
    "SPFndstd": ["SPFndmean"],
    "SPAddProb": ["SPRepPrSE", "SPFndPrCE"],
    "SPMulProb": ["SPRepPrSE", "SPFndPrCE"],
    "STOprUn": ["STOprIns"],
    "AgRepLrEx": ["AgSmpLrEx", "AgEvlLrEx"],
    "AgSlvLrN": ["AgRepLrEx"],
    "AgRepExSq": ["AgEvlEx"],
    "AgRepnth": ["AgEvlEx"],
    "AgSlvFrLr": ["AgRepEq"],
    "AgSlvSq1v": ["AgRepEq"],
    "AgSlvLr2v": ["AgRepEq"],
    "AgSlvIneq": ["AgRepIneq"],
    "MXSub": ["MXAdd"],
    "MXMulSM": ["MXAdd", "MXSub"],
    "MXMul": ["MXAdd", "MXSub", "MXMulSM"],
    "FDSub": ["FDAdd"],
    "FDMul": ["FDAdd", "FDSub"],
    "FDDiv": ["FDAdd", "FDSub", "FDMul"],
}

# Pair generation
kc_pairs = {}
for _, pkc in primary_kcs.iterrows():
    pkc_code = pkc["KC_code"]
    pkc_num = pkc["Level_0"]
    pkc_op = pkc["Level_1_id"]
    pkc_num_rank = number_type_rank.get(pkc_num, -1)
    pkc_op_rank = operation_rank.get(pkc_op, -1)

    # --- Vectorised pre-filter over all secondary candidates ---
    ## Secondary KC must be of the same or lower grade than Primary KC
    grade_ok = secondary_kcs["Grade_id"].apply(lambda g: grade_less_equal(g, pkc["Grade_id"]))
    ## Secondary KC must: Either have a lower Number rank than Primary KC
    lower_number = skc_num_ranks < pkc_num_rank
    ## Or have the same Number type, but with a lower Operation rank/the same Operation type compared to Primary KC
    same_number_ok = (secondary_kcs["Level_0"] == pkc_num) & (
        (skc_op_ranks < pkc_op_rank) | (secondary_kcs["Level_1_id"] == pkc_op)
    )
    valid_skcs = secondary_kcs[grade_ok & (lower_number | same_number_ok)].copy()

    # Exclude "Primary"-typed KCs that are a basic operation on whole numbers, fractions or decimals
    primary_basic_op = (
        (valid_skcs["KC_type"] == "Primary")
        & (valid_skcs["Level_0"].isin(["WHOLE NUMBERS", "FRACTIONS", "DECIMALS"]))
        & (valid_skcs["Level_1_id"].isin(excluded_ops))
    )
    valid_skcs = valid_skcs[~primary_basic_op]
    valid_skcs = valid_skcs[valid_skcs["KC_code"] != pkc_code]  # Exclude the same KC code

    # --- Row-by-row characteristic rules ---
    has_fixed_rule = pkc_code in fixed_rule_map
    selected_skcs = []
    for _, skc in valid_skcs.iterrows():
        skc_num = skc["Level_0"]
        skc_op = skc["Level_1_id"]
        skc_is_secondary = skc["KC_type"] == "Secondary"

        # Rule 1: a "Secondary"-typed KC must share the primary KC's Number Type,
        # unless its operation is Addition, Subtraction, Multiplication or Division
        if skc_is_secondary and skc_num != pkc_num and skc_op not in basic_ops:
            continue

        # Rule 2: FRACTIONS primaries pair only with FRACTIONS
        if pkc_num == "FRACTIONS" and skc_num != "FRACTIONS":
            continue

        # Rule 3: DECIMALS primaries pair only with DECIMALS, excluding Multiplication/Division
        if pkc_num == "DECIMALS" and (skc_num != "DECIMALS" or skc_op in ["Multiplication", "Division"]):
            continue

        # Rule 4: PERCENTAGE primaries pair only with WHOLE NUMBERS basic operations.
        # NOTE(review): the original had an `elif` for PERCENTAGE/"Primary" candidates
        # followed by an `else`, and both branches skipped the candidate. If "Secondary"
        # PERCENTAGE KCs were meant to be allowed, this rule needs changing; the
        # behaviour is kept as it was.
        if pkc_num == "PERCENTAGE" and not (skc_num == "WHOLE NUMBERS" and skc_op in basic_ops):
            continue

        # Rule 5: RATE primaries pair only with DECIMALS Addition/Subtraction.
        # NOTE(review): same redundant `elif`/`else` shape as Rule 4 in the original
        # (for RATE/"Primary" candidates); behaviour kept as it was.
        if pkc_num == "RATE" and not (skc_num == "DECIMALS" and skc_op in ["Addition", "Subtraction"]):
            continue

        # Rule 6: FRACTIONS AND DECIMALS primaries pair only with FRACTIONS AND DECIMALS
        if pkc_num == "FRACTIONS AND DECIMALS" and skc_num != "FRACTIONS AND DECIMALS":
            continue

        # Rule 7: BASE AND POWER primaries pair only with BASE AND POWER, never with BPRepSN
        if pkc_num == "BASE AND POWER" and (skc_num != "BASE AND POWER" or skc["KC_code"] == "BPRepSN"):
            continue

        # Rule 8: MATRICES primaries pair only with MATRICES
        if pkc_num == "MATRICES" and skc_num != "MATRICES":
            continue

        # Rule 9: a Division primary is never paired with a Division secondary
        if pkc_op == "Division" and skc_op == "Division":
            continue

        # Rule 10: skip "Secondary"-typed KCs with the same Number Type and Operation as the primary
        if (pkc["KC_type"] == "Primary") and skc_is_secondary and (pkc_num == skc_num) and (pkc_op == skc_op):
            continue

        # Skip these KCs as they are not to be paired through the generic rules
        if skc["KC_code"] in non_skc_list:
            continue

        # Fixed rules: restrict the candidate's Number Type and Operation for listed primaries
        if has_fixed_rule:
            allowed_type = fixed_rule_map[pkc_code]
            if skc_num != allowed_type or skc_op not in fixed_rule_operations.get(pkc_code, []):
                continue

        selected_skcs.append(skc)

    # Forced pairs are looked up in the full secondary_df by KC_code (first matching row)
    # and appended as-is: they do not pass through the grade/rank filter or the rules above.
    if pkc_code in forced_pairs:
        for skc_code in forced_pairs[pkc_code]:
            skc_rows = secondary_df[secondary_df["KC_code"] == skc_code]
            if not skc_rows.empty:
                selected_skcs.append(skc_rows.iloc[0])

    for skc in selected_skcs:
        if pkc_code == skc["KC_code"]:  ## Skip if primary and secondary KCs are the same
            continue
        forward = (pkc_code, skc["KC_code"])
        reverse = (skc["KC_code"], pkc_code)

        if reverse in seen_code_pairs:
            continue  # Skip if reverse direction already added

        seen_code_pairs.add(forward)
        # A repeated key (e.g. a forced pair that was also selected) simply overwrites itself
        key = f"{pkc['Grade_id']}-{pkc['KC_code']}_{skc['Grade_id']}-{skc['KC_code']}"
        value = {
            "primary_kc_name": pkc.get("KC_name", ""),
            "primary_kc_grade": pkc.get("Grade", ""),
            "primary_kc_number": pkc.get("Level_0", ""),
            "primary_kc_operation": pkc.get("Level_1_id", ""),
            "secondary_kc_name": skc.get("KC_name", ""),
            "secondary_kc_grade": skc.get("Grade", ""),
            "secondary_kc_number": skc.get("Level_0", ""),
            "secondary_kc_operation": skc.get("Level_1_id", "")
        }
        kc_pairs[key] = value

# Save output
output_path = r"Data\Final_kc_pairs\secondary_kc_pairs_final.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(kc_pairs, f, indent=4, ensure_ascii=False)

print(f"KC pairings saved to: {output_path}", "len(kc_pairs):", len(kc_pairs))


KC pairings saved to: Data\Final_kc_pairs\secondary_kc_pairs_final.json len(kc_pairs): 46


In [6]:
import json
# Shared across all grades: a pair emitted for one grade suppresses its reverse in later grades
seen_code_pairs = set()

# ---------------------------------------------------------------------------
# Everything below up to the grade loop is identical for every grade, so it is
# defined once here instead of being rebuilt on each loop iteration.
# The rank tables and grade_less_equal are kept in this cell on purpose, so the
# cell does not depend on which earlier cell last defined them.
# ---------------------------------------------------------------------------
# Define hierarchy mappings
number_type_rank = {
    "MATRICES": 7, "SET": 7, "ALGEBRA": 7, "STATISTICS AND PROBABILITY": 7,
    "BASE AND POWER": 6,
    "RATIO": 5, "RATE": 5, "PERCENTAGE": 5,
    "FRACTIONS AND DECIMALS": 4,
    "DECIMALS": 3,
    "FRACTIONS": 2,
    "WHOLE NUMBERS": 1
}
operation_rank = {
    "Solving": 8,
    "Division": 7,
    "Multiplication": 6,
    "Subtraction": 5,
    "Addition": 4, "Operations": 4,
    "Evaluation": 3, "Conversion": 3, "Simplifying": 3, "Finding": 3, "Rounding": 3,
    "Comparison and ordering": 2,
    "Representation and concept": 1
}


def grade_less_equal(g1, g2):
    """Return True when grade id ``g1`` is the same as or lower than ``g2``.

    Any "P" grade is treated as lower than any "O" grade; otherwise the
    numeric parts after the one-letter prefix are compared.
    """
    if g1[0] == "P" and g2[0] == "O":
        return True
    elif g1[0] == "O" and g2[0] == "P":
        return False
    else:
        return int(g1[1:]) <= int(g2[1:])


basic_ops = ["Addition", "Subtraction", "Multiplication", "Division"]
excluded_ops = list(basic_ops)

# KC codes that are never accepted as a secondary KC by the generic rules
# (they can still be attached through forced_pairs below).
non_skc_list = [
    "FrRep12", "FrRepMixIm", "FrRepSet", "FrCnvEq",
    "RoFndDvqWN", "RoFndRoWN", "RoFndTmWN",
    "RoRepFr", "RoRepDc", "RoRepDP", "RoRepIvP",
    "PcRep2q", "PcRepRvs",
    "SPFndmdn", "SPFndQtl", "SPFndmean", "SPFndstd",
    "SPAddProb", "SPMulProb",
    "STOprUn",
    "AgRepLrEx", "AgSlvLrN", "AgRepExSq", "AgRepnth",
    "AgSlvFrLr", "AgSlvSq1v", "AgSlvLr2v", "AgSlvIneq",
    "AgRepEq", "AgRepIneq", "AgEvlEx",
    "RtFndR", "DcCnvN2D", "DcCnvD2N", "RtFndT", "RtFndU",
    "RoSmpWN", "RoSmpFr", "RoSmpDc",
    "PcCnv2Fr", "PcCnv2Dc",
    "SPFndmode", "SPFndPctl", "SPFndrng",
    "SPRepPrSE", "SPFndPrCE",
    "STOprIns",
    "AgSmpLrEx", "AgEvlLrEx",
]

# Primary KC code -> the only Level_0 its generic secondaries may have.
# A value of None matches no candidate, so those primaries receive partners
# exclusively through forced_pairs.
# (The original listed "AgRepLrEx" twice; the duplicate key has been dropped.)
fixed_rule_map = {
    "FrRep12": None, "FrRepMixIm": None, "FrRepSet": None, "FrCnvEq": None,
    "RoFndDvqWN": "WHOLE NUMBERS",
    "RoFndRoWN": "WHOLE NUMBERS",
    "RoFndTmWN": "WHOLE NUMBERS",
    "RoRepFr": "FRACTIONS",
    "RoRepDc": "DECIMALS",
    "RoRepDP": "WHOLE NUMBERS",
    "RoRepIvP": "WHOLE NUMBERS",
    "PcRep2q": None, "PcRepRvs": None,
    "SPFndmdn": None, "SPFndQtl": None, "SPFndmean": None, "SPFndstd": None,
    "SPAddProb": None, "SPMulProb": None,
    "STOprUn": None,
    "AgRepLrEx": None, "AgSlvLrN": None, "AgRepExSq": None, "AgRepnth": None,
    "AgSlvFrLr": None, "AgSlvSq1v": None, "AgSlvLr2v": None, "AgSlvIneq": None,
    "AgRepEq": None, "AgRepIneq": None, "AgEvlEx": None,
    "RtFndR": None, "DcCnvN2D": None, "DcCnvD2N": None, "RtFndT": None, "RtFndU": None,
    "RoSmpWN": None, "RoSmpFr": None, "RoSmpDc": None,
    "PcCnv2Fr": None, "PcCnv2Dc": None,
    "SPFndmode": None, "SPFndPctl": None, "SPFndrng": None,
    "SPRepPrSE": None, "SPFndPrCE": None,
    "STOprIns": None,
    "AgSmpLrEx": None, "AgEvlLrEx": None,
}
# Primary KC code -> operations (Level_1_id) its generic secondaries may have.
# Codes absent from this table accept no operation at all.
fixed_rule_operations = {
    "RoFndDvqWN": ["Addition", "Subtraction"],
    "RoFndTmWN": ["Addition", "Subtraction"],
    "RoFndRoWN": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepFr": ["Addition", "Subtraction", "Multiplication", "Division"],
    "RoRepDc": ["Addition", "Subtraction"],
    "RoRepDP": ["Multiplication", "Division"],
    "RoRepIvP": ["Multiplication", "Division"],
}

# Forced characteristic pairing rules for some KC_codes:
# primary KC code -> secondary KC codes that are always attached.
forced_pairs = {
    "FrRep12": ["FrCnvEq"], "FrRepMixIm": ["FrCnvEq"], "FrRepSet": ["FrCnvEq"],
    "RtFndR": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndT": ["DcCnvN2D", "DcCnvD2N"],
    "RtFndU": ["DcCnvN2D", "DcCnvD2N"],
    "RoFndRoWN": ["RoSmpWN"],
    "RoRepFr": ["RoSmpFr"],
    "RoRepDc": ["RoSmpDc"],
    "PcRep2q": ["PcCnv2Fr", "PcCnv2Dc"], "PcRepRvs": ["PcCnv2Fr", "PcCnv2Dc"],
    "SPFndmdn": ["SPFndmode", "SPFndPctl", "SPFndrng"],
    "SPFndQtl": ["SPFndIQR"],
    "SPFndmean": ["BPRepSN"],
    "SPFndstd": ["SPFndmean"],
    "SPAddProb": ["SPRepPrSE", "SPFndPrCE"],
    "SPMulProb": ["SPRepPrSE", "SPFndPrCE"],
    "STOprUn": ["STOprIns"],
    "AgRepLrEx": ["AgSmpLrEx", "AgEvlLrEx"],
    "AgSlvLrN": ["AgRepLrEx"],
    "AgRepExSq": ["AgEvlEx"],
    "AgRepnth": ["AgEvlEx"],
    "AgSlvFrLr": ["AgRepEq"],
    "AgSlvSq1v": ["AgRepEq"],
    "AgSlvLr2v": ["AgRepEq"],
    "AgSlvIneq": ["AgRepIneq"],
    "MXSub": ["MXAdd"],
    "MXMulSM": ["MXAdd", "MXSub"],
    "MXMul": ["MXAdd", "MXSub", "MXMulSM"],
    "FDSub": ["FDAdd"],
    "FDMul": ["FDAdd", "FDSub"],
    "FDDiv": ["FDAdd", "FDSub", "FDMul"],
}

# Secondary candidates (either "Primary" or "Secondary" typed) and their ranks;
# labels missing from the rank tables become -1. None of this depends on the grade.
secondary_kcs = secondary_df[secondary_df["KC_type"].isin(["Primary", "Secondary"])].copy()
skc_num_ranks = secondary_kcs["Level_0"].map(number_type_rank).fillna(-1)
skc_op_ranks = secondary_kcs["Level_1_id"].map(operation_rank).fillna(-1)

for PRIMARY_GRADE in ["O1", "O2", "O3"]:
    # --------------- CONFIGURATION ---------------
    OUTPUT_PATH = rf"Data\Final_kc_pairs\{PRIMARY_GRADE}_kc_pairs_final.json"
    # ---------------------------------------------

    # Primary KCs: "Primary"-typed rows of the current grade only
    primary_kcs = secondary_df[(secondary_df["Grade_id"] == PRIMARY_GRADE) & (secondary_df["KC_type"] == "Primary")].copy()

    # Pair generation (one output dict per grade)
    kc_pairs = {}

    for _, pkc in primary_kcs.iterrows():
        pkc_code = pkc["KC_code"]
        pkc_num = pkc["Level_0"]
        pkc_op = pkc["Level_1_id"]
        pkc_num_rank = number_type_rank.get(pkc_num, -1)
        pkc_op_rank = operation_rank.get(pkc_op, -1)

        # --- Vectorised pre-filter over all secondary candidates ---
        ## Secondary KC must be of the same or lower grade than Primary KC
        grade_ok = secondary_kcs["Grade_id"].apply(lambda g: grade_less_equal(g, pkc["Grade_id"]))
        ## Secondary KC must: Either have a lower Number rank than Primary KC
        lower_number = skc_num_ranks < pkc_num_rank
        ## Or have the same Number type, but with a lower Operation rank/the same Operation type compared to Primary KC
        same_number_ok = (secondary_kcs["Level_0"] == pkc_num) & (
            (skc_op_ranks < pkc_op_rank) | (secondary_kcs["Level_1_id"] == pkc_op)
        )
        valid_skcs = secondary_kcs[grade_ok & (lower_number | same_number_ok)].copy()

        # Exclude "Primary"-typed KCs that are a basic operation on whole numbers, fractions or decimals
        primary_basic_op = (
            (valid_skcs["KC_type"] == "Primary")
            & (valid_skcs["Level_0"].isin(["WHOLE NUMBERS", "FRACTIONS", "DECIMALS"]))
            & (valid_skcs["Level_1_id"].isin(excluded_ops))
        )
        valid_skcs = valid_skcs[~primary_basic_op]
        valid_skcs = valid_skcs[valid_skcs["KC_code"] != pkc_code]  # Exclude the same KC code

        # --- Row-by-row characteristic rules ---
        has_fixed_rule = pkc_code in fixed_rule_map
        selected_skcs = []
        for _, skc in valid_skcs.iterrows():
            skc_num = skc["Level_0"]
            skc_op = skc["Level_1_id"]
            skc_is_secondary = skc["KC_type"] == "Secondary"

            # Rule 1: a "Secondary"-typed KC must share the primary KC's Number Type,
            # unless its operation is Addition, Subtraction, Multiplication or Division
            if skc_is_secondary and skc_num != pkc_num and skc_op not in basic_ops:
                continue

            # Rule 2: FRACTIONS primaries pair only with FRACTIONS
            if pkc_num == "FRACTIONS" and skc_num != "FRACTIONS":
                continue

            # Rule 3: DECIMALS primaries pair only with DECIMALS, excluding Multiplication/Division
            if pkc_num == "DECIMALS" and (skc_num != "DECIMALS" or skc_op in ["Multiplication", "Division"]):
                continue

            # Rule 4: PERCENTAGE primaries pair only with WHOLE NUMBERS basic operations.
            # NOTE(review): the original had an `elif` for PERCENTAGE/"Primary" candidates
            # followed by an `else`, and both branches skipped the candidate. If "Secondary"
            # PERCENTAGE KCs were meant to be allowed, this rule needs changing; the
            # behaviour is kept as it was.
            if pkc_num == "PERCENTAGE" and not (skc_num == "WHOLE NUMBERS" and skc_op in basic_ops):
                continue

            # Rule 5: RATE primaries pair only with DECIMALS Addition/Subtraction.
            # NOTE(review): same redundant `elif`/`else` shape as Rule 4 in the original
            # (for RATE/"Primary" candidates); behaviour kept as it was.
            if pkc_num == "RATE" and not (skc_num == "DECIMALS" and skc_op in ["Addition", "Subtraction"]):
                continue

            # Rule 6: FRACTIONS AND DECIMALS primaries pair only with FRACTIONS AND DECIMALS
            if pkc_num == "FRACTIONS AND DECIMALS" and skc_num != "FRACTIONS AND DECIMALS":
                continue

            # Rule 7: BASE AND POWER primaries pair only with BASE AND POWER, never with BPRepSN
            if pkc_num == "BASE AND POWER" and (skc_num != "BASE AND POWER" or skc["KC_code"] == "BPRepSN"):
                continue

            # Rule 8: MATRICES primaries pair only with MATRICES
            if pkc_num == "MATRICES" and skc_num != "MATRICES":
                continue

            # Rule 9: a Division primary is never paired with a Division secondary
            if pkc_op == "Division" and skc_op == "Division":
                continue

            # Rule 10: skip "Secondary"-typed KCs with the same Number Type and Operation as the primary
            if (pkc["KC_type"] == "Primary") and skc_is_secondary and (pkc_num == skc_num) and (pkc_op == skc_op):
                continue

            # Skip these KCs as they are not to be paired through the generic rules
            if skc["KC_code"] in non_skc_list:
                continue

            # Fixed rules: restrict the candidate's Number Type and Operation for listed primaries
            if has_fixed_rule:
                allowed_type = fixed_rule_map[pkc_code]
                if skc_num != allowed_type or skc_op not in fixed_rule_operations.get(pkc_code, []):
                    continue

            selected_skcs.append(skc)

        # Forced pairs are looked up in the full secondary_df by KC_code (first matching row)
        # and appended as-is: they do not pass through the grade/rank filter or the rules above.
        if pkc_code in forced_pairs:
            for skc_code in forced_pairs[pkc_code]:
                skc_rows = secondary_df[secondary_df["KC_code"] == skc_code]
                if not skc_rows.empty:
                    selected_skcs.append(skc_rows.iloc[0])

        for skc in selected_skcs:
            if pkc_code == skc["KC_code"]:  ## Skip if primary and secondary KCs are the same
                continue
            forward = (pkc_code, skc["KC_code"])
            reverse = (skc["KC_code"], pkc_code)

            if reverse in seen_code_pairs:
                continue  # Skip if reverse direction already added

            seen_code_pairs.add(forward)

            # A repeated key (e.g. a forced pair that was also selected) simply overwrites itself
            key = f"{pkc['Grade_id']}-{pkc['KC_code']}_{skc['Grade_id']}-{skc['KC_code']}"
            value = {
                "primary_kc_name": pkc.get("KC_name", ""),
                "primary_kc_grade": pkc.get("Grade", ""),
                "primary_kc_number": pkc.get("Level_0", ""),
                "primary_kc_operation": pkc.get("Level_1_id", ""),
                "secondary_kc_name": skc.get("KC_name", ""),
                "secondary_kc_grade": skc.get("Grade", ""),
                "secondary_kc_number": skc.get("Level_0", ""),
                "secondary_kc_operation": skc.get("Level_1_id", "")
            }
            kc_pairs[key] = value

    # Save output
    output_path = OUTPUT_PATH
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(kc_pairs, f, indent=4, ensure_ascii=False)

    print(f"KC pairings saved to: {output_path}", len(kc_pairs))

KC pairings saved to: Data\Final_kc_pairs\O1_kc_pairs_final.json 19
KC pairings saved to: Data\Final_kc_pairs\O2_kc_pairs_final.json 11
KC pairings saved to: Data\Final_kc_pairs\O3_kc_pairs_final.json 16


In [7]:
# Load the primary-level and secondary-level pair files produced earlier.
with open(r"Data\Final_kc_pairs\primary_kc_pairs_final.json", "r", encoding="utf-8") as primary_file:
    primary_pairs = json.load(primary_file)
with open(r"Data\Final_kc_pairs\secondary_kc_pairs_final.json", "r", encoding="utf-8") as secondary_file:
    secondary_pairs = json.load(secondary_file)

# Merge into one mapping; on a key collision the secondary-level entry replaces the primary-level one.
kc_pairs = dict(primary_pairs)
kc_pairs.update(secondary_pairs)

with open(r"Data\Final_kc_pairs\all_kc_pairs_final.json", "w", encoding="utf-8") as merged_file:
    json.dump(kc_pairs, merged_file, indent=4, ensure_ascii=False)
    

In [10]:
import json
from collections import Counter

import pandas as pd

# Inputs: the merged pair file (primary + secondary level) and the KC workbook
file_path_primary_kc_pairs = r"Data\Final_kc_pairs\all_kc_pairs_final.json"
file_path_excel = r'Data\Excel\KC revised v5.xlsx'

# Load every generated KC pair from the merged JSON file.
# (The variable keeps its historical name; the file holds all pairs, not only primary-level ones.)
with open(file_path_primary_kc_pairs, 'r', encoding='utf-8') as f:
    primary_kc_pairs = json.load(f)

# Read the "All" sheet of the workbook; its KC_name column is the join key for the counts
df = pd.read_excel(file_path_excel, sheet_name='All')

# Count how many pairs use each KC name in the primary role and in the secondary role
primary_count = Counter(pair["primary_kc_name"] for pair in primary_kc_pairs.values())
secondary_count = Counter(pair["secondary_kc_name"] for pair in primary_kc_pairs.values())

# Attach the counts to the sheet; KCs that never appear in a pair get 0.
# The cast keeps the counts as integers instead of the floats that fillna() would leave behind.
df['As_primary_kc_count'] = df['KC_name'].map(primary_count).fillna(0).astype(int)
df['As_secondary_kc_count'] = df['KC_name'].map(secondary_count).fillna(0).astype(int)


# Save the updated dataframe.
# NOTE(review): the output sheet is still called 'Primary level' although the rows come
# from the 'All' sheet; the name is kept so existing consumers of count3.xlsx keep working.
output_path = r'Data\Excel\count3.xlsx'
df.to_excel(output_path, sheet_name='Primary level', index=False)

output_path


'Data\\Excel\\count3.xlsx'