In [1]:
!pip install txgraffiti

Collecting txgraffiti
  Downloading txgraffiti-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting graphcalc (from txgraffiti)
  Downloading graphcalc-1.2.15-py3-none-any.whl.metadata (7.1 kB)
Downloading txgraffiti-0.4.0-py3-none-any.whl (4.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading graphcalc-1.2.15-py3-none-any.whl (68 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.9/68.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: graphcalc, txgraffiti
Successfully installed graphcalc-1.2.15 txgraffiti-0.4.0


In [2]:
# Colab single-cell: install GAP + smallgrp, export SmallGroup invariants to groups.csv,
# then write a column description / schema (JSON + Markdown) next to the CSV.
#
# Steps performed by this cell:
#   1) install GAP and the SmallGroups library package via apt,
#   2) write and run a GAP script that emits one CSV row per SmallGroup(n,k),
#   3) load the CSV into pandas and cast the 0/1 flag columns to bool,
#   4) save per-column descriptions and summary statistics as JSON and Markdown.

# ---------- 1) Install GAP + smallgrp ----------
# Refresh the apt package index before installing.
!apt-get update -qq
# -y answers prompts automatically; -qq keeps apt's own output to a minimum.
!apt-get install -y -qq gap gap-smallgrp

# ---------- 2) Write and run GAP exporter ----------
gap_script = r"""
LoadPackage("smallgrp");

N := 128;

B2I := function(b)
  if b then return 1; else return 0; fi;
end;

out := OutputTextFile("groups.csv", false);
SetPrintFormattingStatus(out, false);

AppendTo(out,
  "n,k,isAbelian,isCyclic,isNilpotent,isSolvable,isPGroup,",
  "exponent,zSize,derivedSize,frattiniSize,abelnSize,",
  "nrConjClasses,nilpotencyClass,derivedLength,",
  "p,sy2,sy3,sy5\n"
);

for n in [1..N] do
  ng := NumberSmallGroups(n);
  if ng > 0 then
    for k in [1..ng] do
      G := SmallGroup(n,k);

      isAb := IsAbelian(G);
      isCy := IsCyclic(G);
      isNi := IsNilpotentGroup(G);
      isSo := IsSolvableGroup(G);
      isPg := IsPGroup(G);

      exp := Exponent(G);

      zS  := Size(Centre(G));
      dS  := Size(DerivedSubgroup(G));
      fS  := Size(FrattiniSubgroup(G));
      abeln := Size(FactorGroup(G, DerivedSubgroup(G)));

      kcc := NrConjugacyClasses(G);

      nclass := -1;
      if isNi then nclass := NilpotencyClassOfGroup(G); fi;

      dlen := -1;
      if isSo then dlen := DerivedLength(G); fi;

      p := 0;
      if isPg then p := Set(FactorsInt(n))[1]; fi;

      sy2 := 0; sy3 := 0; sy5 := 0;
      if n mod 2 = 0 then sy2 := Size(SylowSubgroup(G,2)); fi;
      if n mod 3 = 0 then sy3 := Size(SylowSubgroup(G,3)); fi;
      if n mod 5 = 0 then sy5 := Size(SylowSubgroup(G,5)); fi;

      AppendTo(out,
        String(n), ",", String(k), ",",
        String(B2I(isAb)), ",", String(B2I(isCy)), ",",
        String(B2I(isNi)), ",", String(B2I(isSo)), ",", String(B2I(isPg)), ",",
        String(exp), ",",
        String(zS), ",", String(dS), ",", String(fS), ",", String(abeln), ",",
        String(kcc), ",",
        String(nclass), ",", String(dlen), ",",
        String(p), ",",
        String(sy2), ",", String(sy3), ",", String(sy5), "\n"
      );
    od;
  fi;
od;

CloseStream(out);
Print("Wrote groups.csv\n");
QUIT;
"""

with open("/content/export_groups.g", "w", encoding="utf-8") as f:
    f.write(gap_script)

!gap -q -b /content/export_groups.g
!ls -lh /content/groups.csv

# ---------- 3) Load CSV and coerce boolean columns ----------
import os, json
import pandas as pd
import numpy as np

# Location of the CSV produced by the GAP exporter.
CSV_PATH = "/content/groups.csv"
df = pd.read_csv(CSV_PATH)

# Flag columns that the exporter writes as 0/1 integers.
bool_cols = ["isAbelian","isCyclic","isNilpotent","isSolvable","isPGroup"]
for c in bool_cols:
    if c in df.columns:
        # astype(bool) would silently turn any value other than 0 (including
        # NaN) into True, so confirm the column really is 0/1 before casting.
        unexpected = df.loc[~df[c].isin([0, 1]), c]
        if not unexpected.empty:
            raise ValueError(
                f"column {c!r} contains values other than 0/1, "
                f"e.g. {unexpected.unique()[:5].tolist()}"
            )
        df[c] = df[c].astype(bool)

# ---------- 4) Save column description / schema ----------
# Output locations for the schema files, written next to the CSV.
OUT_DIR = "/content"
SCHEMA_JSON_PATH = os.path.join(OUT_DIR, "groups_columns_schema.json")
SCHEMA_MD_PATH   = os.path.join(OUT_DIR, "groups_columns_schema.md")

# Human-readable description of every CSV column, keyed by column name.
# Each entry states the GAP expression the exporter uses for that column and
# the sentinel value written when the invariant does not apply.
column_notes = {
    "n": "Group order |G| (SmallGroup(n,k)).",
    "k": "SmallGroup library index (1..NumberSmallGroups(n)).",
    "isAbelian": "Boolean: IsAbelian(G).",
    "isCyclic": "Boolean: IsCyclic(G).",
    "isNilpotent": "Boolean: IsNilpotentGroup(G).",
    "isSolvable": "Boolean: IsSolvableGroup(G).",
    "isPGroup": "Boolean: IsPGroup(G).",
    "exponent": "Exponent(G).",
    "zSize": "Size(Centre(G)) = |Z(G)|.",
    "derivedSize": "Size(DerivedSubgroup(G)) = |G'|.",
    "frattiniSize": "Size(FrattiniSubgroup(G)) = |Φ(G)|.",
    "abelnSize": "Size(G/G') via FactorGroup(G,DerivedSubgroup(G)) (abelianization order).",
    "nrConjClasses": "NrConjugacyClasses(G) = k(G).",
    "nilpotencyClass": "NilpotencyClassOfGroup(G) if nilpotent, else -1.",
    "derivedLength": "DerivedLength(G) if solvable, else -1.",
    # The exporter takes Set(FactorsInt(n))[1], which yields 1 for n = 1, so
    # the trivial group is recorded with p = 1 rather than a prime or 0.
    "p": "If IsPGroup(G): the unique prime dividing n (recorded as 1 for the trivial group, n = 1); else 0.",
    "sy2": "Size(SylowSubgroup(G,2)) if 2 | n, else 0.",
    "sy3": "Size(SylowSubgroup(G,3)) if 3 | n, else 0.",
    "sy5": "Size(SylowSubgroup(G,5)) if 5 | n, else 0.",
}

def col_summary(s: pd.Series):
    """Summarise one column as a plain, JSON-serialisable dict.

    The result always carries ``dtype`` (as a string), ``non_null`` and
    ``nulls`` counts.  Numeric columns with at least one non-null value also
    get ``min`` / ``max`` / ``mean`` as floats.  Boolean and object columns
    with at least one non-null value also get ``unique_count`` and up to 12
    stringified ``examples`` of their distinct values.
    """
    dtype_api = pd.api.types
    present = s.dropna()
    n_present = int(s.notna().sum())

    summary = {
        "dtype": str(s.dtype),
        "non_null": n_present,
        "nulls": int(s.isna().sum()),
    }

    # Range statistics only make sense when something is left after dropping nulls.
    if dtype_api.is_numeric_dtype(s) and len(present) > 0:
        summary["min"] = float(present.min())
        summary["max"] = float(present.max())
        summary["mean"] = float(present.mean())

    categorical_like = dtype_api.is_bool_dtype(s) or dtype_api.is_object_dtype(s)
    if categorical_like and n_present > 0:
        distinct = present.unique()
        summary["unique_count"] = int(len(distinct))
        summary["examples"] = [str(value) for value in distinct[:12]]

    return summary

# Report the order cutoff that is actually present in the data instead of a
# hard-coded copy of the exporter's N.  Every order n >= 1 has at least one
# group, so for a complete export the largest n equals that cutoff.
order_cutoff = int(df["n"].max()) if len(df) else 0

# Top-level schema document: provenance, row count, and one entry per column.
schema = {
    "source": {
        "generator": "GAP smallgrp export_groups.g",
        "SmallGroup_order_cutoff_N": order_cutoff,
        "csv_path": CSV_PATH,
    },
    "rows": int(len(df)),
    "columns": {}
}

# Each column entry is its description (empty if none is registered) merged
# with the summary statistics from col_summary.
for c in df.columns:
    schema["columns"][c] = {
        "description": column_notes.get(c, ""),
        **col_summary(df[c]),
    }

# ensure_ascii=False keeps non-ASCII characters in descriptions (e.g. Φ) readable.
with open(SCHEMA_JSON_PATH, "w", encoding="utf-8") as f:
    json.dump(schema, f, indent=2, ensure_ascii=False)

# Render the same information as a Markdown document, one section per column.
md = []
md.append("# Finite group dataset column schema\n")
md.append(f"- Source: GAP + smallgrp\n- Cutoff: N = {order_cutoff}\n- Rows: {len(df)}\n")
md.append("## Columns\n")
for c, info in schema["columns"].items():
    md.append(f"### `{c}`\n")
    if info.get("description"):
        md.append(f"{info['description']}\n")
    md.append(f"- dtype: `{info.get('dtype','')}`\n")
    md.append(f"- non-null: {info.get('non_null',0)}; nulls: {info.get('nulls',0)}\n")
    if "min" in info:
        md.append(f"- min/max/mean: {info['min']:.6g} / {info['max']:.6g} / {info['mean']:.6g}\n")
    if "unique_count" in info:
        md.append(f"- unique_count: {info['unique_count']}\n")
    if "examples" in info:
        md.append("- examples: " + ", ".join(info["examples"]) + "\n")
    md.append("\n")

with open(SCHEMA_MD_PATH, "w", encoding="utf-8") as f:
    f.write("\n".join(md))

print("Saved:")
print(" -", CSV_PATH)
print(" -", SCHEMA_JSON_PATH)
print(" -", SCHEMA_MD_PATH)


display(df.head())

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Extracting templates from packages: 100%
Preconfiguring packages ...
Selecting previously unselected package fonts-droid-fallback.
(Reading database ... 117528 files and directories currently installed.)
Preparing to unpack .../00-fonts-droid-fallback_1%3a6.0.1r16-1.1build1_all.deb ...
Unpacking fonts-droid-fallback (1:6.0.1r16-1.1build1) ...
Selecting previously unselected package poppler-data.
Preparing to unpack .../01-poppler-data_0.4.11-1_all.deb ...
Unpacking poppler-data (0.4.11-1) ...
Selecting previously unselected package tex-common.
Preparing to unpack .../02-tex-common_6.17_all.deb ...
Unpacking tex-common (6.17) ...
Selecting previously unselected package fonts-urw-base35.
Preparing to unpack .../03-fonts-urw-base35_20200910-1_all.deb ...
Unpacking fonts-urw-base35 (20200910-1) ...
Selec

Unnamed: 0,n,k,isAbelian,isCyclic,isNilpotent,isSolvable,isPGroup,exponent,zSize,derivedSize,frattiniSize,abelnSize,nrConjClasses,nilpotencyClass,derivedLength,p,sy2,sy3,sy5
0,1,1,True,True,True,True,True,1,1,1,1,1,1,0,0,1,0,0,0
1,2,1,True,True,True,True,True,2,2,1,1,2,2,1,1,2,2,0,0
2,3,1,True,True,True,True,True,3,3,1,1,3,3,1,1,3,0,3,0
3,4,1,True,True,True,True,True,4,4,1,2,4,4,1,1,2,4,0,0
4,4,2,True,False,True,True,True,2,4,1,1,4,4,1,1,2,4,0,0


In [3]:


from txgraffiti.graffiti3.heuristics.morgan import morgan_filter
from txgraffiti.graffiti3.heuristics.dalmatian import dalmatian_filter
from txgraffiti.graffiti3.graffiti3 import Graffiti3, Stage


# Build the Graffiti3 engine on the group-invariant table `df`, with the
# Morgan and Dalmatian heuristics plugged in as filters.
g3 = Graffiti3(
    df,
    max_boolean_arity=2,
    morgan_filter=morgan_filter,
    dalmatian_filter=dalmatian_filter,
    sophie_cfg={
        "eq_tol": 1e-4,
        "min_target_support": 5,
        "min_h_support": 3,
        "max_violations": 0,
        "min_new_coverage": 1,
    },
)

# Every stage is enabled for this run; the list order is kept as authored.
STAGES = [
    Stage.CONSTANT, Stage.RATIO,
    Stage.LP1, Stage.LP2, Stage.LP3, Stage.LP4,
    Stage.POLY_SINGLE, Stage.MIXED,
    Stage.SQRT, Stage.LOG, Stage.SQRT_LOG,
    Stage.GEOM_MEAN, Stage.LOG_SUM,
    Stage.SQRT_PAIR, Stage.SQRT_SUM,
    Stage.EXP_EXPONENT,
]

# Invariant(s) to bound: the number of conjugacy classes.
TARGETS = ["nrConjClasses"]

# Run the conjecturing pipeline; the same stage list drives both the
# conjecture stages and the Sophie stages.
result = g3.conjecture(
    targets=TARGETS,
    stages=STAGES,
    sophie_stages=STAGES,
    enable_sophie=True,
    include_invariant_products=False,
    include_abs=False,
    include_min_max=False,
    include_log=False,
    quick=True,
    show=True,
)

Stage breakdown: {'nrConjClasses': {'constant': {'conjectures': 1, 'sophie': 5}, 'ratio': {'conjectures': 7, 'sophie': 9}, 'lp1': {'conjectures': 9, 'sophie': 14}, 'sqrt': {'conjectures': 24, 'sophie': 25}, 'log': {'conjectures': 28, 'sophie': 32}, 'sqrt_log': {'conjectures': 7, 'sophie': 10}, 'sqrt_pair': {'conjectures': 1, 'sophie': 5}, 'geom_mean': {'conjectures': 16, 'sophie': 26}, 'sqrt_sum': {'conjectures': 9, 'sophie': 17}, 'log_sum': {'conjectures': 7, 'sophie': 10}, 'exp_exponent': {'conjectures': 20, 'sophie': 19}, 'lp2': {'conjectures': 33, 'sophie': 20}, 'lp3': {'conjectures': 51, 'sophie': 24}, 'lp4': {'conjectures': 61, 'sophie': 26}, 'poly_single': {'conjectures': 23, 'sophie': 20}, 'mixed': {'conjectures': 8, 'sophie': 8}}}
Total conjectures: 166
Total Sophie conditions: 173

=== Top conjectures (by touch_count, then support) ===

Conjecture 1. nrConjClasses ≤ (((3/4) · abelnSize) + ((1/4) · n))   [touches=1231, support=3596]

Conjecture 2. (isPGroup) ⇒ nrConjClasses ≤ 

In [12]:


from txgraffiti.graffiti3.heuristics.morgan import morgan_filter
from txgraffiti.graffiti3.heuristics.dalmatian import dalmatian_filter
from txgraffiti.graffiti3.graffiti3 import Graffiti3, Stage


# Build the Graffiti3 engine on the group-invariant table `df`, with the
# Morgan and Dalmatian heuristics plugged in as filters.
g3 = Graffiti3(
    df,
    max_boolean_arity=2,
    morgan_filter=morgan_filter,
    dalmatian_filter=dalmatian_filter,
    sophie_cfg={
        "eq_tol": 1e-4,
        "min_target_support": 5,
        "min_h_support": 3,
        "max_violations": 0,
        "min_new_coverage": 1,
    },
)

# Only three stages are enabled for this run.
# Disabled: CONSTANT, LP2, LP3, LP4, MIXED, SQRT, LOG, SQRT_LOG, GEOM_MEAN,
#           LOG_SUM, SQRT_PAIR, SQRT_SUM, EXP_EXPONENT.
STAGES = [Stage.RATIO, Stage.LP1, Stage.POLY_SINGLE]

# Invariant(s) to bound: the number of conjugacy classes.
TARGETS = ["nrConjClasses"]

# Run the conjecturing pipeline; this run turns include_abs on.
result = g3.conjecture(
    targets=TARGETS,
    stages=STAGES,
    sophie_stages=STAGES,
    enable_sophie=True,
    include_invariant_products=False,
    include_abs=True,
    include_min_max=False,
    include_log=False,
    quick=True,
    show=True,
)

Stage breakdown: {'nrConjClasses': {'ratio': {'conjectures': 34, 'sophie': 21}, 'lp1': {'conjectures': 24, 'sophie': 18}, 'poly_single': {'conjectures': 85, 'sophie': 19}}}
Total conjectures: 125
Total Sophie conditions: 48

=== Top conjectures (by touch_count, then support) ===

Conjecture 1. nrConjClasses ≥ abelnSize   [touches=247, support=3596]

Conjecture 2. nrConjClasses ≥ zSize   [touches=247, support=3596]

Conjecture 3. nrConjClasses ≤ n   [touches=247, support=3596]

Conjecture 4. (isNilpotent) ⇒ nrConjClasses ≥ (|(derivedSize - abelnSize)| + 1)   [touches=247, support=2978]

Conjecture 5. (isAbelian) ⇒ nrConjClasses ≤ abelnSize   [touches=247, support=247]

Conjecture 6. (isAbelian) ⇒ nrConjClasses ≥ n   [touches=247, support=247]

Conjecture 7. (isAbelian) ⇒ nrConjClasses ≥ (|(zSize - derivedSize)| + 1)   [touches=247, support=247]

Conjecture 8. (isAbelian) ⇒ nrConjClasses ≥ |(abelnSize - sy5)|   [touches=202, support=247]

Conjecture 9. (isAbelian) ⇒ nrConjClasses ≥ |(abe

In [13]:


from txgraffiti.graffiti3.heuristics.morgan import morgan_filter
from txgraffiti.graffiti3.heuristics.dalmatian import dalmatian_filter
from txgraffiti.graffiti3.graffiti3 import Graffiti3, Stage

# Derived invariant cp = nrConjClasses / n, i.e. k(G)/|G|
# (presumably "commuting probability" -- confirm the intended name).
# Stored on `df` itself so the engine built below sees it as a column.
df["cp"] = df["nrConjClasses"] / df["n"]

# Build the Graffiti3 engine on the extended table, with the Morgan and
# Dalmatian heuristics plugged in as filters.
g3 = Graffiti3(
    df,
    max_boolean_arity=2,
    morgan_filter=morgan_filter,
    dalmatian_filter=dalmatian_filter,
    sophie_cfg={
        "eq_tol": 1e-4,
        "min_target_support": 5,
        "min_h_support": 3,
        "max_violations": 0,
        "min_new_coverage": 1,
    },
)

# Only three stages are enabled for this run.
# Disabled: CONSTANT, LP2, LP3, LP4, MIXED, SQRT, LOG, SQRT_LOG, GEOM_MEAN,
#           LOG_SUM, SQRT_PAIR, SQRT_SUM, EXP_EXPONENT.
STAGES = [Stage.RATIO, Stage.LP1, Stage.POLY_SINGLE]

# Invariant(s) to bound: the derived ratio column.
TARGETS = ["cp"]

# Run the conjecturing pipeline on the new target.
result = g3.conjecture(
    targets=TARGETS,
    stages=STAGES,
    sophie_stages=STAGES,
    enable_sophie=True,
    include_invariant_products=False,
    include_abs=False,
    include_min_max=False,
    include_log=False,
    quick=True,
    show=True,
)

Stage breakdown: {'cp': {'ratio': {'conjectures': 12, 'sophie': 11}, 'lp1': {'conjectures': 2, 'sophie': 5}, 'poly_single': {'conjectures': 19, 'sophie': 8}}}
Total conjectures: 31
Total Sophie conditions: 21

=== Top conjectures (by touch_count, then support) ===

Conjecture 1. (isPGroup) ⇒ cp ≥ (1/4)   [touches=614, support=2732]

Conjecture 2. cp ≤ 1   [touches=247, support=3596]

Conjecture 3. cp ≤ (((-2/19) · derivedSize) + (21/19))   [touches=247, support=3596]

Conjecture 4. (isSolvable) ⇒ cp ≥ ((((-6/13) · (derivedLength)²) + ((6/13) · derivedLength)) + 1)   [touches=247, support=3592]

Conjecture 5. (isNilpotent) ⇒ cp ≥ ((((-4/9) · (derivedLength)²) + ((4/9) · derivedLength)) + 1)   [touches=247, support=2978]

Conjecture 6. (isAbelian) ⇒ cp ≥ derivedSize   [touches=247, support=247]

Conjecture 7. (isAbelian) ⇒ cp ≥ 1   [touches=247, support=247]

Conjecture 8. (isAbelian) ⇒ cp ≤ (((4 · (derivedLength)²) + (-4 · derivedLength)) + 1)   [touches=247, support=247]

Conjecture 9.

In [20]:


from txgraffiti.graffiti3.heuristics.morgan import morgan_filter
from txgraffiti.graffiti3.heuristics.dalmatian import dalmatian_filter
from txgraffiti.graffiti3.graffiti3 import Graffiti3, Stage



# This cell targets the derived column `cp` (nrConjClasses / n), which is
# otherwise only created by an earlier cell.  Recompute it when missing so the
# cell also works right after `df` has been reloaded from the CSV.
if "cp" not in df.columns:
    df["cp"] = df["nrConjClasses"] / df["n"]

# Build the Graffiti3 engine on the table, with the Morgan and Dalmatian
# heuristics plugged in as filters.
g3 = Graffiti3(
    df,
    max_boolean_arity=2,
    morgan_filter=morgan_filter,
    dalmatian_filter=dalmatian_filter,
    sophie_cfg=dict(
        eq_tol=1e-4,
        min_target_support=5,
        min_h_support=3,
        max_violations=0,
        min_new_coverage=1,
    ),
)

# Only the EXP_EXPONENT stage is enabled for this run.
# Disabled: CONSTANT, RATIO, LP1, LP2, LP3, LP4, POLY_SINGLE, MIXED, SQRT,
#           LOG, SQRT_LOG, GEOM_MEAN, LOG_SUM, SQRT_PAIR, SQRT_SUM.
STAGES = [
    Stage.EXP_EXPONENT,
]

# Target invariants to conjecture on.
TARGETS = [
    "cp",
]

# Conjecture on the target invariants using the stages defined above.
result = g3.conjecture(
    targets=TARGETS,
    stages=STAGES,
    include_invariant_products=False,
    include_abs=False,
    include_min_max=False,
    include_log=False,
    enable_sophie=True,
    sophie_stages=STAGES,
    quick=True,
    show=True,
)

Stage breakdown: {'cp': {'exp_exponent': {'conjectures': 48, 'sophie': 0}}}
Total conjectures: 48
Total Sophie conditions: 0

=== Top conjectures (by touch_count, then support) ===

Conjecture 1. (isSolvable) ⇒ cp ≤ (derivedLength)^(((-25/28) + ((3/28) · nilpotencyClass)))   [touches=301, support=3592]

Conjecture 2. cp ≤ 1   [touches=247, support=3596]

Conjecture 3. cp ≤ (zSize)^(((3/22) + ((-3/22) · derivedLength)))   [touches=247, support=3596]

Conjecture 4. cp ≤ (zSize)^(((1/30) + ((-1/30) · derivedSize)))   [touches=247, support=3596]

Conjecture 5. cp ≤ (zSize)^(((3/22) + ((-3/22) · nilpotencyClass)))   [touches=247, support=3596]

Conjecture 6. cp ≤ (abelnSize)^(((1/9) + ((-1/9) · derivedLength)))   [touches=247, support=3596]

Conjecture 7. cp ≤ (abelnSize)^(((1/30) + ((-1/30) · derivedSize)))   [touches=247, support=3596]

Conjecture 8. cp ≤ (nrConjClasses)^(((2/27) + ((-2/27) · nilpotencyClass)))   [touches=247, support=3596]

Conjecture 9. cp ≤ (derivedSize)^((-5/16))   [t