In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Two-colour map for binary images: value 0 (void) is drawn yellow and
# value 1 (material) is drawn black.
YB_CMAP = ListedColormap(["yellow", "black"])


In [None]:
def full_from_bottomright_quadrant(symQ):
    """Expand a quadrant into a full image with two mirror-symmetry axes.

    ``symQ`` is placed unchanged in the bottom-right corner of the result.
    The bottom-left corner is its left-right mirror, the top-right corner is
    its up-down mirror, and the top-left corner is mirrored both ways.

    Parameters
    ----------
    symQ : array-like with at least 2 dimensions
        The bottom-right quadrant.

    Returns
    -------
    numpy.ndarray
        Array with twice as many rows and twice as many columns as ``symQ``.
    """
    # Bottom half: mirrored quadrant on the left, original on the right.
    lower_half = np.hstack([np.fliplr(symQ), symQ])
    # The top half is the bottom half flipped vertically.
    return np.vstack([np.flipud(lower_half), lower_half])

# Quick visual sanity check: load the quadrant CSV (read without a header row)
# and draw a few of its rows as full unit cells.
csv_path = r"G:\Other computers\Dell G3\Universitat\2nd Semester\MLMM\New\all_cells_bottomright.csv"
df_preview = pd.read_csv(csv_path, header=None)
# Entries that cannot be parsed as numbers become NaN, are replaced by 0,
# and the whole frame is cast to int.
df_preview = df_preview.apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)

# Positional rows 1..3 are previewed; the row at position 0 is not shown.
for idx in range(1, 4):
    # The first 25 values of the row form a 5x5 quadrant, filled row by row (C order).
    row_vals = df_preview.iloc[idx, :25].to_numpy(dtype=int)
    q5 = row_vals.reshape(5, 5, order="C")
    # Mirror the quadrant into the full 10x10 unit cell.
    img10 = full_from_bottomright_quadrant(q5)

    # One small figure per row; vmin/vmax pin the 0/1 -> yellow/black mapping.
    plt.figure(figsize=(3, 3))
    plt.imshow(img10, cmap=YB_CMAP, interpolation="none", vmin=0, vmax=1)
    plt.title(f"Preview Unit Cell — Row {idx+1}")
    plt.axis("off")
    plt.show()


In [None]:
def build_shape_library():
    """Return the library of small binary templates used for shape matching.

    Returns
    -------
    dict[str, numpy.ndarray]
        Maps a shape name to a 2-D integer array of 0s and 1s. A fresh dict
        with fresh arrays is built on every call. The insertion order is
        fixed: bars, solid blocks, plus/cross, diagonals, L shapes, C shapes,
        T shapes, then the single pixel and the 2x2 checkerboard.
    """

    def _mask(*rows):
        # Each positional argument is one row of the template.
        return np.array(rows, dtype=int)

    return {
        # Bars (rows x cols)
        "bar_1x2": _mask([1, 1]),
        "bar_2x1": _mask([1], [1]),
        "bar_1x3": _mask([1, 1, 1]),
        "bar_3x1": _mask([1], [1], [1]),

        # Solid blocks
        "block_2x2": np.ones((2, 2), dtype=int),
        "block_3x3": np.ones((3, 3), dtype=int),
        "block_2x3": np.ones((2, 3), dtype=int),
        "block_3x2": np.ones((3, 2), dtype=int),

        # Plus / cross (3x3)
        "plus_3x3": _mask([0, 1, 0],
                          [1, 1, 1],
                          [0, 1, 0]),
        "cross_3x3": _mask([1, 0, 1],
                           [0, 1, 0],
                           [1, 0, 1]),

        # Diagonals (3x3)
        "diag_main_3x3": _mask([1, 0, 0],
                               [0, 1, 0],
                               [0, 0, 1]),
        "diag_anti_3x3": _mask([0, 0, 1],
                               [0, 1, 0],
                               [1, 0, 0]),

        # L shapes (2x2), all four orientations
        "L_2x2_down_right": _mask([1, 0],
                                  [1, 1]),   # └
        "L_2x2_down_left": _mask([0, 1],
                                 [1, 1]),    # ┘
        "L_2x2_up_right": _mask([1, 1],
                                [1, 0]),     # ┌
        "L_2x2_up_left": _mask([1, 1],
                               [0, 1]),      # ┐

        # C shapes, named after the side that is open
        "C_3x2_right_open": _mask([1, 1],
                                  [1, 0],
                                  [1, 1]),
        "C_3x2_left_open": _mask([1, 1],
                                 [0, 1],
                                 [1, 1]),
        "C_2x3_up_open": _mask([1, 0, 1],
                               [1, 1, 1]),
        "C_2x3_down_open": _mask([1, 1, 1],
                                 [1, 0, 1]),

        # T shapes (3x3), named after the side carrying the full bar
        "T_3x3_up": _mask([1, 1, 1],
                          [0, 1, 0],
                          [0, 1, 0]),
        "T_3x3_down": _mask([0, 1, 0],
                            [0, 1, 0],
                            [1, 1, 1]),
        "T_3x3_left": _mask([1, 0, 0],
                            [1, 1, 1],
                            [1, 0, 0]),
        "T_3x3_right": _mask([0, 0, 1],
                             [1, 1, 1],
                             [0, 0, 1]),

        # Single filled pixel and 2x2 checkerboard
        "pixel_1x1": _mask([1]),
        "checker_2x2": _mask([1, 0],
                             [0, 1]),
    }


def show_shape_library(SHAPES, cols = 6, figsize=(12, 10)):
    """Draw every template of a shape library in one grid figure.

    Parameters
    ----------
    SHAPES : dict
        Maps shape name to a 2-D 0/1 array.
    cols : int
        Number of grid columns; the row count is derived from it.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    """
    # Ceiling division: enough rows to hold every shape.
    n_rows = (len(SHAPES) + cols - 1) // cols
    plt.figure(figsize=figsize)
    for slot, (label, template) in enumerate(SHAPES.items(), start=1):
        # Subplot slots are numbered from 1.
        plt.subplot(n_rows, cols, slot)
        plt.imshow(template, cmap=YB_CMAP, interpolation="none", vmin=0, vmax=1)
        plt.title(label, fontsize=8)
        plt.axis("off")
    plt.tight_layout()
    plt.show()


# Build the template library once for interactive use and display it in a
# six-column grid.
SHAPES = build_shape_library()
show_shape_library(SHAPES, cols=6)
# Last expression of the cell: shows the number of templates in the library.
len(SHAPES)


In [None]:
def count_template_matches(img, T):
    """Count the positions at which template ``T`` matches ``img`` exactly.

    The template is slid over every offset where it fits entirely inside the
    image. A position counts only if the window equals ``T`` element for
    element, zeros included. Overlapping matches are counted separately.

    Parameters
    ----------
    img : numpy.ndarray
        2-D image.
    T : numpy.ndarray
        2-D template.

    Returns
    -------
    int
        Number of matching positions; 0 when the template does not fit.
    """
    img_rows, img_cols = img.shape
    t_rows, t_cols = T.shape
    return sum(
        1
        for top in range(img_rows - t_rows + 1)
        for left in range(img_cols - t_cols + 1)
        if np.array_equal(img[top:top + t_rows, left:left + t_cols], T)
    )


def shape_frequency_vector(img10, SHAPES):
    """Compute the normalised match frequency of every template in an image.

    For each template, the number of exact matches is divided by the number
    of candidate placements, computed as the product of the per-axis offset
    counts ``(H - r + 1) * (W - c + 1)``.

    Parameters
    ----------
    img10 : numpy.ndarray
        2-D image to scan.
    SHAPES : dict
        Maps shape name to a 2-D template array.

    Returns
    -------
    dict[str, float]
        One ``"sff_<name>"`` entry per template, in the iteration order of
        ``SHAPES``. The value is 0.0 when the placement count is not positive.
    """
    n_rows, n_cols = img10.shape
    frequencies = {}
    for name, template in SHAPES.items():
        t_rows, t_cols = template.shape
        n_positions = (n_rows - t_rows + 1) * (n_cols - t_cols + 1)
        if n_positions > 0:
            frequencies[f"sff_{name}"] = count_template_matches(img10, template) / n_positions
        else:
            # Template cannot be placed: report zero frequency without scanning.
            frequencies[f"sff_{name}"] = 0.0
    return frequencies


In [None]:
def build_sff_csv(csv_path, out_csv_path, start_row_1_indexed, end_row_1_indexed,
                  first_n_cols=25, quadrant_order="C", transpose_quadrant=False, verbose=True):
    """Compute shape-frequency features (SFF) for a row range of a CSV and save them.

    Each selected row is interpreted as a square bottom-right quadrant (its
    first ``first_n_cols`` values), mirrored into a full unit cell, and scanned
    with every template of the shape library. The quadrant values and the
    resulting ``sff_*`` columns are written side by side to ``out_csv_path``.

    Parameters
    ----------
    csv_path : str or path-like
        Input CSV without a header row.
    out_csv_path : str or path-like
        Destination CSV (written without the index column).
    start_row_1_indexed, end_row_1_indexed : int
        Inclusive 1-indexed row range. It is clamped to the rows that exist.
    first_n_cols : int
        Number of leading columns that hold the quadrant. Must be a perfect
        square (25 gives a 5x5 quadrant).
    quadrant_order : {"C", "F"}
        Memory order used when reshaping a row into the quadrant.
    transpose_quadrant : bool
        Transpose the quadrant before mirroring.
    verbose : bool
        Print progress information.

    Returns
    -------
    pathlib.Path
        ``out_csv_path`` as a ``Path``, so callers can report or reuse it.

    Raises
    ------
    ValueError
        If ``first_n_cols`` is not a positive perfect square, the CSV has fewer
        than ``first_n_cols`` columns, or the clamped row range is empty.
    """
    csv_path = Path(csv_path)
    out_csv_path = Path(out_csv_path)
    if verbose:
        print(f"csv_path: {csv_path}  exists? {csv_path.exists()}")

    # The quadrant is square, so the column count must be a perfect square.
    side = round(max(first_n_cols, 0) ** 0.5)
    if side < 1 or side * side != first_n_cols:
        raise ValueError(
            f"first_n_cols must be a positive perfect square (e.g. 25); got {first_n_cols}."
        )

    try:
        df = pd.read_csv(csv_path, header=None)
        if verbose:
            print("Read as comma-separated CSV")
    except pd.errors.ParserError:
        # Retry only on parse failures; I/O errors (missing file, permissions)
        # propagate unchanged because a second read cannot fix them.
        df = pd.read_csv(csv_path, header=None, sep=r"\s+", engine="python")
        if verbose:
            print("Read as whitespace-separated CSV")

    # Unparseable entries become NaN, are replaced by 0, then cast to int.
    df = df.apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)

    if df.shape[1] < first_n_cols:
        raise ValueError(f"CSV has only {df.shape[1]} columns; need ≥ {first_n_cols}.")

    start_iloc = max(0, start_row_1_indexed - 1)
    end_iloc   = min(len(df), end_row_1_indexed)
    if start_iloc >= end_iloc:
        raise ValueError("Selected row range is empty. Adjust start/end rows.")

    # 1-indexed bounds of the rows actually used (after clamping), for messages.
    first_row, last_row = start_iloc + 1, end_iloc

    df_slice = df.iloc[start_iloc:end_iloc]
    if verbose:
        print(f"Using rows (1-indexed) {first_row}..{last_row} "
              f"-> iloc[{start_iloc}:{end_iloc}] ; slice shape={df_slice.shape}")

    base_cols = df_slice.iloc[:, :first_n_cols].reset_index(drop=True)

    SHAPES = build_shape_library()

    sff_rows = []
    # Convert once to a 2-D array instead of doing one .iloc lookup per row.
    for idx, row_vals in enumerate(base_cols.to_numpy(dtype=int)):
        quadrant = row_vals.reshape(side, side, order=quadrant_order)
        if transpose_quadrant:
            quadrant = quadrant.T
        full_img = full_from_bottomright_quadrant(quadrant)
        sff = shape_frequency_vector(full_img, SHAPES)
        sff_rows.append(sff)
        # Log the first three rows, then every 200th row.
        if verbose and (idx < 3 or (idx + 1) % 200 == 0):
            print(f"Processed global row {idx + first_row} "
                  f"(slice idx {idx}) — sample: {list(sff.items())[:2]}")

    sff_df = pd.DataFrame(sff_rows)

    # Both frames carry a fresh 0..n-1 index, so they align row by row.
    result = pd.concat([base_cols, sff_df], axis=1)
    result.to_csv(out_csv_path, index=False)
    if verbose:
        print(f"Saved SFF CSV to: {out_csv_path.resolve()} "
              f"(rows {first_row}..{last_row})")
        print(f"Output columns: {list(result.columns)[:10]} ... [+{len(result.columns)-10} more]")

    return out_csv_path


In [None]:
# Run the SFF extraction on 1-indexed rows 2..3 of the 10k dataset and write
# the result to a CSV in the same folder.
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable data directory.
out_path = build_sff_csv(
    csv_path=r"G:\Other computers\Dell G3\Universitat\2nd Semester\MLMM\New\corrected_data_10k.csv", 
    out_csv_path=r"G:\Other computers\Dell G3\Universitat\2nd Semester\MLMM\New\sff_output_10k.csv",
    start_row_1_indexed=2,
    end_row_1_indexed=3,
    quadrant_order="C",             # reshape each row into the quadrant row by row
    transpose_quadrant=False,
    verbose=True
)
print("SFF CSV written to:", out_path)

