# Image Preprocessing

In [None]:
import sys
from pathlib import Path

# Make the repository root importable so the `Code.image` import below resolves.
# Path() is the current working directory; parents[1] is its grandparent, so this
# assumes the notebook is run from a folder two levels below the root -- TODO confirm.
PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    # Prepend (not append) so the project root is searched first.
    sys.path.insert(0, str(PROJECT_ROOT))

import cv2 as cv
import numpy as np

# Project-local preprocessing pipeline and its configuration object.
from Code.image import ImgPreproc, ImgPreprocCfg

# Experiment selectors: `dataset` picks the input image set, `run` numbers the
# output folder of this preprocessing attempt.
run = 17
dataset = 7

# --- Path configuration ---
database_dir = PROJECT_ROOT / "Database"
input_dir_base = database_dir.joinpath("data", f"image{dataset}")
output_dir_base = database_dir.joinpath("tmp", "image", f"imgPreprocTry{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Class names; each one is also the name of an input sub-folder.
labels = ["Clavo", "Tornillo", "Tuerca", "Arandela"]

# Preprocessor built from an explicit settings object.
Imgcfg = ImgPreprocCfg(
    target_size=512,
    sigma=2.0,
    flag_refine_mask=False,
    open_ksize=3,
    close_ksize=3,
)
pre = ImgPreproc(cfg=Imgcfg)

# Preprocess every .jpg of every class and store the squared image and its mask
# as PNG files under a per-class output folder.
for label in labels:
    input_dir = input_dir_base / label
    output_dir = output_dir_base / label
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so the processing order is reproducible between runs.
    for img_path in sorted(input_dir.glob("*.jpg")):
        img_bgr = cv.imread(str(img_path))
        if img_bgr is None:
            # cv.imread returns None instead of raising when a file cannot be
            # decoded; keep going, but make the skip visible.
            print(f"[warn] could not read {img_path}; skipped")
            continue

        # NOTE(review): an extra `pre._normalize(img_bgr)` call whose result was
        # discarded used to run here; restore it if `process` turns out to rely
        # on side effects of that private helper.
        img_sq, mask_sq = pre.process(img_bgr)

        outputs = {
            f"{img_path.stem}_sq.png": img_sq,
            f"{img_path.stem}_mask.png": mask_sq,
        }
        for out_name, array in outputs.items():
            out_path = output_dir / out_name
            # cv.imwrite reports failure through its boolean return value.
            if not cv.imwrite(str(out_path), array):
                print(f"[warn] could not write {out_path}")



# Image Features

In [None]:
import sys
from pathlib import Path

# Make the repository root importable so the `Code.image` import below resolves.
# Path() is the current working directory; parents[1] is its grandparent, so this
# assumes the notebook is run from a folder two levels below the root -- TODO confirm.
PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    # Prepend (not append) so the project root is searched first.
    sys.path.insert(0, str(PROJECT_ROOT))

import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Project-local preprocessing, feature extraction and standardization helpers.
from Code.image import ImgPreproc, ImgPreprocCfg, ImgFeat, Standardizer

# Experiment selectors: `dataset` picks the input image set, `run` numbers the
# output folder.
run = 1
dataset = 7

# --- Path configuration ---
input_dir_base = Path(PROJECT_ROOT, "Database", "data", f"image{dataset}")
output_dir_base = Path(PROJECT_ROOT, "Database", "tmp", "image", f"KMeans_TryOut{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Class names; each one is also the name of an input sub-folder.
labels = ["Clavo", "Tornillo", "Tuerca", "Arandela"]

# Preprocessor and feature extractor used by the extraction loop.
Imgcfg = ImgPreprocCfg(
    target_size=512,
    sigma=2.0,
    flag_refine_mask=False,
    open_ksize=3,
    close_ksize=3,
)
pre = ImgPreproc(cfg=Imgcfg)
feat = ImgFeat("3D")

# Accumulators filled per image: table rows (class, file, features...), the
# feature names reported by the extractor, and the raw feature vectors.
rows, resultados = [], []
names = None

# Extract one feature vector per readable image, class by class.
for label in labels:
    input_dir = input_dir_base / label
    # Sorted so row order is reproducible between runs.
    for path in sorted(input_dir.glob("*.jpg")):
        img = cv.imread(str(path))
        if img is None:
            # cv.imread returns None instead of raising when a file cannot be
            # decoded; keep going, but make the skip visible.
            print(f"[warn] could not read {path}; skipped")
            continue

        # NOTE(review): these are private ImgPreproc helpers; presumably
        # equivalent to `pre.process(img)` -- confirm before switching.
        mask_obj = pre._normalize(img)
        img_sq, mask_sq = pre._crop_and_square(img, mask_obj, size=pre.cfg.target_size)

        # `names` is overwritten on every image; the last value is what the
        # table-building code uses as column names.
        vec, names, debug = feat.extract(img_sq, mask_sq)
        rows.append([label, path.name, *vec.tolist()])
        resultados.append(vec)

# Fail early with a clear message: with no images `names` is still None and
# the DataFrame construction that follows would die with an opaque TypeError.
if not resultados:
    raise FileNotFoundError(f"No readable .jpg images found under {input_dir_base}")

# Stack the per-image feature vectors into a single matrix, one row per image.
X = np.asarray(resultados)

# Identification columns only (class and file name); the feature columns are
# re-attached per alpha below.
meta_cols = ["clase", "archivo"]
df_meta = pd.DataFrame(rows, columns=[*meta_cols, *names]).loc[:, meta_cols]

# Multipliers tried for the third feature; one CSV is written per value.
vec_alphas = [1]

for alpha in vec_alphas:
    # Work on a copy so X itself keeps the unweighted features.
    X_scaled = X.copy()
    X_scaled[:, 2] *= alpha  # weight the third feature (column index 2)

    # NOTE(review): the standardized matrix is computed here, but the exported
    # table below is built from X_scaled, not from X_std.
    stats = Standardizer().calculate_statistics(X_scaled)
    X_std = stats.transform(X_scaled)

    df_base = pd.DataFrame(X_scaled, columns=names)
    df_out = pd.concat(
        [df_meta.reset_index(drop=True), df_base.add_suffix("_scaled")],
        axis="columns",
    )

    csv_path = output_dir_base / f"features_alpha{alpha}.csv"
    df_out.to_csv(csv_path, index=False)
    display(df_out)


if False:
    # Disabled toggle: standardize X and render the collected rows as a
    # matplotlib table.
    # NOTE(review): X_std is computed, but the table shows `rows`, i.e. the
    # unstandardized values.
    stats = Standardizer()
    stats.calculate_statistics(X)
    X_std = stats.transform(X)

    cols = ["clase", "archivo", *names]

    # Figure size grows with the number of columns and rows of the table.
    fig_width = len(cols) * 1.2
    fig_height = len(rows) * 0.4 + 1
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    ax.axis("off")

    table = ax.table(cellText=rows, colLabels=cols, loc="center")
    # Fixed 8 pt font instead of matplotlib's automatic sizing.
    table.auto_set_font_size(False)
    table.set_fontsize(8)
    table.scale(1, 1.2)
    plt.show()

        



# K-Means Model Tryout

In [None]:
import sys
from pathlib import Path

# Make the repository root importable so the `Code.image` import below resolves.
# Path() is the current working directory; parents[1] is its grandparent, so this
# assumes the notebook is run from a folder two levels below the root -- TODO confirm.
PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    # Prepend (not append) so the project root is searched first.
    sys.path.insert(0, str(PROJECT_ROOT))

import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Project-local preprocessing, feature extraction, standardization and K-means.
from Code.image import ImgPreproc, ImgPreprocCfg, ImgFeat, Standardizer, KMeansModel

# Experiment selectors: `dataset` picks the input image set, `run` numbers the
# output folder.
run = 1
dataset = 7

# --- Path configuration ---
input_dir_base = PROJECT_ROOT.joinpath("Database", "data", f"image{dataset}")
output_dir_base = PROJECT_ROOT.joinpath("Database", "tmp", "image", f"KMeans_TryOut{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Class names; each one is also the name of an input sub-folder.
labels = ["Clavo", "Tornillo", "Tuerca", "Arandela"]

# Preprocessor and feature extractor used by the extraction loop.
Imgcfg = ImgPreprocCfg(
    target_size=512,
    sigma=2.0,
    flag_refine_mask=False,
    open_ksize=3,
    close_ksize=3,
)
pre = ImgPreproc(cfg=Imgcfg)
feat = ImgFeat("3D")

# Accumulators filled per image: the raw feature vectors, the feature names
# reported by the extractor, and the table rows (class, file, features...).
resultados = []
names = None
rows = []

# Extract one feature vector per readable image, class by class.
for label in labels:
    input_dir = input_dir_base / label
    # Sorted so row order is reproducible between runs.
    for path in sorted(input_dir.glob("*.jpg")):
        img = cv.imread(str(path))
        if img is None:
            # cv.imread returns None instead of raising when a file cannot be
            # decoded; keep going, but make the skip visible.
            print(f"[warn] could not read {path}; skipped")
            continue

        # NOTE(review): these are private ImgPreproc helpers; presumably
        # equivalent to `pre.process(img)` -- confirm before switching.
        mask_obj = pre._normalize(img)
        img_sq, mask_sq = pre._crop_and_square(img, mask_obj, size=pre.cfg.target_size)

        # `names` is overwritten on every image; the last value provides the
        # column names used below.
        vec, names, debug = feat.extract(img_sq, mask_sq)
        rows.append([label, path.name, *vec.tolist()])
        resultados.append(vec)

# Fail early with a clear message: with no images `names` is still None and
# unpacking it into the column list below would raise an opaque TypeError.
if not resultados:
    raise FileNotFoundError(f"No readable .jpg images found under {input_dir_base}")

# Feature matrix (one row per image) and the matching identification columns.
X = np.asarray(resultados)
df_meta = pd.DataFrame(rows, columns=["clase", "archivo", *names])[["clase", "archivo"]]

if False:
    # Disabled toggle: export the (optionally re-weighted) feature table to
    # CSV, one file per alpha.
    vec_alphas = [1]
    for alpha in vec_alphas:
        # Work on a copy so X itself keeps the unweighted features.
        X_scaled = X.copy()
        X_scaled[:, 2] *= alpha  # weight the third feature (column index 2)

        # NOTE(review): the standardized matrix is computed here, but the
        # exported table below is built from X_scaled, not from X_std.
        stats = Standardizer().calculate_statistics(X_scaled)
        X_std = stats.transform(X_scaled)

        df_base = pd.DataFrame(X_scaled, columns=names)
        df_out = pd.concat(
            [df_meta.reset_index(drop=True), df_base.add_suffix("_scaled")],
            axis="columns",
        )

        csv_path = output_dir_base / f"features_alpha{alpha}.csv"
        df_out.to_csv(csv_path, index=False)
        display(df_out)

if False:
    # Disabled toggle: one K-means fit with a fixed random_state, showing the
    # fitted centers and then the cluster assigned to every image.
    model = KMeansModel(n_clusters=4, random_state=42)
    centers = model.fit(X)
    labels = model.predict(X)

    # Centers table, with the cluster index as the first column.
    df_centers = pd.DataFrame(centers, columns=names)
    df_centers.insert(loc=0, column="cluster", value=range(len(df_centers)))
    display(df_centers)

    # Per-image assignment appended to a copy of the identification columns.
    df_assign = df_meta.assign(cluster=labels)
    display(df_assign)

if False:
    # Disabled toggle: fit K-means once per random seed and show the fitted
    # centers of each seed, to see how much the solution depends on the
    # initialization.
    seeds = list(range(10))
    results = []

    for seed in seeds:
        model = KMeansModel(n_clusters=4, random_state=seed)
        centers = model.fit(X)
        # Named `cluster_ids` so the class-name list `labels` is not clobbered.
        cluster_ids = model.predict(X)
        inertia = model.inertia_
        results.append(
            {"seed": seed, "inertia": inertia, "centers": centers, "labels": cluster_ids}
        )

    # Per-seed inertia summary; built for inspection but not displayed here.
    df_runs = pd.DataFrame([{"seed": r["seed"], "inertia": r["inertia"]} for r in results])

    # One row per (seed, cluster) with the center coordinates keyed by feature
    # name. Kept separate from `rows`, which holds the per-image feature rows.
    center_rows = []
    for r in results:
        for k, c in enumerate(r["centers"]):
            center_rows.append({"seed": r["seed"], "cluster": k, **dict(zip(names, c))})
    df_centers = pd.DataFrame(center_rows)

    # Show one table per seed. Grouping by the "seed" column replaces manual
    # start/end counters with a hard-coded stride equal to n_clusters;
    # sort=False keeps the seeds in the order they were run.
    for _, df_seed in df_centers.groupby("seed", sort=False):
        display(df_seed)

if True:
    # Active experiment: K-means started from hand-picked initial centers
    # instead of a random seed.
    # Manual seed: shape (n_clusters, n_features). Columns presumably follow
    # the order of `names` -- TODO confirm against the feature extractor.
    semilla_ = np.array([
        [1.0, 0.0, 0.0],    # Arandela
        [0.0, 0.0, 1.0],    # Clavo
        [0.0, 1.0, 1.0],    # Tornillo
        [1.0, 0.0, 1.0],    # Tuerca
    ], dtype=float)

    # The seed matrix is hard-coded for a fixed number of features; refuse to
    # fit if the extracted feature matrix does not match it.
    if X.ndim != 2 or X.shape[1] != semilla_.shape[1]:
        raise ValueError(
            f"Manual seed has {semilla_.shape[1]} features per center, "
            f"but X has shape {X.shape}"
        )

    # One cluster per seed row, so the two cannot drift apart.
    model = KMeansModel(n_clusters=len(semilla_))
    centers = model.fit(X, init_centers=semilla_)
    # Named `cluster_ids` so the class-name list `labels` is not clobbered.
    cluster_ids = model.predict(X)
    inertia = model.inertia_

    results = [{"seed": "manual", "inertia": inertia, "centers": centers, "labels": cluster_ids}]

    df_runs = pd.DataFrame([{"seed": r["seed"], "inertia": r["inertia"]} for r in results])

    # One row per cluster with the center coordinates keyed by feature name.
    # Kept separate from `rows`, which holds the per-image feature rows.
    center_rows = []
    for r in results:
        for k, c in enumerate(r["centers"]):
            center_rows.append({"seed": r["seed"], "cluster": k, **{n: c[i] for i, n in enumerate(names)}})
    df_centers = pd.DataFrame(center_rows)

    display(df_runs)
    display(df_centers)




Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
0,0,0,0.0,0.210088,1.0
1,0,1,0.0,1.120556,1.0
2,0,2,1.0,0.184029,0.062199
3,0,3,1.0,0.17652,0.838559


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
4,1,0,1.0,0.17652,0.838559
5,1,1,0.0,1.120556,1.0
6,1,2,1.0,0.184029,0.062199
7,1,3,0.0,0.210088,1.0


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
8,2,0,0.0,1.120556,1.0
9,2,1,1.0,0.180275,0.450379
10,2,2,0.0,0.220087,1.0
11,2,3,0.0,0.17009,1.0


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
12,3,0,0.0,0.17009,1.0
13,3,1,0.0,0.220087,1.0
14,3,2,0.0,1.120556,1.0
15,3,3,1.0,0.180275,0.450379


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
16,4,0,1.0,0.17652,0.838559
17,4,1,1.0,0.18322,0.048921
18,4,2,1.0,0.187264,0.115313
19,4,3,0.0,0.665322,1.0


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
20,5,0,1.0,0.17652,0.838559
21,5,1,0.0,0.665322,1.0
22,5,2,1.0,0.187264,0.115313
23,5,3,1.0,0.18322,0.048921


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
24,6,0,0.0,0.210088,1.0
25,6,1,1.0,0.184029,0.062199
26,6,2,1.0,0.17652,0.838559
27,6,3,0.0,1.120556,1.0


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
28,7,0,1.0,0.187264,0.115313
29,7,1,0.0,0.665322,1.0
30,7,2,1.0,0.18322,0.048921
31,7,3,1.0,0.17652,0.838559


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
32,8,0,0.0,0.210088,1.0
33,8,1,0.0,1.120556,1.0
34,8,2,1.0,0.184029,0.062199
35,8,3,1.0,0.17652,0.838559


Unnamed: 0,seed,cluster,n_holes,r_hull,radiar_var
36,9,0,0.0,0.210088,1.0
37,9,1,1.0,0.17652,0.838559
38,9,2,1.0,0.184029,0.062199
39,9,3,0.0,1.120556,1.0
