In [None]:
!pip install pandas numpy opencv-python scikit-learn tensorflow

In [None]:
import pandas as pd
import numpy as np
import cv2
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical

# -------------------------------------------------------
# 1. CARGAR CSV
# -------------------------------------------------------
df = pd.read_csv("dataset_with_noise.csv")

# Ruta de la imagen
paths = df["image_path"].values
labels = df["label"].values

# -------------------------------------------------------
# 2. CARGAR Y NORMALIZAR IMÁGENES
# -------------------------------------------------------
IM_SIZE = 64  # tamaño estándar para reducir dimensionalidad

def load_image(path):
    img = cv2.imread(path)
    img = cv2.resize(img, (IM_SIZE, IM_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = img.astype("float32") / 255.0
    return img.flatten()  # vector

X = np.array([load_image(p) for p in paths])



# -------------------------------------------------------
# 3. ENCODE LABELS
# -------------------------------------------------------
le = LabelEncoder()
y = le.fit_transform(labels)
y = to_categorical(y)

# -------------------------------------------------------
# 4. PCA PARA REDUCIR VARIABLES ****  
# -------------------------------------------------------
pca = PCA(n_components=100)   # reduce la imagen a 100 features
X_pca = pca.fit_transform(X)

# -------------------------------------------------------
# 5. TRAIN / TEST SPLIT
# -------------------------------------------------------
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X_pca, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------------------------------
# 6. RED NEURONAL SIMPLE
# -------------------------------------------------------
model = models.Sequential([
    layers.Input(shape=(100,)),
    layers.Dense(64, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(y.shape[1], activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# -------------------------------------------------------
# 7. ENTRENAMIENTO
# -------------------------------------------------------
model.fit(
    Xtrain, ytrain,
    validation_data=(Xtest, ytest),
    batch_size=32,
    epochs=20
)

# -------------------------------------------------------
# 8. GUARDAR MODELO Y PCA
# -------------------------------------------------------
model.save("sign_model.h5")
np.save("pca_components.npy", pca.components_)
np.save("pca_mean.npy", pca.mean_)
np.save("label_classes.npy", le.classes_)
print("Modelo guardado.")


Epoch 1/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.2762 - loss: 2.8058 - val_accuracy: 0.6654 - val_loss: 2.2484
Epoch 2/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7586 - loss: 1.6026 - val_accuracy: 0.8574 - val_loss: 1.0072
Epoch 3/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9114 - loss: 0.6616 - val_accuracy: 0.9316 - val_loss: 0.4737
Epoch 4/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9452 - loss: 0.3262 - val_accuracy: 0.9525 - val_loss: 0.3131
Epoch 5/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9690 - loss: 0.2035 - val_accuracy: 0.9639 - val_loss: 0.2268
Epoch 6/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9814 - loss: 0.1384 - val_accuracy: 0.9715 - val_loss: 0.1773
Epoch 7/20
[1m66/66[0m [32m━━━━━━━━━━



Modelo guardado.


## Random Forest

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
import joblib

# -------------------------------------------------------------
# CARGA DEL CSV
# -------------------------------------------------------------
df = pd.read_csv("dataset_with_noise.csv")

# Etiquetas variable objetivo
y = df["label"]

# Guardar las clases
label_classes = np.unique(y)
np.save("rf_label_classes.npy", label_classes)

# Convertir letras a índices
label_to_idx = {c: i for i, c in enumerate(label_classes)}
y_idx = y.map(label_to_idx)

# -------------------------------------------------------------
# FEATURES NUMÉRICAS (TODO EXCEPTO PATH Y LABEL)
# -------------------------------------------------------------
feature_cols = [
    "x","y","z",
    "thumb","fore","index","ring","little",
    "thumb2","fore2","index2","ring2","little2",
    "keycode","gs1","gs2","sign",
    "giroX","giroY","giroZ"
]

X = df[feature_cols].values.astype("float32")

# -------------------------------------------------------------
# NORMALIZACIÓN
# -------------------------------------------------------------
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Guardar scaler
joblib.dump(scaler, "rf_scaler.pkl")

# -------------------------------------------------------------
# PCA
# -------------------------------------------------------------
pca = PCA(n_components=0.95, svd_solver='full')  # conserva 95% varianza
X_pca = pca.fit_transform(X_scaled)

np.save("rf_pca_components.npy", pca.components_)
np.save("rf_pca_mean.npy", pca.mean_)

print("PCA dimensiones reducidas:", X_pca.shape[1])

# -------------------------------------------------------------
# SPLIT
# -------------------------------------------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X_pca, y_idx, test_size=0.2, random_state=42, stratify=y_idx
)

# -------------------------------------------------------------
# RANDOM FOREST
# -------------------------------------------------------------
rf = RandomForestClassifier(
    n_estimators=500,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)

rf.fit(X_train, y_train)

# -------------------------------------------------------------
# EVALUACIÓN
# -------------------------------------------------------------
acc = rf.score(X_val, y_val)
print("Accuracy en validación:", acc)

# -------------------------------------------------------------
# GUARDAR MODELO
# -------------------------------------------------------------
joblib.dump(rf, "rf_model.pkl")

print("Modelo Random Forest guardado como rf_model.pkl")


PCA dimensiones reducidas: 9
Accuracy en validación: 0.9961977186311787
Modelo Random Forest guardado como rf_model.pkl


## SVM

In [3]:
import pandas as pd
import numpy as np
import cv2
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

def train_svm(csv_path="dataset.csv"):
    df = pd.read_csv(csv_path)

    # -------------------------------------
    # 1. Cargar y procesar imágenes
    # -------------------------------------
    images = []
    for path in df["image_path"]:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (64, 64))
        img = img.flatten() / 255.0
        images.append(img)

    images = np.array(images)

    # -------------------------------------
    # 2. Features numéricos del CSV
    # -------------------------------------
    numeric_cols = [
        "x","y","z",
        "thumb","fore","index","ring","little",
        "thumb2","fore2","index2","ring2","little2",
        "gs1","gs2",
        "giroX","giroY","giroZ"
    ]

    csv_features = df[numeric_cols].values.astype(float)

    # unir imagen + csv
    X = np.hstack([images, csv_features])

    # -------------------------------------
    # 3. Labels
    # -------------------------------------
    labels = df["label"]
    label_classes = np.unique(labels)
    label_to_idx = {c: i for i, c in enumerate(label_classes)}
    y = np.array([label_to_idx[l] for l in labels])

    # -------------------------------------
    # 4. Train Test Split
    # -------------------------------------
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y
    )

    # -------------------------------------
    # 5. Normalizar
    # -------------------------------------
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # -------------------------------------
    # 6. PCA
    # -------------------------------------
    pca = PCA(0.97)
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    # -------------------------------------
    # 7. Entrenar SVM
    # -------------------------------------
    svm = SVC(kernel="rbf", probability=True)
    svm.fit(X_train_pca, y_train)

    # -------------------------------------
    # 8. Evaluar
    # -------------------------------------
    y_pred = svm.predict(X_test_pca)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== MÉTRICAS DEL MODELO SVM =====")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")
    print("\nReporte de Clasificación:")
    print(classification_report(y_test, y_pred, zero_division=0))

    print("Matriz de Confusión:")
    print(cm)

    # -------------------------------------
    # 9. Guardar modelos
    # -------------------------------------
    joblib.dump(svm, "svm_model.pkl")
    joblib.dump(scaler, "svm_scaler.pkl")
    joblib.dump(pca, "svm_pca.pkl")
    np.save("svm_label_classes.npy", label_classes)

    print("\n✔ Modelo SVM entrenado, evaluado y guardado correctamente.")

train_svm("dataset_with_noise.csv")



===== MÉTRICAS DEL MODELO SVM =====
Accuracy:  0.9954
Precision: 0.9956
Recall:    0.9954
F1-Score:  0.9954

Reporte de Clasificación:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        25
           1       1.00      0.96      0.98        25
           2       1.00      0.96      0.98        25
           3       0.96      1.00      0.98        25
           4       1.00      1.00      1.00        25
           5       1.00      1.00      1.00        25
           6       1.00      1.00      1.00        26
           7       1.00      1.00      1.00        26
           8       1.00      1.00      1.00        25
           9       1.00      1.00      1.00        25
          10       1.00      1.00      1.00        26
          11       1.00      1.00      1.00        25
          12       1.00      1.00      1.00        25
          13       1.00      1.00      1.00        25
          14       1.00      1.00      1.00        26