In [17]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'  # directory whose entries are read as images
OUTPUT_FILEPATH = 'final_attempt.csv'  # NOTE: this cell's export writes to a hard-coded path instead
NEW_SIZE_IMAGES = (32, 32)  # size handed to cv2.resize, in pixels
NUM_COMPONENTS_PCA = 240  # author's note: at least 80% cumulative variance ratio
characteristic_vectors = []  # one flattened coefficient vector per processed image
target_list = []  # one integer target per processed image
# os.listdir yields entries in arbitrary, platform-dependent order; sorting
# makes the row order of the exported CSV reproducible across runs/machines.
file_list = sorted(os.listdir(FOLDER_PATH))

# PCA
# Only constructed here: nothing in this cell fits or applies it.
pca = PCA(n_components=NUM_COMPONENTS_PCA)

# Build one wavelet feature vector and one integer target per file in file_list.
for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)

    # cv2.imread returns None (it does not raise) when the entry cannot be
    # decoded as an image -- e.g. a sub-directory, a hidden file or a corrupt
    # download. Skip such entries instead of crashing inside cv2.resize.
    image = cv2.imread(file_path)
    if image is None:
        print(f'Skipping unreadable file: {file_path}')
        continue

    # The target is the first three characters of the file name parsed as an
    # integer. Parse it before appending anything so that a malformed name
    # cannot leave characteristic_vectors and target_list with different lengths.
    label = int(file_name[:3])

    resized_image = cv2.resize(image, NEW_SIZE_IMAGES)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # Single-level 2-D discrete wavelet transform; all four coefficient
    # sub-bands are flattened and concatenated into one feature vector.
    approx, (horizontal_detail, vertical_detail, diagonal_detail) = pywt.dwt2(gray_image, 'bior1.3')
    flattened_coeffs = np.concatenate([approx.ravel(), horizontal_detail.ravel(),
                                       vertical_detail.ravel(), diagonal_detail.ravel()])

    # Per-image standardisation. When every coefficient is identical the
    # standard deviation is 0 and the division would fill the row with NaN,
    # so in that case the vector is only centred.
    centered_coeffs = flattened_coeffs - flattened_coeffs.mean()
    coeffs_std = flattened_coeffs.std()
    normalized_coeffs = centered_coeffs / coeffs_std if coeffs_std > 0 else centered_coeffs

    characteristic_vectors.append(normalized_coeffs)
    target_list.append(label)

# Assemble the feature table (one row per processed image, one column per
# coefficient, targets in a trailing 'target' column) and export it as CSV
# without the index column.
feature_rows = np.vstack(characteristic_vectors)
data_matrix = pd.DataFrame(feature_rows).assign(target=target_list)
data_matrix.to_csv('datasets/final_images.csv', index=False)

In [18]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'  # directory whose entries are read as images
OUTPUT_FILEPATH = 'final_attempt.csv'  # destination of the exported CSV
NEW_SIZE_IMAGES = (32, 32)  # new resolution in pixels (handed to cv2.resize)
NUM_COMPONENTS_PCA = 240  # author's note: at least 80% of the cumulative variance
characteristic_vectors = []  # one flattened coefficient vector per processed image
target_list = []  # one integer target per processed image
# os.listdir yields entries in arbitrary, platform-dependent order; sorting
# makes the row order of the exported CSV reproducible across runs/machines.
file_list = sorted(os.listdir(FOLDER_PATH))

# PCA
# Only constructed here: nothing in this cell fits or applies it.
pca = PCA(n_components=NUM_COMPONENTS_PCA)

# Hard-coded DPI placeholder (72 DPI assumed as the default); it is not read
# from the image files and nothing in this cell uses it. Assigned once here
# rather than on every loop iteration.
original_dpi = (72, 72)

# Build one wavelet feature vector and one integer target per file in file_list.
for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)

    # Open the image with cv2. cv2.imread returns None (it does not raise)
    # when the entry cannot be decoded as an image -- e.g. a sub-directory, a
    # hidden file or a corrupt download. Skip such entries instead of
    # crashing inside cv2.resize.
    image = cv2.imread(file_path)
    if image is None:
        print(f'Skipping unreadable file: {file_path}')
        continue

    # The target is the first three characters of the file name parsed as an
    # integer. Parse it before appending anything so that a malformed name
    # cannot leave characteristic_vectors and target_list with different lengths.
    label = int(file_name[:3])

    # Resize the image to the new target size (32x32 pixels).
    resized_image = cv2.resize(image, NEW_SIZE_IMAGES)

    # Convert the image to grayscale.
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # Single-level 2-D discrete wavelet transform; all four coefficient
    # sub-bands are flattened and concatenated into one feature vector.
    # (No PCA is applied in this loop.)
    approx, (horizontal_detail, vertical_detail, diagonal_detail) = pywt.dwt2(gray_image, 'bior1.3')
    flattened_coeffs = np.concatenate([approx.ravel(), horizontal_detail.ravel(),
                                       vertical_detail.ravel(), diagonal_detail.ravel()])

    # Per-image standardisation. When every coefficient is identical the
    # standard deviation is 0 and the division would fill the row with NaN,
    # so in that case the vector is only centred.
    centered_coeffs = flattened_coeffs - flattened_coeffs.mean()
    coeffs_std = flattened_coeffs.std()
    normalized_coeffs = centered_coeffs / coeffs_std if coeffs_std > 0 else centered_coeffs

    characteristic_vectors.append(normalized_coeffs)
    target_list.append(label)

# Assemble the feature table (one row per processed image, one column per
# coefficient, targets in a trailing 'target' column) and export it to
# OUTPUT_FILEPATH as CSV without the index column.
feature_rows = np.vstack(characteristic_vectors)
data_matrix = pd.DataFrame(feature_rows).assign(target=target_list)
data_matrix.to_csv(OUTPUT_FILEPATH, index=False)
