In [17]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# Build a feature table from grayscale images: every file in FOLDER_PATH is
# resized, converted to grayscale, decomposed with a single-level 2-D DWT
# ('bior1.3'), flattened, z-score normalized, and written to CSV together
# with an integer label parsed from the first three characters of its name.

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'
OUTPUT_FILEPATH = 'final_attempt.csv'  # NOTE(review): unused here; the CSV path is hard-coded below
NEW_SIZE_IMAGES = (32, 32)  # dsize handed to cv2.resize, i.e. (width, height)
NUM_COMPONENTS_PCA = 240 # at least 80% cumulative variance ratio
characteristic_vectors = []
target_list = []
# os.listdir returns entries in arbitrary order; sort so the CSV row order
# is the same on every run and on every machine.
file_list = sorted(os.listdir(FOLDER_PATH))

# PCA
pca = PCA(n_components=NUM_COMPONENTS_PCA)  # created but not fitted in this cell

for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image is None:
        raise ValueError(f"cv2.imread could not decode {file_path!r}")
    resized_image = cv2.resize(image, NEW_SIZE_IMAGES)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    coeffs = pywt.dwt2(gray_image, 'bior1.3')
    approx, (horizontal_detail, vertical_detail, diagonal_detail) = coeffs
    flattened_coeffs = np.concatenate([approx.flatten(), horizontal_detail.flatten(),
                            vertical_detail.flatten(), diagonal_detail.flatten()])
    # Z-score per image. A constant coefficient vector (e.g. an all-black
    # image) has std == 0; keep the centered zeros instead of dividing by zero
    # and writing a NaN row.
    centered_coeffs = flattened_coeffs - flattened_coeffs.mean()
    coeffs_std = flattened_coeffs.std()
    normalized_coeffs = centered_coeffs / coeffs_std if coeffs_std > 0 else centered_coeffs
    characteristic_vectors.append(normalized_coeffs)
    # The class label is the leading three-digit prefix of the file name.
    target_list.append(int(file_name[:3]))

# One row per image, one column per coefficient, plus the label column.
data_matrix = pd.DataFrame(np.vstack(characteristic_vectors))
data_matrix['target'] = target_list
data_matrix.to_csv('datasets/final_images.csv', index = False)

In [20]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# Build a feature table from color images: every file in FOLDER_PATH is
# resized, each of its three channels is decomposed with a single-level 2-D
# DWT ('bior1.3'), the sub-bands are flattened and concatenated, z-score
# normalized, and written to CSV together with an integer label parsed from
# the first three characters of the file name.

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'
OUTPUT_FILEPATH = 'final_attempt.csv'  # NOTE(review): unused here; the CSV path is hard-coded below
NEW_SIZE_IMAGES = (32, 32)  # dsize handed to cv2.resize, i.e. (width, height)
NUM_COMPONENTS_PCA = 240 # at least 80% cumulative variance ratio
characteristic_vectors = []
target_list = []
# os.listdir returns entries in arbitrary order; sort so the CSV row order
# is the same on every run and on every machine.
file_list = sorted(os.listdir(FOLDER_PATH))

# PCA
pca = PCA(n_components=NUM_COMPONENTS_PCA)  # created but not fitted in this cell

for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image is None:
        raise ValueError(f"cv2.imread could not decode {file_path!r}")

    # Resize the color image
    resized_image = cv2.resize(image, NEW_SIZE_IMAGES)

    # Apply Wavelet Transform to each color channel
    coeffs = [pywt.dwt2(resized_image[:, :, i], 'bior1.3') for i in range(3)]
    # Layout: approximation for channels 0..2, then the first, second and
    # third detail sub-bands, each again for channels 0..2.
    flattened_coeffs = np.concatenate([c[0].flatten() for c in coeffs] +
                                       [c[1][0].flatten() for c in coeffs] +
                                       [c[1][1].flatten() for c in coeffs] +
                                       [c[1][2].flatten() for c in coeffs])

    # Normalize the coefficients (z-score per image). A constant vector
    # (e.g. an all-black image) has std == 0; keep the centered zeros instead
    # of dividing by zero and writing a NaN row.
    centered_coeffs = flattened_coeffs - flattened_coeffs.mean()
    coeffs_std = flattened_coeffs.std()
    normalized_coeffs = centered_coeffs / coeffs_std if coeffs_std > 0 else centered_coeffs

    characteristic_vectors.append(normalized_coeffs)
    # The class label is the leading three-digit prefix of the file name.
    target_list.append(int(file_name[:3]))

# One row per image, one column per coefficient, plus the label column.
data_matrix = pd.DataFrame(np.vstack(characteristic_vectors))
data_matrix['target'] = target_list
data_matrix.to_csv('datasets/final_images.csv', index=False)

In [28]:
!pip install tensorflow

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/9e/b8/ed5f794359d05cd0bffb894c6418da87b93016ee17b669d55c45d1bd5d5b/tensorflow-2.13.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow-2.13.0-cp311-cp311-win_amd64.whl.metadata (2.6 kB)
Collecting tensorflow-intel==2.13.0 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.13.0 from https://files.pythonhosted.org/packages/2f/2f/3c84f675931ce3bcbc7e23acbba1e5d7f05ce769adab48322de57a9f5928/tensorflow_intel-2.13.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow_intel-2.13.0-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.13.0->tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/01/e4/dc0a1dcc4e74e08d7abedab278c795eef54a224363bb18f5692f416d834f/absl_py-2.0.0-py3-none-any.whl.metadata
  Using cached absl_py-2.0.0-py3-none-

In [51]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
# Build a feature table of raw pixels with Keras utilities. Every file in
# FOLDER_PATH contributes two rows: the resized image with pixel values
# rescaled by 1/255, and one randomly augmented variant of it. All original
# rows come first, followed by all augmented rows, in the same file order.

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'
OUTPUT_FILEPATH = 'final_attempt.csv'  # NOTE(review): unused here; the CSV path is hard-coded below
NEW_SIZE_IMAGES = (28, 28)  # target_size handed to load_img, i.e. (height, width)
NUM_COMPONENTS_PCA = 240 # at least 80% cumulative variance ratio
AUGMENTATION_SEED = 42  # base seed that makes the random augmentation reproducible
characteristic_vectors = []
target_list = []
# os.listdir returns entries in arbitrary order; sort so the CSV row order
# (and the per-image augmentation seed below) is the same on every run.
file_list = sorted(os.listdir(FOLDER_PATH))

# Create an ImageDataGenerator that only rescales pixel values
# (resizing is done by load_img through target_size).
datagen = ImageDataGenerator(rescale = 1.0 / 255.0)  # Rescale pixel values to [0, 1]

for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)

    # Load and preprocess the image
    loaded_image = load_img(file_path, target_size=NEW_SIZE_IMAGES)  # Resize the image
    x = img_to_array(loaded_image)
    x = datagen.standardize(x)  # Applies the rescale configured on the generator

    # Flatten the image into a 1D array
    x = x.flatten()

    characteristic_vectors.append(x)
    # The class label is the leading three-digit prefix of the file name.
    target_list.append(int(file_name[:3]))


# Create an ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(
    rescale = 1.0 / 255.0,
    rotation_range = 40,          # Random rotation in the range [-40, 40] degrees
    width_shift_range = 0.2,      # Random horizontal shift by up to 20% of the image width
    height_shift_range = 0.2,     # Random vertical shift by up to 20% of the image height
    shear_range = 0.2,            # Shear transformation
    zoom_range = 0.2,             # Random zoom
    horizontal_flip = True,       # Random horizontal flip
    fill_mode = 'nearest'         # Fill mode for pixels outside the boundary
)

for file_index, file_name in enumerate(file_list):
    file_path = os.path.join(FOLDER_PATH, file_name)

    # Load and preprocess the image
    loaded_image = load_img(file_path, target_size = NEW_SIZE_IMAGES)  # Resize the image
    x = img_to_array(loaded_image)
    # A distinct, deterministic seed per image: every image gets a different
    # transform, yet re-running the cell reproduces the same CSV.
    # NOTE: passing a seed makes Keras reseed NumPy's global RNG.
    x = datagen.random_transform(x, seed=AUGMENTATION_SEED + file_index)
    x = datagen.standardize(x)       # Applies the rescale configured on the generator

    # Flatten the image into a 1D array
    x = x.flatten()

    characteristic_vectors.append(x)
    target_list.append(int(file_name[:3]))

# One row per (original or augmented) image, one column per pixel value,
# plus the label column.
data_matrix = pd.DataFrame(np.vstack(characteristic_vectors))
data_matrix['target'] = target_list
data_matrix.to_csv('datasets/final_images.csv', index=False)

In [53]:
!pip install tqdm

Collecting tqdm
  Obtaining dependency information for tqdm from https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl.metadata
  Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 0.0/57.6 kB ? eta -:--:--
     ---------------------------------------- 57.6/57.6 kB 1.5 MB/s eta 0:00:00
Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)
   ---------------------------------------- 0.0/78.3 kB ? eta -:--:--
   ---------------------------------------- 78.3/78.3 kB 2.2 MB/s eta 0:00:00
Installing collected packages: tqdm
Successfully installed tqdm-4.66.1


In [54]:
from tqdm import tqdm

In [66]:
# Build a feature table of raw pixels: every file in FOLDER_PATH is resized,
# flattened, scaled by 1/255, and written to CSV together with an integer
# label parsed from the first three characters of the file name.
FOLDER_PATH = 'datasets/Mariposas'
OUTPUT_FILEPATH = 'final_attempt.csv'  # NOTE(review): unused here; the CSV path is hard-coded below
NEW_SIZE_IMAGES = (32, 32)  # dsize handed to cv2.resize, i.e. (width, height)
NUM_COMPONENTS_PCA = 240 # at least 80% cumulative variance ratio
characteristic_vectors = []
target_list = []
# os.listdir returns entries in arbitrary order; sort so the CSV row order
# is the same on every run and on every machine.
file_list = sorted(os.listdir(FOLDER_PATH))

# Iterate the listing captured above rather than listing the folder a second
# time, so file_list always matches the rows that were actually written.
for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)
    image_read = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image_read is None:
        raise ValueError(f"cv2.imread could not decode {file_path!r}")
    image_resized = cv2.resize(image_read, NEW_SIZE_IMAGES).flatten()
    image_normalized = image_resized / 255.0  # scale pixel values by 1/255
    characteristic_vectors.append(image_normalized)
    # The class label is the leading three-digit prefix of the file name.
    target_list.append(int(file_name[:3]))


# One row per image, one column per pixel value, plus the label column.
data_matrix = pd.DataFrame(np.vstack(characteristic_vectors))
data_matrix['target'] = target_list
data_matrix.to_csv('datasets/final_images.csv', index=False)