In [17]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'  # directory holding the input images
# NOTE(review): OUTPUT_FILEPATH is not referenced anywhere in this cell -- confirm
# whether it is still needed.
OUTPUT_FILEPATH = 'final_attempt.csv'
NEW_SIZE_IMAGES = (32, 32)  # (width, height) in pixels handed to cv2.resize
NUM_COMPONENTS_PCA = 240  # intended to keep at least 80% cumulative variance ratio
characteristic_vectors = []  # one flattened feature vector per image
target_list = []  # one integer label per image, parsed from the file name
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the exported CSV reproducible across runs and platforms.
file_list = sorted(os.listdir(FOLDER_PATH))

# PCA
# NOTE(review): the estimator is only constructed here; this cell never fits or
# applies it, so the exported features are not PCA-reduced.
pca = PCA(n_components=NUM_COMPONENTS_PCA)

# Build one normalized wavelet feature vector (plus its label) per image file.
for file_name in file_list:
    file_path = os.path.join(FOLDER_PATH, file_name)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an opaque assertion inside cv2.resize.
    image = cv2.imread(file_path)
    if image is None:
        raise ValueError(
            'Could not read {!r} as an image (missing, unreadable or '
            'unsupported file)'.format(file_path)
        )

    resized_image = cv2.resize(image, NEW_SIZE_IMAGES)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # Single-level 2-D discrete wavelet transform; all four sub-bands are kept
    # and concatenated into one flat feature vector.
    coeffs = pywt.dwt2(gray_image, 'bior1.3')
    approx, (horizontal_detail, vertical_detail, diagonal_detail) = coeffs
    flattened_coeffs = np.concatenate([approx.flatten(), horizontal_detail.flatten(),
                            vertical_detail.flatten(), diagonal_detail.flatten()])

    # Standardize each vector to zero mean and unit variance. A constant vector
    # (e.g. all-zero coefficients) has zero standard deviation; dividing by it
    # would fill the row with NaN, so fall back to a divisor of 1 in that case.
    coeffs_std = flattened_coeffs.std()
    if coeffs_std == 0:
        coeffs_std = 1.0
    normalized_coeffs = (flattened_coeffs - flattened_coeffs.mean()) / coeffs_std

    characteristic_vectors.append(normalized_coeffs)
    # The first three characters of the file name are parsed as the integer label.
    target_list.append(int(file_name[:3]))

# One row per image, feature columns first, label in the trailing 'target' column.
data_matrix = pd.DataFrame(np.vstack(characteristic_vectors))
data_matrix['target'] = target_list
data_matrix.to_csv('datasets/final_images.csv', index = False)

In [230]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd

# CONSTANTS AND VARIABLES
FOLDER_PATH = 'datasets/Mariposas'  # directory holding the input images
# NOTE(review): OUTPUT_FILEPATH is not referenced anywhere in this cell -- confirm
# whether it is still needed.
OUTPUT_FILEPATH = 'final_attempt.csv'
NEW_SIZE_IMAGES = (512, 512)  # (width, height) in pixels handed to cv2.resize
LEVELS = 6  # decomposition depth passed to pywt.wavedec2
characteristic_vectors = []  # one flattened feature vector per image
target_list = []  # one integer label per image, parsed from the file name
# List the directory once. os.listdir returns entries in arbitrary order, so
# sort them to keep the row order of the exported CSV reproducible.
file_list = sorted(os.listdir(FOLDER_PATH))
def read_and_resize_to_gray_image(file_name):
    """Read an image from FOLDER_PATH, resize it and convert it to grayscale.

    Parameters
    ----------
    file_name : str
        Name of the image file, relative to ``FOLDER_PATH``.

    Returns
    -------
    numpy.ndarray
        The image resized to ``NEW_SIZE_IMAGES`` and converted from BGR to
        grayscale.

    Raises
    ------
    ValueError
        If OpenCV cannot read or decode the file.
    """
    file_path = os.path.join(FOLDER_PATH, file_name)
    actual_image = cv2.imread(file_path)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque assertion inside cv2.resize.
    if actual_image is None:
        raise ValueError(
            'Could not read {!r} as an image (missing, unreadable or '
            'unsupported file)'.format(file_path)
        )
    resized_image = cv2.resize(actual_image, NEW_SIZE_IMAGES)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    return gray_image

# Wavelet-transform every image and collect its feature row and label.
for file_name in file_list:
    # Load the image already resized and converted to grayscale.
    gray_resized_image = read_and_resize_to_gray_image(file_name)

    # Multi-level 2-D decomposition: every extra level shrinks the approximation
    # band, so a deeper decomposition yields fewer features per image.
    coeffs = pywt.wavedec2(
        gray_resized_image,
        'db5',
        mode='periodization',
        level=LEVELS,
    )

    # coeffs[0] is the coarsest approximation band (8 x 8 for the 512 x 512
    # input and 6 levels configured in this cell); unroll it into a single
    # 1-D row so it can be stacked into the feature matrix.
    cA = np.ravel(coeffs[0])
    characteristic_vectors.append(cA)

    # The first three characters of the file name are parsed as the integer label.
    target_list.append(int(file_name[:3]))

# One row per image: feature columns first, label in the trailing 'target' column.
feature_matrix = np.vstack(characteristic_vectors)
data_matrix = pd.DataFrame(feature_matrix).assign(target=target_list)
data_matrix.to_csv('datasets/wavelet_tranformed_images.csv', index=False)