In [13]:
import pandas as pd
import numpy as np
import os
import cv2
import tqdm

def folder_to_csv(folder, classification, save = False):
    """Build a CSV listing every entry of a folder alongside one class label.

    Args:
        folder: Path of the folder whose entries are listed. Every name
            returned by os.listdir is included (no extension filtering).
        classification: Label string written next to every path.
        save: When truthy, the CSV is also written to
            '<classification>.csv' (a relative path, so it lands in the
            current working directory).

    Returns:
        The CSV text with a "File Path" column and a "Classification"
        column, without an index column.
    """
    rows = [[os.path.join(folder, picture), classification]
            for picture in os.listdir(folder)]
    df = pd.DataFrame(rows, columns = ["File Path", "Classification"])
    if save:
        # to_csv returns None when given a path, so nothing is captured here.
        df.to_csv(f'{classification}.csv', index = False)
    return df.to_csv(index = False)

def folder_to_array(folder, image_size, vectorlocation, save = False, num_classes = 4):
    """Load the jpg images of a folder as grayscale arrays paired with a one-hot label.

    Args:
        folder: Path of the folder to scan. Only entries whose name
            contains "jpg" are read.
        image_size: Side length, in pixels, that every image is resized to
            (images become image_size x image_size).
        vectorlocation: Index of the 1 in the one-hot label vector.
        save: When truthy, the resulting array is also written with
            np.save using `folder` as the file name (np.save appends
            ".npy"). The array has object dtype, so it is pickled and
            must be loaded with allow_pickle=True.
        num_classes: Length of the one-hot label vector. Defaults to 4,
            the value that used to be hard-coded.

    Returns:
        An object array of shape (N, 2): column 0 holds each image array,
        column 1 holds the one-hot label for this folder.
    """
    identity = np.eye(num_classes)
    samples = []
    for picture in os.listdir(folder):
        if "jpg" in picture:
            path = os.path.join(folder, picture)
            img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (image_size, image_size))
            # Copy the row so each sample owns its label instead of sharing a view.
            samples.append((np.array(img), identity[vectorlocation].copy()))

    # Fill a pre-shaped object array cell by cell rather than relying on
    # np.array's shape discovery for ragged (image, label) pairs.
    training_data = np.empty((len(samples), 2), dtype = object)
    for row, (img, label) in enumerate(samples):
        training_data[row, 0] = img
        training_data[row, 1] = label

    if save:
        np.save(folder, training_data)
    return training_data

#example function calls:
#folder_to_csv(r"AugmentedAlzheimerDataset\VeryMildDemented", "verymilddemented", True)
#folder_to_array(r"AugmentedAlzheimerDataset\VeryMildDemented", 100, 0, True)
        

File Path,Classification
AugmentedAlzheimerDataset\VeryMildDemented\0001b959-d622-4311-acab-84633370c892.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\0003659d-f8db-4ce4-9230-2ba24506df68.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\000a074f-a3a5-4c70-8c94-d7ed7bbe7018.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\000b7abc-2404-411d-a46d-467ec55b7795.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\000dea20-ea76-4248-a45d-4119f0bc5ccc.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\000efebb-d4ae-449a-ab10-5e74afa79ef9.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\000f173f-ebd5-4ef7-b4d4-6cf80cf754b5.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\0020ed3a-2b5f-4e46-9b96-97484c10a88c.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\002149f6-8f25-4b27-9d7a-d059f3966676.jpg,verymilddemented
AugmentedAlzheimerDataset\VeryMildDemented\00255a89-4920-4ac5-b9e9