<a href="https://colab.research.google.com/github/Samruddhi-saoji/Dataset_handler/blob/main/Dataset_handler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Image handling

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import glob

# Read an image
def read(img_path):
    """Load the image stored at img_path as a single-channel grayscale array.

    Args:
        img_path: Path of the image file to load.

    Returns:
        Whatever cv2.imread yields for the path in grayscale mode. Callers in
        this file treat a None result as "not a readable image".
    """
    grayscale = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    return grayscale


# Save an image
def save(img, img_path="output.jpeg"):
    """Write img to disk at img_path.

    Args:
        img: Image array to write.
        img_path: Destination file path; defaults to "output.jpeg" in the
            current working directory.

    Returns:
        The status flag returned by cv2.imwrite, so callers can detect a write
        that did not succeed instead of having it pass unnoticed.
    """
    return cv2.imwrite(img_path, img)


# Get a list of all the images in a folder
# returns (list of images, list of names)
def list_images(folder_path):
    """Read every readable image directly inside folder_path.

    Args:
        folder_path: Folder whose entries should be loaded.

    Returns:
        A tuple (images, names). images[i] is the array loaded from the entry
        called names[i]; entries for which read() returns None are left out of
        both lists so the two always stay the same length and in step.
    """
    images = []
    names = []

    for img_name in os.listdir(folder_path):
        # Full path of the entry = folder path + entry name
        image_path = os.path.join(folder_path, img_name)

        # Decode once and reuse the result
        img = read(image_path)
        if img is None:
            continue

        images.append(img)
        names.append(img_name)

    return (images, names)


# Delete all files in a folder
def delete_all_files(folder_path):
    """Remove the regular files directly inside folder_path.

    Subfolders and their contents are left in place. Because the listing uses
    the "*" glob pattern, names starting with a dot are not matched and are
    therefore kept.

    Args:
        folder_path: Folder to empty.
    """
    # Escape the folder part so characters such as "[" or "*" in its name are
    # taken literally instead of being interpreted as glob syntax
    pattern = os.path.join(glob.escape(folder_path), '*')
    for file in glob.glob(pattern):
        if os.path.isfile(file):
            os.remove(file)


# Display image
def display(img):
    """Show img with matplotlib using the gray colormap and no axes.

    Args:
        img: Image array to show.
    """
    plt.imshow(img, cmap="gray")
    plt.axis("off")  # hide the axes around the picture
    plt.show()

# Compress image
#   reduce img from size (r1, c1) to (r2, c2) by copying every (r1 // r2)th row
#   and every (c1 // c2)th column of the original image
def compress_img(img, r1, c1, r2, c2):
    """Downsample img by keeping evenly spaced rows and columns.

    Args:
        img: Array indexed as img[row, col].
        r1: Number of rows in img.
        c1: Number of columns in img.
        r2: Number of rows wanted in the result.
        c2: Number of columns wanted in the result.

    Returns:
        A strided slice of img with at most r2 rows and c2 columns. When a
        target dimension is larger than the source, that dimension is returned
        unchanged (this function never enlarges an image).

    Raises:
        ValueError: If r2 or c2 is not positive.
    """
    if r2 <= 0 or c2 <= 0:
        raise ValueError("Target size (r2, c2) must be positive")

    # A stride below 1 is not a valid slice step; it occurs when the target is
    # larger than the source, in which case every row/column is kept
    row_step = max(1, r1 // r2)
    col_step = max(1, c1 // c2)

    # When the sizes do not divide evenly the stride yields a few extra rows or
    # columns; trim them so the result does not exceed the requested size
    return img[::row_step, ::col_step][:r2, :c2]


# Cut a (num_rows, num_cols) window out of an image
def crop(img, r_start, num_rows, c_start, num_cols):
    """Return the rectangular region of img that starts at (r_start, c_start).

    Args:
        img: Array indexed as img[row, col].
        r_start: Index of the first row to keep.
        num_rows: Number of rows to keep.
        c_start: Index of the first column to keep.
        num_cols: Number of columns to keep.

    Returns:
        The slice img[r_start:r_start + num_rows, c_start:c_start + num_cols].
    """
    row_window = slice(r_start, r_start + num_rows)
    col_window = slice(c_start, c_start + num_cols)
    return img[row_window, col_window]


# Create a composite image from a list of images
# images = list of images
def get_composite(images):
    """Average a collection of equally shaped images pixel by pixel.

    Args:
        images: Non-empty sequence of numpy arrays that all share one shape.

    Returns:
        A uint8 array of that shape holding the per-pixel mean of the inputs.
        The cast to uint8 drops the fractional part of each mean.

    Raises:
        ValueError: If images is empty or the shapes are not all equal.
    """
    # len() rather than truthiness so an ndarray of stacked images is accepted
    if len(images) == 0:
        raise ValueError("At least one image is required")

    # Ensure all images have the same shape
    image_shape = images[0].shape
    if any(img.shape != image_shape for img in images):
        raise ValueError("All images must have the same shape")

    # Accumulate in float32 so the running sum is not limited by the range of
    # the input dtype
    pixel_sum = np.zeros_like(images[0], dtype=np.float32)
    for img in images:
        pixel_sum += img.astype(np.float32)

    # Divide by the image count to get the mean, then store as 8-bit pixels
    return (pixel_sum / len(images)).astype(np.uint8)

# Dataset handler

In [None]:
class Dataset:
    """Image-classification dataset loaded from a folder of category subfolders.

    The dataset root holds one subfolder per category. load_data() reads the
    images of every category with the module-level read() helper and labels
    each one with the integer id of its category.
    """

    def __init__(self, dataset_path, expected_shape):
        """Record where the dataset lives and discover its categories.

        Args:
            dataset_path: Folder containing one subfolder per category.
            expected_shape: (rows, cols) an image must have to be kept by
                load_data().
        """
        self.dataset_path = dataset_path

        # Categories are the visible subfolders of the root. Stray files and
        # hidden folders (e.g. notebook checkpoint folders) are skipped by
        # name/type rather than by position, because os.listdir() returns
        # entries in no guaranteed order. Sorting keeps the name -> id mapping
        # the same on every run.
        self.category_names = sorted(
            name
            for name in os.listdir(dataset_path)
            if not name.startswith(".")
            and os.path.isdir(os.path.join(dataset_path, name))
        )
        # category name -> integer label
        self.encoding = {category: i for i, category in enumerate(self.category_names)}
        self.num_categories = len(self.category_names)
        # Stored as a tuple so it compares equal to an array's .shape even
        # when the caller passed a list
        self.shape = tuple(expected_shape)
        self.file_names = []  # file name of each kept image, aligned with X and Y

        self.X = []  # list of images #3D
        self.Y = []  # list of labels #1D

        self.num_images = 0  # no of images

    def load_data(self):
        """Read every category folder and fill X, Y, file_names and num_images.

        Only entries that read() can load and whose shape equals self.shape
        are kept. Calling this again rebuilds the data from scratch. After the
        call, X and Y are numpy arrays.
        """
        images, labels, names = [], [], []

        for category in self.category_names:
            cat_dir_path = os.path.join(self.dataset_path, category)
            cat_id = self.encoding[category]

            # Each name is handled together with the image read from it, so a
            # skipped entry can never shift later names onto the wrong image
            for fname in sorted(os.listdir(cat_dir_path)):
                img = read(os.path.join(cat_dir_path, fname))
                if img is None or img.shape != self.shape:
                    continue
                images.append(img)
                labels.append(cat_id)
                names.append(fname)

        self.file_names = names
        # update no of images
        self.num_images = len(labels)

        # convert to arrays
        self.X, self.Y = np.array(images), np.array(labels)

    def convert_to_csv(self, dest_path):
        """Write the loaded data to a CSV file with one row per image.

        Columns are pixel_0 .. pixel_{rows*cols - 1} (the flattened image)
        followed by label.

        Args:
            dest_path: Path of the CSV file to create.
        """
        image_height, image_width = self.shape
        num_pixels = image_height * image_width

        columns = [f"pixel_{i}" for i in range(num_pixels)]
        columns.append("label")

        # Build the whole table at once: one flattened image per row with its
        # label as the last column. This avoids growing the DataFrame row by row.
        pixels = np.asarray(self.X).reshape(self.num_images, num_pixels)
        labels = np.asarray(self.Y).reshape(self.num_images, 1)
        df = pd.DataFrame(np.hstack([pixels, labels]), columns=columns)

        # Save DataFrame to CSV
        df.to_csv(dest_path, index=False)
        print(f"CSV file saved at {dest_path}")

    # (r_old, c_old) to (r_new, c_new)
    def compress_data(self, r_new, c_new):
        """Return a compressed copy of every loaded image.

        Args:
            r_new: Target number of rows.
            c_new: Target number of columns.

        Returns:
            A list with the result of compress_img() for each image in X, in
            the same order as X.
        """
        r_old, c_old = self.shape[0], self.shape[1]
        return [compress_img(img, r_old, c_old, r_new, c_new) for img in self.X]

    # (r_old, c_old) to (r_new, c_new)
    def create_compressed_dataset(self, r_new, c_new, dest_path):
        """Save a compressed copy of the dataset under dest_path.

        One subfolder per category is created inside dest_path, and every
        compressed image is saved there under its original file name.

        Args:
            r_new: Target number of rows.
            c_new: Target number of columns.
            dest_path: Root folder of the new dataset; created if missing.
        """
        compressed_data = self.compress_data(r_new, c_new)

        # Create the category subfolders and remember where each label goes
        paths = {}  # dict of category_id:path pairs
        for cat in self.category_names:
            cat_path = os.path.join(dest_path, cat)  # Path to category subfolder
            os.makedirs(cat_path, exist_ok=True)  # Create the subfolder
            print(f"Created subfolder: {cat_path}")

            paths[self.encoding[cat]] = cat_path

        # save each compressed img to new dataset
        triples = zip(compressed_data, self.Y, self.file_names)
        for i, (img, cat_id, file_name) in enumerate(triples):
            path = os.path.join(paths[int(cat_id)], file_name)
            save(img, path)  # save img to folder
            print(f"{i}/{self.num_images}")
        print("new dataset created")