In [2]:
import os
import cv2
import numpy as np
from typing import List
import matplotlib.pyplot as plt

In [3]:
# Seed NumPy's legacy global RNG so that later calls drawing from np.random
# (e.g. np.random.shuffle) give the same result on every run.
seed = 1234
np.random.seed(seed=seed)

In [4]:
def process_data(directories: List[str], image_size=(32, 32)):
    """Load every readable image from a list of class directories.

    The class index of an image is the position of its directory in
    ``directories`` (first directory -> 0, second -> 1, ...).

    Args:
        directories: One directory per class, in class-index order.
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``.

    Returns:
        ``(images, labels)`` as NumPy arrays. Images are resized and converted
        from BGR to RGB. Entries that ``cv2.imread`` cannot decode are skipped.
    """
    images = []
    labels = []
    for class_index, label_directory in enumerate(directories):
        # os.listdir returns names in arbitrary order; sorting makes the saved
        # dataset identical from one run / machine to the next.
        for filename in sorted(os.listdir(label_directory)):
            filepath = os.path.join(label_directory, filename)
            img = cv2.imread(filepath)
            if img is None:
                # cv2.imread signals an unreadable entry (sub-folder, non-image
                # file, corrupt image) by returning None rather than raising.
                continue
            img = cv2.resize(img, image_size)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(img)
            labels.append(class_index)
    return np.array(images), np.array(labels)

In [6]:
# process_data labels each image with the position of its directory in this
# list: 'Eggs/NotDamaged' -> 0, 'Eggs/Damaged' -> 1.
directory = [
    'Eggs/NotDamaged',
    'Eggs/Damaged',
]
images, labels = process_data(directories=directory)

In [7]:
# Write the egg images and their class indices to one uncompressed .npz archive.
np.savez('Dataset/eggs.npz', images=images, labels=labels)

In [8]:
# List position is the class index: 'corals/healthy_corals' -> 0,
# 'corals/bleached_corals' -> 1.
directory = [
    'corals/healthy_corals',
    'corals/bleached_corals',
]
images, labels = process_data(directories=directory)

In [10]:
# Write the coral images and their class indices to one uncompressed .npz archive.
np.savez('Dataset/corals.npz', images=images, labels=labels)

In [1]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder

def load_images_from_folder(folder, image_size=(32, 32)):
    """Load all images from a folder laid out as one sub-folder per class.

    Args:
        folder: Directory whose immediate sub-directories are named after the
            classes. Non-directory entries directly inside it are ignored.
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``. Defaults to ``(32, 32)``.

    Returns:
        ``(images, labels)``: a list of RGB image arrays and a parallel list
        holding the name of the sub-folder each image came from.
        Files that cannot be loaded are reported on stdout and skipped.
    """
    images = []
    labels = []
    # os.listdir returns names in arbitrary order; sorting keeps the sample
    # order stable across runs and machines.
    for label_dir in sorted(os.listdir(folder)):
        label_path = os.path.join(folder, label_dir)
        if not os.path.isdir(label_path):
            continue
        for filename in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, filename)
            try:
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread returns None for files it cannot decode; say
                    # so directly instead of relying on cvtColor to fail.
                    print(f"Error loading image {img_path}: file could not be read as an image")
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
                img = cv2.resize(img, image_size)
                images.append(img)
                labels.append(label_dir)
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                print(f"Error loading image {img_path}: {e}")
    return images, labels

def process_dataset(data_dir, output_path='Dataset/xray.npz'):
    """Merge the train/test/val splits under ``data_dir`` into one ``.npz`` file.

    Each split folder is read with ``load_images_from_folder``. The results
    are concatenated in train, test, val order, the string labels are
    integer-encoded with ``LabelEncoder`` and everything is written with
    ``np.savez_compressed``.

    Args:
        data_dir: Folder containing ``train``, ``test`` and ``val`` sub-folders.
        output_path: Destination archive. Defaults to ``'Dataset/xray.npz'``,
            the location that used to be hard-coded. Missing parent folders
            are created.

    Returns:
        None. The archive stores ``images``, ``labels`` (encoded integers) and
        ``label_classes`` (the encoder's ``classes_``, i.e. the original names).
    """
    train_images, train_labels = load_images_from_folder(os.path.join(data_dir, 'train'))
    test_images, test_labels = load_images_from_folder(os.path.join(data_dir, 'test'))
    val_images, val_labels = load_images_from_folder(os.path.join(data_dir, 'val'))

    # Per-split counts, to sanity-check the load.
    print(len(train_images), len(test_images))
    print(len(train_labels), len(test_labels))
    print(len(val_images), len(val_labels))

    # Merge train, test and validation data into a single pool.
    images = train_images + test_images + val_images
    labels = train_labels + test_labels + val_labels
    print(len(images))
    print(len(labels))

    # Encode string class names as integers.
    le = LabelEncoder()
    labels_encoded = le.fit_transform(labels)

    # Convert lists to numpy arrays
    images = np.array(images)
    labels_encoded = np.array(labels_encoded)

    # np.savez_compressed does not create missing folders itself.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    np.savez_compressed(output_path,
                        images=images, labels=labels_encoded,
                        label_classes=le.classes_)

    # Report the path that was actually written.
    print(f"Data has been saved to {output_path}")

In [2]:
# Root folder with the 'train', 'test' and 'val' split sub-folders;
# replace with the actual path on this machine.
data_dir = 'chest_xray'
process_dataset(data_dir=data_dir)

5216 624
5216 624
16 16
5856
5856
Data has been saved to image_classification_data.npz


In [15]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder

def load_images_from_folder(folder, image_size=(32, 32), exclude_labels=('Viral Pneumonia',)):
    """Load images from a one-sub-folder-per-class layout, skipping some classes.

    Args:
        folder: Directory whose immediate sub-directories are named after the
            classes.
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``. Defaults to ``(32, 32)``.
        exclude_labels: Entry names to skip entirely. Defaults to
            ``('Viral Pneumonia',)``, the class that used to be hard-coded.
            A single string is treated as one label.

    Returns:
        ``(images, labels)``: a list of RGB image arrays and a parallel list
        holding the name of the sub-folder each image came from.
        Files that cannot be loaded are reported on stdout and skipped.
    """
    if isinstance(exclude_labels, str):
        # Without this, `in` below would do a substring test.
        exclude_labels = (exclude_labels,)

    images = []
    labels = []
    # os.listdir returns names in arbitrary order; sorting keeps the sample
    # order stable across runs and machines.
    for label_dir in sorted(os.listdir(folder)):
        if label_dir in exclude_labels:
            continue
        print(label_dir)  # progress: every entry that is not excluded
        label_path = os.path.join(folder, label_dir)
        if not os.path.isdir(label_path):
            continue
        for filename in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, filename)
            try:
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread returns None for files it cannot decode; say
                    # so directly instead of relying on cvtColor to fail.
                    print(f"Error loading image {img_path}: file could not be read as an image")
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
                img = cv2.resize(img, image_size)
                images.append(img)
                labels.append(label_dir)
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                print(f"Error loading image {img_path}: {e}")
    return images, labels

def process_dataset(data_dir, output_path='Dataset/covid.npz'):
    """Merge the train/test splits under ``data_dir`` into one ``.npz`` file.

    Each split folder is read with ``load_images_from_folder``. The results
    are concatenated in train, test order, the string labels are
    integer-encoded with ``LabelEncoder`` and everything is written with
    ``np.savez_compressed``.

    Args:
        data_dir: Folder containing ``train`` and ``test`` sub-folders.
        output_path: Destination archive. Defaults to ``'Dataset/covid.npz'``,
            the location that used to be hard-coded. Missing parent folders
            are created.

    Returns:
        None. The archive stores ``images``, ``labels`` (encoded integers) and
        ``label_classes`` (the encoder's ``classes_``, i.e. the original names).
    """
    train_images, train_labels = load_images_from_folder(os.path.join(data_dir, 'train'))
    test_images, test_labels = load_images_from_folder(os.path.join(data_dir, 'test'))

    # Per-split counts, to sanity-check the load.
    print(len(train_images), len(test_images))
    print(len(train_labels), len(test_labels))

    # Merge train and test data
    images = train_images + test_images
    labels = train_labels + test_labels
    print(len(images))
    print(len(labels))

    # Encode string class names as integers.
    le = LabelEncoder()
    labels_encoded = le.fit_transform(labels)

    # Convert lists to numpy arrays
    images = np.array(images)
    labels_encoded = np.array(labels_encoded)

    # np.savez_compressed does not create missing folders itself.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    np.savez_compressed(output_path,
                        images=images, labels=labels_encoded,
                        label_classes=le.classes_)

    # Report the path that was actually written.
    print(f"Data has been saved to {output_path}")

In [16]:
# Root folder with the 'train' and 'test' split sub-folders;
# replace with the actual path on this machine.
data_dir = 'Covid19-dataset'
process_dataset(data_dir=data_dir)

Covid
Normal
Covid
Normal
181 46
181 46
227
227
Data has been saved to image_classification_data.npz


Exception ignored in atexit callback: <bound method InteractiveShell.atexit_operations of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x7b6761407950>>
Traceback (most recent call last):
  File "/home/parkmyungheon/anaconda3/envs/Keras/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3920, in atexit_operations
    self._atexit_once()
  File "/home/parkmyungheon/anaconda3/envs/Keras/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3903, in _atexit_once
    self.history_manager.end_session()
  File "/home/parkmyungheon/anaconda3/envs/Keras/lib/python3.11/site-packages/IPython/core/history.py", line 585, in end_session
    self.db.execute(
sqlite3.OperationalError: attempt to write a readonly database


In [4]:
# List position is the class index: 'Car-Bike-Dataset/Car' -> 0,
# 'Car-Bike-Dataset/Bike' -> 1.
directories = [
    'Car-Bike-Dataset/Car',
    'Car-Bike-Dataset/Bike',
]
images, labels = process_data(directories=directories)

In [6]:
# Write the car/bike images and their class indices to one uncompressed .npz archive.
np.savez('Dataset/car_bike_raw.npz', images=images, labels=labels)

In [7]:
# List position is the class index: 'pizza_not_pizza/pizza' -> 0,
# 'pizza_not_pizza/not_pizza' -> 1.
directories = [
    'pizza_not_pizza/pizza',
    'pizza_not_pizza/not_pizza',
]
images, labels = process_data(directories, image_size=(32, 32))

In [8]:
# Write the pizza images and their class indices to one uncompressed .npz archive.
np.savez('Dataset/pizza_raw_32.npz', images=images, labels=labels)

In [9]:
import tensorflow

2024-05-01 02:20:03.878756: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-01 02:20:03.881041: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-01 02:20:03.902550: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
def get_n_labels_random_samples(n, m):
    """Draw up to ``m`` random CIFAR-10 images for each of the labels ``0 .. n-1``.

    The CIFAR-10 train and test splits are pooled before sampling. Selection
    uses ``np.random.shuffle``, so it follows NumPy's global random state.

    Args:
        n: Number of classes to keep, taken as labels ``0`` to ``n - 1``.
        m: Maximum number of images kept per class.

    Returns:
        ``(images, labels)`` as NumPy arrays, grouped by label in ascending
        label order.
    """
    train_split, test_split = tensorflow.keras.datasets.cifar10.load_data()
    (train_x, train_y), (test_x, test_y) = train_split, test_split

    # Pool both splits, then drop singleton axes from the label array so it
    # can be compared against a scalar label.
    X_all = np.concatenate([train_x, test_x])
    y_all = np.squeeze(np.concatenate([train_y, test_y]))

    picked_images = []
    picked_labels = []
    for class_id in range(n):
        class_rows = np.where(y_all == class_id)[0]
        np.random.shuffle(class_rows)  # in-place, global RNG
        keep = class_rows[:m]
        picked_images += list(X_all[keep])
        picked_labels += list(y_all[keep])

    return np.array(picked_images), np.array(picked_labels)

In [11]:
# n: how many classes to keep (labels 0 .. n-1); m: maximum samples per class.
n, m = 2, 500

In [12]:
# Sample up to m images for each of the first n CIFAR-10 labels.
images, labels = get_n_labels_random_samples(n=n, m=m)

In [13]:
# Write the sampled CIFAR-10 subset to one uncompressed .npz archive.
np.savez('Dataset/cifar10_2_500.npz', images=images, labels=labels)