In [2]:
import os
import cv2
import numpy as np
from sklearn.cluster import MiniBatchKMeans

def load_images_from_folder(folder):
    """Load every readable image located directly inside ``folder``.

    Entries are visited in sorted filename order so that the position of each
    image in the returned list (which this notebook uses to build the
    ``image_<idx>.jpg`` output names) is reproducible; ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        list: Images as returned by ``cv2.imread``, in sorted filename order.
        Sub-directories and files OpenCV cannot decode are skipped.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Skip sub-directories up front instead of asking OpenCV to decode them.
        if not os.path.isfile(path):
            continue
        img = cv2.imread(path)
        # cv2.imread returns None for files it cannot read as an image.
        if img is not None:
            images.append(img)
    return images

def preprocess_images(images, target_size=(100, 100)):
    """Resize images to a common size and scale their pixel values by 1/255.

    Args:
        images: Iterable of image arrays (e.g. as returned by ``cv2.imread``).
        target_size: Size tuple forwarded to ``cv2.resize``.

    Returns:
        numpy.ndarray: Stack of the resized images as ``float32`` values
        divided by 255 (8-bit inputs therefore map to the range [0, 1]).
    """
    preprocessed_images = []
    for img in images:
        resized_img = cv2.resize(img, target_size)
        # Keep the scaled values as floats. Casting back to uint8 after the
        # division truncates every pixel below 255 to 0, which leaves the
        # clustering step with almost no information.
        normalized_img = resized_img.astype(np.float32) / 255.0
        preprocessed_images.append(normalized_img)
    return np.array(preprocessed_images)

# Folders scanned for input images (each one is listed non-recursively).
folder_paths = [
    './raffaello',
    './COAP',
    './COAP/bag',
    './COAP/dress',
    './COAP/outers',
    './COAP/pants',
    './COAP/shoes',
    './COAP/top',
    './GABABA/outers',
    './GABABA/pants',
    './GABABA/tops',
    './ZARA_men/bag',
    './ZARA_men/outer',
    './ZARA_men/pants',
    './ZARA_men/shoes',
    './ZARA_men/top'
]

# Load all images from folders
all_images = []
for folder_path in folder_paths:
    all_images.extend(load_images_from_folder(folder_path))

# Fail early with a clear message; otherwise the reshape below raises an
# opaque "cannot reshape array of size 0" error.
if not all_images:
    raise ValueError("No readable images found in any of the configured folders")

# Preprocess all images and flatten each one into a single feature row
processed_images = preprocess_images(all_images)
flattened_images = processed_images.reshape(processed_images.shape[0], -1)

# Initialize MiniBatchKMeans object (fixed random_state for repeatable runs)
num_clusters = 5  # Specify number of clusters
kmeans = MiniBatchKMeans(n_clusters=num_clusters, random_state=42)

# Fit KMeans to data
kmeans.fit(flattened_images)

# Create the output root; exist_ok avoids the check-then-create race
output_dir = "./clustered_data"
os.makedirs(output_dir, exist_ok=True)

# Save the original (un-resized) images into one folder per cluster
for i in range(num_clusters):
    cluster_dir = os.path.join(output_dir, f"cluster_{i}")
    os.makedirs(cluster_dir, exist_ok=True)
    # Positions in all_images whose assigned label is cluster i
    cluster_indices = np.where(kmeans.labels_ == i)[0]
    for idx in cluster_indices:
        img = all_images[idx]
        filename = f"image_{idx}.jpg"
        cv2.imwrite(os.path.join(cluster_dir, filename), img)

In [4]:
import os
import cv2
import numpy as np
from sklearn.cluster import MiniBatchKMeans

def load_images_from_folders(folders):
    """Load every readable image located directly inside each given folder.

    Folders are processed in the order given; within a folder, entries are
    visited in sorted filename order so that the position of each image in
    the returned list (which this notebook uses to build the
    ``image_<idx>.jpg`` output names) is reproducible. ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        folders: Iterable of directory paths to scan (not recursive).

    Returns:
        list: Images as returned by ``cv2.imread``. Sub-directories and files
        OpenCV cannot decode are skipped.
    """
    images = []
    for folder in folders:
        for filename in sorted(os.listdir(folder)):
            path = os.path.join(folder, filename)
            # Skip sub-directories up front instead of asking OpenCV to
            # decode them.
            if not os.path.isfile(path):
                continue
            img = cv2.imread(path)
            # cv2.imread returns None for files it cannot read as an image.
            if img is not None:
                images.append(img)
    return images

def preprocess_images(images, target_size=(100, 100)):
    """Resize images to a common size and scale their pixel values by 1/255.

    Args:
        images: Iterable of image arrays (e.g. as returned by ``cv2.imread``).
        target_size: Size tuple forwarded to ``cv2.resize``.

    Returns:
        numpy.ndarray: Stack of the resized images as ``float32`` values
        divided by 255 (8-bit inputs therefore map to the range [0, 1]).
    """
    preprocessed_images = []
    for img in images:
        resized_img = cv2.resize(img, target_size)
        # Keep the scaled values as floats. Casting back to uint8 after the
        # division truncates every pixel below 255 to 0, which leaves the
        # clustering step with almost no information.
        normalized_img = resized_img.astype(np.float32) / 255.0
        preprocessed_images.append(normalized_img)
    return np.array(preprocessed_images)

# Folders scanned for input images (each one is listed non-recursively).
folder_paths = [
    './raffaello',
    './COAP',
    './COAP/bag',
    './COAP/dress',
    './COAP/outers',
    './COAP/pants',
    './COAP/shoes',
    './COAP/top',
    './GABABA/outers',
    './GABABA/pants',
    './GABABA/tops',
    './ZARA_men/bag',
    './ZARA_men/outer',
    './ZARA_men/pants',
    './ZARA_men/shoes',
    './ZARA_men/top'
]

output_folder = "./clustered_data_2"
num_clusters = 10  # Number of clusters

# Create the output root; exist_ok avoids the check-then-create race
os.makedirs(output_folder, exist_ok=True)

# Load images from folders
all_images = load_images_from_folders(folder_paths)

# Fail early with a clear message; otherwise the reshape below raises an
# opaque "cannot reshape array of size 0" error.
if not all_images:
    raise ValueError("No readable images found in any of the configured folders")

# Preprocess all images and flatten each one into a single feature row
processed_images = preprocess_images(all_images)
flattened_images = processed_images.reshape(processed_images.shape[0], -1)

# Initialize MiniBatchKMeans object (fixed random_state for repeatable runs)
kmeans = MiniBatchKMeans(n_clusters=num_clusters, random_state=42)

# Fit KMeans to data
kmeans.fit(flattened_images)

# Save the original (un-resized) images into one folder per cluster
for i in range(num_clusters):
    cluster_folder = os.path.join(output_folder, f"cluster_{i}")
    os.makedirs(cluster_folder, exist_ok=True)
    # Positions in all_images whose assigned label is cluster i
    cluster_indices = np.where(kmeans.labels_ == i)[0]
    for idx in cluster_indices:
        img = all_images[idx]
        filename = f"image_{idx}.jpg"
        cv2.imwrite(os.path.join(cluster_folder, filename), img)


In [3]:
# cluster_0 re_cluster

def load_images_from_folder(folder):
    """Load every readable image located directly inside ``folder``.

    Entries are visited in sorted filename order so that the position of each
    image in the returned list (which this notebook uses to build the
    ``image_<idx>.jpg`` output names) is reproducible; ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        list: Images as returned by ``cv2.imread``, in sorted filename order.
        Sub-directories and files OpenCV cannot decode are skipped.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Skip sub-directories up front instead of asking OpenCV to decode them.
        if not os.path.isfile(path):
            continue
        img = cv2.imread(path)
        # cv2.imread returns None for files it cannot read as an image.
        if img is not None:
            images.append(img)
    return images

def preprocess_images(images, target_size=(100, 100)):
    """Resize images to a common size and scale their pixel values by 1/255.

    Args:
        images: Iterable of image arrays (e.g. as returned by ``cv2.imread``).
        target_size: Size tuple forwarded to ``cv2.resize``.

    Returns:
        numpy.ndarray: Stack of the resized images as ``float32`` values
        divided by 255 (8-bit inputs therefore map to the range [0, 1]).
    """
    preprocessed_images = []
    for img in images:
        resized_img = cv2.resize(img, target_size)
        # Keep the scaled values as floats. Casting back to uint8 after the
        # division truncates every pixel below 255 to 0, which leaves the
        # clustering step with almost no information.
        normalized_img = resized_img.astype(np.float32) / 255.0
        preprocessed_images.append(normalized_img)
    return np.array(preprocessed_images)

def run_machine_learning(folder_path, num_groups, output_folder):
    """Cluster the images in ``folder_path`` and save each group to disk.

    The images are loaded with ``load_images_from_folder``, preprocessed with
    ``preprocess_images``, flattened to one feature row per image and
    clustered with ``MiniBatchKMeans``. The images as loaded (not the resized
    copies) are then written to ``<output_folder>/group_<i>/image_<idx>.jpg``,
    where ``idx`` is the image's position in the loaded list.

    Args:
        folder_path: Directory containing the images to cluster.
        num_groups: Number of clusters to form.
        output_folder: Directory under which the ``group_<i>`` folders are
            created; it is created if missing.

    Raises:
        ValueError: If no readable image is found in ``folder_path``.
    """
    # Load images from the cluster folder
    images = load_images_from_folder(folder_path)
    # Fail early with a clear message; otherwise the reshape below raises an
    # opaque "cannot reshape array of size 0" ValueError.
    if not images:
        raise ValueError(f"No readable images found in {folder_path!r}")
    # Preprocess images and flatten each one into a single feature row
    processed_images = preprocess_images(images)
    flattened_images = processed_images.reshape(processed_images.shape[0], -1)
    # Initialize MiniBatchKMeans object with specified number of groups
    kmeans = MiniBatchKMeans(n_clusters=num_groups, random_state=42)
    # Fit KMeans to data
    kmeans.fit(flattened_images)
    # Save clustered data into new folders in the output folder;
    # exist_ok avoids the check-then-create race.
    os.makedirs(output_folder, exist_ok=True)
    for i in range(num_groups):
        group_folder = os.path.join(output_folder, f"group_{i}")
        os.makedirs(group_folder, exist_ok=True)
        # Positions in `images` whose assigned label is group i
        group_indices = np.where(kmeans.labels_ == i)[0]
        for idx in group_indices:
            img = images[idx]
            filename = f"image_{idx}.jpg"
            cv2.imwrite(os.path.join(group_folder, filename), img)

# Settings for splitting cluster_0 into sub-groups
num_groups = 4
input_folder = "./clustered_data_2/cluster_0"
output_folder = "./clustered_data_3/CL_0"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)

In [4]:
# Re-cluster the images previously assigned to cluster_2

# Settings for splitting cluster_2 into sub-groups
num_groups = 5
input_folder = "./clustered_data_2/cluster_2"
output_folder = "./clustered_data_3/CL_2"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)

In [5]:
# Re-cluster the images previously assigned to cluster_3

# Settings for splitting cluster_3 into sub-groups
num_groups = 3
input_folder = "./clustered_data_2/cluster_3"
output_folder = "./clustered_data_3/CL_3"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)

In [6]:
# Re-cluster the images previously assigned to cluster_5

# Settings for splitting cluster_5 into sub-groups
num_groups = 2
input_folder = "./clustered_data_2/cluster_5"
output_folder = "./clustered_data_3/CL_5"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)

In [7]:
# Re-cluster the images previously assigned to cluster_6

# Settings for splitting cluster_6 into sub-groups
num_groups = 3
input_folder = "./clustered_data_2/cluster_6"
output_folder = "./clustered_data_3/CL_6"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)

In [8]:
# Re-cluster the images previously assigned to cluster_8

# Settings for splitting cluster_8 into sub-groups
num_groups = 3
input_folder = "./clustered_data_2/cluster_8"
output_folder = "./clustered_data_3/CL_8"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)

In [9]:
# Re-cluster the images previously assigned to cluster_9

# Settings for splitting cluster_9 into sub-groups
num_groups = 2
input_folder = "./clustered_data_2/cluster_9"
output_folder = "./clustered_data_3/CL_9"

# Cluster the folder's images and write the resulting groups to disk
run_machine_learning(
    folder_path=input_folder,
    num_groups=num_groups,
    output_folder=output_folder,
)