In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the CIFAR-100 dataset
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

# Normalize the pixel values between 0 and 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convert the labels to one-hot encoding
y_train = keras.utils.to_categorical(y_train, 100)
y_test = keras.utils.to_categorical(y_test, 100)


In [1]:
import os
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tqdm import tqdm

# Define the path to the directory
directory_path = r"E:\imbcifar\train"

# Initialize empty lists for images and labels
X_train = []
Y_train = []
class_label = 0

# Loop through subdirectories (classes)
for class_folder in tqdm(sorted(os.listdir(directory_path))):
    class_path = os.path.join(directory_path, class_folder)
    
    for image_file in os.listdir(class_path):
        if image_file.endswith('.png'):
            image_path = os.path.join(class_path, image_file)
            
            # Load image, convert to RGB and resize
            img = Image.open(image_path).convert('RGB')
            img = img.resize((32, 32))
            img_array = np.array(img)
            
            # Append image and label to lists
            X_train.append(img_array)
            Y_train.append(class_label)
            
    class_label = class_label + 1

# Convert lists to numpy arrays
X_train = np.array(X_train)
Y_train = np.array(Y_train)

print(f'X_train shape: {X_train.shape}')
print(f'Y_train shape: {Y_train.shape}')

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.11it/s]

X_train shape: (29009, 32, 32, 3)
Y_train shape: (29009,)





In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from tqdm import tqdm

# Load CIFAR-100 dataset
(train_data, test_data), ds_info = tfds.load('cifar100', split=['train', 'test'], 
                                             shuffle_files=True, as_supervised=True,
                                             with_info=True)

# Function to create class imbalance
def create_class_imbalance(dataset, imbalance_ratio=0.1):
    class_counts = [500] * 100  # CIFAR-100 has 500 images per class

    # Reduce the number of images for every alternate class
    imbalanced_class_counts = [int(count * imbalance_ratio) if i % 2 == 0 else count
                               for i, count in enumerate(class_counts)]

    imbalanced_dataset = []
    class_counter = {i: 0 for i in range(100)}  # Track class counts

    # Using tqdm to show progress
    for image, label in tqdm(dataset, desc="Processing Imbalance"):
        label = label.numpy()  # Convert label to numpy for comparison
        if class_counter[label] < imbalanced_class_counts[label]:
            imbalanced_dataset.append((image, label))
            class_counter[label] += 1

    return imbalanced_dataset

# Create imbalanced CIFAR-100 dataset
imbalanced_train_data = create_class_imbalance(train_data)

# Preprocessing function (normalize)
def preprocess(image, label):
    image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]
    return image, label

# Apply normalization to the imbalanced dataset
x_train, y_train = [], []

for image, label in tqdm(imbalanced_train_data, desc="Normalizing Data"):
    image, label = preprocess(image, label)
    x_train.append(image.numpy())  # Convert image to numpy array
    y_train.append(label)  # Directly append the label since it's already an integer

X_train = np.array(x_train)
y_train = np.array(y_train)

# Ready for TensorFlow training
print(f'x_train shape: {X_train.shape}, y_train shape: {y_train.shape}')


Processing Imbalance: 100%|████████████████████████████████████████████████████| 50000/50000 [00:20<00:00, 2428.04it/s]
Normalizing Data: 100%|████████████████████████████████████████████████████████| 27500/27500 [00:11<00:00, 2333.56it/s]


x_train shape: (27500, 32, 32, 3), y_train shape: (27500,)


In [None]:
import os
from PIL import Image
import numpy as np
from tqdm import tqdm

# Define the path to the directory
directory_path = r"G:\data\inat\data\train_val2018\train_val2018"

# Initialize empty lists for images and labels
X_train = []
Y_train = []
class_label = 0
max_images_per_class = 12000  # Set the limit to 20,000 images per top-level directory

# Loop through top-level subdirectories (classes) with tqdm
for class_folder in tqdm(sorted(os.listdir(directory_path)), desc="Processing directories"):
    class_path = os.path.join(directory_path, class_folder)
    
    # Check if it's a directory
    if os.path.isdir(class_path):
        # Initialize a counter for the number of images processed in this class
        image_count = 0
        
        # Get all image files in this directory and subdirectories
        image_files = []
        for root, _, files in os.walk(class_path):
            image_files += [os.path.join(root, file) for file in files if file.endswith('.jpg')]
        
        # Loop through all images with tqdm, but stop once 20,000 images are reached
        for image_path in tqdm(image_files, desc=f"Processing images in {class_folder}", leave=False):
            if image_count >= max_images_per_class:
                break  # Stop processing this directory once 20,000 images are reached

            # Load image, convert to RGB and resize
            img = Image.open(image_path).convert('RGB')
            img = img.resize((32, 32))
            img_array = np.array(img)
            
            # Append image and label to lists
            X_train.append(img_array)
            Y_train.append(class_label)
            
            # Increment the image count for this class
            image_count += 1
        
        # Increment the class label after processing each top-level folder
        class_label += 1

# Convert lists to numpy arrays
X_train = np.array(X_train)
Y_train = np.array(Y_train)

print(f'X_train shape: {X_train.shape}')
print(f'Y_train shape: {Y_train.shape}')


In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet import ResNet50, preprocess_input
import glob

target_size = (32, 32)  # Change the values as per your requirement
# Load the pre-trained ResNet50 model with modified input shape
model = ResNet50(weights='imagenet', include_top=False, pooling='avg', input_shape=(target_size[0], target_size[1], 3))

In [2]:
import pandas as pd
# Define the file path
file_path = r"D:\feature.csv"
# Read the tab-separated CSV file into a DataFrame
df = pd.read_csv(file_path, delimiter='\t')
columns_to_drop = [df.columns[0], df.columns[-1]]
data = df.drop(columns_to_drop, axis=1)
# Display the head of the DataFrame
data.head(1)

Unnamed: 0,Feature 0,Feature 1,Feature 2,Feature 3,Feature 4,Feature 5,Feature 6,Feature 7,Feature 8,Feature 9,...,Feature 2038,Feature 2039,Feature 2040,Feature 2041,Feature 2042,Feature 2043,Feature 2044,Feature 2045,Feature 2046,Feature 2047
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.009121,0.0,0.0,0.0,0.0,3.285227,0.0,0.0


In [4]:
np.shape(X_train)

(4357, 32, 32, 3)

In [5]:
from PIL import Image
import numpy as np
ft = model.predict(np.array(X_train).astype("float32"))

In [6]:
from sklearn.cluster import MiniBatchKMeans

n_clusters = 400
batch_size = 100
max_iter = 100

kmeans = MiniBatchKMeans(n_clusters=n_clusters, batch_size=batch_size, max_iter=max_iter)
kmeans.fit(ft)
# Retrieve the cluster centers
ct = kmeans.cluster_centers_.tolist()

  super()._check_params_vs_input(X, default_n_init=3)


In [7]:
from tqdm import tqdm

combo_list = []  # Initialize combo_list
tot_dist = []
# Iterate over ft
for i in tqdm(range(len(ct))):
    distances = []

    # Calculate distances for each row in data
    for index, row in data.iterrows():
        row_array = row.to_numpy()  # Convert row to numpy array
        distance = np.linalg.norm(ct[i] - row_array)  # Calculate Euclidean distance
        distances.append(distance)

    tot_dist.append(distances)
print(np.shape(tot_dist))
#new_shape = (1000, 30)
#re_dist = np.transpose(tot_dist, (1, 0))
re_dist = tot_dist
print(np.shape(re_dist))

100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [01:29<00:00,  4.49it/s]


(400, 1000)
(400, 1000)


In [3]:
import pickle


# Saving the list to a file in the D: drive
file_path = r"D:\dist_list.pickle"  # r prefix is used for raw string to avoid escape characters

with open(file_path, 'wb') as file:
    pickle.dump(re_dist, file)

print("Combo list saved successfully.")


NameError: name 're_dist' is not defined

In [9]:
len(index_list)

NameError: name 'index_list' is not defined

In [5]:
import pickle

# Loading the list from the pickle file
file_path = r"D:\index_list.pickle"  # Update the file path if necessary

with open(file_path, 'rb') as file:
    index_list = pickle.load(file)

print("Combo list loaded successfully.")
print(index_list)


Combo list loaded successfully.
[7, 16, 18, 19, 21, 22, 23, 51, 87, 88, 89, 93, 95, 96, 105, 112, 117, 121, 122, 125, 127, 130, 132, 144, 145, 151, 152, 153, 154, 155, 156, 157, 158, 161, 162, 164, 165, 166, 167, 169, 173, 174, 175, 176, 179, 180, 181, 183, 188, 190, 194, 195, 196, 197, 198, 199, 200, 201, 204, 205, 206, 208, 212, 214, 215, 216, 217, 218, 219, 221, 222, 223, 224, 226, 227, 228, 229, 230, 231, 232, 233, 234, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 256, 257, 258, 261, 262, 264, 265, 266, 267, 268, 284, 295, 297, 302, 306, 321, 324, 332, 338, 355, 359, 361, 362, 364, 366, 367, 368, 369, 370, 374, 375, 377, 378, 379, 380, 381, 382, 384, 387, 388, 389, 392, 399, 400, 401, 402, 409, 413, 414, 417, 418, 420, 422, 423, 424, 426, 429, 432, 433, 439, 440, 441, 442, 443, 445, 446, 447, 448, 450, 451, 452, 456, 457, 459, 461, 464, 465, 466, 469, 473, 475, 477, 479, 486, 487, 488, 494, 496, 499, 501, 502, 504, 505, 506, 507, 51

In [5]:
df.columns

Index(['Directory', 'Feature 0', 'Feature 1', 'Feature 2', 'Feature 3',
       'Feature 4', 'Feature 5', 'Feature 6', 'Feature 7', 'Feature 8',
       ...
       'Feature 2039', 'Feature 2040', 'Feature 2041', 'Feature 2042',
       'Feature 2043', 'Feature 2044', 'Feature 2045', 'Feature 2046',
       'Feature 2047', 'Unnamed: 2049'],
      dtype='object', length=2050)

In [6]:
selected_data = df.loc[index_list, ['Name']].to_numpy().flatten()

In [7]:
import pickle

# Saving the list to a file in the D: drive
file_path = r"D:\combo_list.pickle"  # r prefix is used for raw string to avoid escape characters

with open(file_path, 'wb') as file:
    pickle.dump(selected_data, file)

print("Combo list saved successfully.")


Combo list saved successfully.


In [8]:
import pickle

# Loading the list from the pickle file
file_path = r"D:\combo_list.pickle"  # Update the file path if necessary

with open(file_path, 'rb') as file:
    loaded_combo_list = pickle.load(file)

print("Combo list loaded successfully.")
print(loaded_combo_list)


Combo list loaded successfully.
['D:/data/imagenet\\n01514668\\' 'D:/data/imagenet\\n01560419\\'
 'D:/data/imagenet\\n01582220\\' 'D:/data/imagenet\\n01592084\\'
 'D:/data/imagenet\\n01608432\\' 'D:/data/imagenet\\n01614925\\'
 'D:/data/imagenet\\n01616318\\' 'D:/data/imagenet\\n01704323\\'
 'D:/data/imagenet\\n01817953\\' 'D:/data/imagenet\\n01818515\\'
 'D:/data/imagenet\\n01819313\\' 'D:/data/imagenet\\n01829413\\'
 'D:/data/imagenet\\n01843065\\' 'D:/data/imagenet\\n01843383\\'
 'D:/data/imagenet\\n01882714\\' 'D:/data/imagenet\\n01943899\\'
 'D:/data/imagenet\\n01968897\\' 'D:/data/imagenet\\n01981276\\'
 'D:/data/imagenet\\n01983481\\' 'D:/data/imagenet\\n01986214\\'
 'D:/data/imagenet\\n02002556\\' 'D:/data/imagenet\\n02007558\\'
 'D:/data/imagenet\\n02009912\\' 'D:/data/imagenet\\n02051845\\'
 'D:/data/imagenet\\n02056570\\' 'D:/data/imagenet\\n02085620\\'
 'D:/data/imagenet\\n02085782\\' 'D:/data/imagenet\\n02085936\\'
 'D:/data/imagenet\\n02086079\\' 'D:/data/imagenet\\n02086

In [5]:
import os

file_path = r"F:\D-Video\Python"

# Check if the file exists
if os.path.exists(file_path):
    print(f"File exists: {file_path}")
else:
    print(f"File does not exist: {file_path}")

# Check if the file is readable
if os.access(file_path, os.R_OK):
    print(f"File is readable: {file_path}")
else:
    print(f"File is not readable: {file_path}")


File exists: F:\D-Video\Python
File is readable: F:\D-Video\Python
