In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the CIFAR-100 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize the pixel values between 0 and 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convert the labels to one-hot encoding
y_train = keras.utils.to_categorical(y_train, 100)
y_test = keras.utils.to_categorical(y_test, 100)


In [4]:
import os
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Define the path to the directory
directory_path = r'C:\Users\shaif\Downloads\Compressed\FracAtlas\FracAtlas\images'

# Initialize empty lists for images and labels
X_train = []
Y_train = []

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Loop through subdirectories (classes)
for class_folder in os.listdir(directory_path):
    class_path = os.path.join(directory_path, class_folder)
    class_label = class_folder
    
    # Encode class labels
    encoded_label = label_encoder.fit_transform([class_label])[0]

    for image_file in os.listdir(class_path):
        try:
            if image_file.endswith('.jpg'):
                image_path = os.path.join(class_path, image_file)
                
                # Load image, convert to RGB and resize
                img = Image.open(image_path).convert('RGB')
                img = img.resize((32, 32))
                img_array = np.array(img)
                
                # Append image and label to lists
                X_train.append(img_array)
                Y_train.append(encoded_label)
        except Exception as e:
            print(f"Error processing image file {image_file}: {e}")

# Convert lists to numpy arrays
X_train = np.array(X_train)
Y_train = np.array(Y_train)

print(f'X_train shape: {X_train.shape}')
print(f'Y_train shape: {Y_train.shape}')


Error processing image file IMG0004028.jpg: image file is truncated (20 bytes not processed)
Error processing image file IMG0004029.jpg: image file is truncated (22 bytes not processed)
Error processing image file IMG0004036.jpg: image file is truncated (14 bytes not processed)
Error processing image file IMG0004070.jpg: image file is truncated (41 bytes not processed)
Error processing image file IMG0004073.jpg: image file is truncated (3 bytes not processed)
Error processing image file IMG0004076.jpg: image file is truncated (0 bytes not processed)
Error processing image file IMG0004079.jpg: image file is truncated (24 bytes not processed)
Error processing image file IMG0004084.jpg: image file is truncated (5 bytes not processed)
Error processing image file IMG0004092.jpg: image file is truncated (3 bytes not processed)
Error processing image file IMG0004098.jpg: image file is truncated (17 bytes not processed)
Error processing image file IMG0004100.jpg: image file is truncated (15 by

In [7]:
Y_train[2500]

0

In [8]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet import ResNet50, preprocess_input
import glob

target_size = (32, 32)  # Change the values as per your requirement
# Load the pre-trained ResNet50 model with modified input shape
model = ResNet50(weights='imagenet', include_top=False, pooling='avg', input_shape=(target_size[0], target_size[1], 3))

In [1]:
import pandas as pd
# Define the file path
file_path = r"F:\21k_avg_feature.csv"
# Read the tab-separated CSV file into a DataFrame
df = pd.read_csv(file_path, delimiter='\t')
columns_to_drop = [df.columns[0], df.columns[-1]]
data = df.drop(columns_to_drop, axis=1)
# Display the head of the DataFrame
data.head(1)

Unnamed: 0,Feature 0,Feature 1,Feature 2,Feature 3,Feature 4,Feature 5,Feature 6,Feature 7,Feature 8,Feature 9,...,Feature 2038,Feature 2039,Feature 2040,Feature 2041,Feature 2042,Feature 2043,Feature 2044,Feature 2045,Feature 2046,Feature 2047
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.037739,0.0,0.0,0.0,0.0,3.016108,0.0,0.0


In [7]:
np.shape(X_train)

(9590, 32, 32, 3)

In [10]:
from PIL import Image
import numpy as np
ft = model.predict(np.array(X_train).astype("float32"))



In [11]:
from sklearn.cluster import MiniBatchKMeans

n_clusters = 400
batch_size = 100
max_iter = 100

kmeans = MiniBatchKMeans(n_clusters=n_clusters, batch_size=batch_size, max_iter=max_iter)
kmeans.fit(ft)
# Retrieve the cluster centers
ct = kmeans.cluster_centers_.tolist()

  super()._check_params_vs_input(X, default_n_init=3)


In [12]:
from tqdm import tqdm

combo_list = []  # Initialize combo_list
tot_dist = []
# Iterate over ft
for i in tqdm(range(len(ct))):
    distances = []

    # Calculate distances for each row in data
    for index, row in data.iterrows():
        row_array = row.to_numpy()  # Convert row to numpy array
        distance = np.linalg.norm(ct[i] - row_array)  # Calculate Euclidean distance
        distances.append(distance)

    tot_dist.append(distances)
print(np.shape(tot_dist))
#new_shape = (1000, 30)
#re_dist = np.transpose(tot_dist, (1, 0))
re_dist = tot_dist
print(np.shape(re_dist))

100%|██████████████████████████████████████████████████████████████████| 400/400 [15:54<00:00,  2.39s/it]


(400, 10450)
(400, 10450)


In [None]:
import pickle

# Saving the list to a file in the D: drive
file_path = r"F:\dist_list.pickle"  # r prefix is used for raw string to avoid escape characters

with open(file_path, 'wb') as file:
    pickle.dump(re_dist, file)

print("Combo list saved successfully.")

In [7]:
len(index_list)

400

In [6]:
import pickle

# Loading the list from the pickle file
file_path = r"F:\index_list.pickle"  # Update the file path if necessary

with open(file_path, 'rb') as file:
    index_list = pickle.load(file)

print("Combo list loaded successfully.")
print(index_list)


Combo list loaded successfully.
[2, 22, 52, 55, 72, 89, 90, 91, 92, 94, 95, 103, 140, 496, 497, 500, 510, 547, 579, 596, 599, 602, 604, 616, 618, 952, 954, 975, 1002, 1005, 1022, 1039, 1040, 1041, 1042, 1044, 1045, 1053, 1090, 1446, 1447, 1450, 1497, 1529, 1546, 1549, 1552, 1554, 1566, 1568, 1902, 1904, 1925, 1950, 2002, 2005, 2022, 2039, 2040, 2041, 2042, 2044, 2045, 2053, 2090, 2446, 2447, 2450, 2460, 2497, 2529, 2546, 2549, 2552, 2554, 2566, 2568, 2902, 2904, 2925, 2950, 3002, 3039, 3040, 3041, 3042, 3044, 3045, 3053, 3090, 3446, 3447, 3450, 3497, 3529, 3546, 3549, 3552, 3554, 3566, 3568, 3902, 3904, 3925, 3950, 4002, 4039, 4040, 4041, 4042, 4044, 4045, 4053, 4090, 4446, 4447, 4450, 4497, 4529, 4546, 4549, 4552, 4554, 4566, 4568, 4902, 4904, 4925, 4950, 5002, 5005, 5039, 5040, 5041, 5042, 5044, 5045, 5053, 5090, 5446, 5447, 5450, 5497, 5529, 5546, 5549, 5552, 5554, 5566, 5568, 5902, 5904, 5925, 5950, 6011, 6059, 6088, 6090, 6091, 6092, 6093, 6099, 6100, 6101, 6106, 6107, 6108, 6110,

In [7]:
df.columns

Index(['Directory', 'Feature 0', 'Feature 1', 'Feature 2', 'Feature 3',
       'Feature 4', 'Feature 5', 'Feature 6', 'Feature 7', 'Feature 8',
       ...
       'Feature 2039', 'Feature 2040', 'Feature 2041', 'Feature 2042',
       'Feature 2043', 'Feature 2044', 'Feature 2045', 'Feature 2046',
       'Feature 2047', 'Unnamed: 2049'],
      dtype='object', length=2050)

In [8]:
selected_data = df.loc[index_list, ['Directory']].to_numpy().flatten()

In [11]:
import pickle

# Saving the list to a file in the D: drive
file_path = r"F:\combo_list.pickle"  # r prefix is used for raw string to avoid escape characters

with open(file_path, 'wb') as file:
    pickle.dump(selected_data, file)

print("Combo list saved successfully.")


Combo list saved successfully.


In [12]:
import pickle

# Loading the list from the pickle file
file_path = r"F:\combo_list.pickle"  # Update the file path if necessary

with open(file_path, 'rb') as file:
    loaded_combo_list = pickle.load(file)

print("Combo list loaded successfully.")
print(loaded_combo_list)


Combo list loaded successfully.
['F:/imagenet21k_resized/imagenet21k_train\\n00007846\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00007846\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00007846\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00021265\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00440941\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00445802\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00446311\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00446493\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00446804\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00447073\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00447221\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00448748\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n00463543\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n01579149\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n01579260\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n01579729\\'
 'F:/imagenet21k_resized/imagenet21k_train\\n01583495\\'