In [None]:
import os
import cv2  # For image processing
import numpy as np
import pandas as pd
from tqdm import tqdm  # For progress bar

# Paths to the label file and the images folder.
# Raw strings keep the Windows backslashes literal: sequences such as "\P" and
# "\i" are not valid escapes and trigger warnings on recent Python versions.
label_file = r'D:\Project\identity_CelebA.txt'
image_folder = r'D:\Project\Images'

# Read the whitespace-separated, header-less label file into a DataFrame.
# sep=r'\s+' is the supported replacement for the deprecated delim_whitespace=True.
labels_df = pd.read_csv(label_file, sep=r'\s+', header=None, names=['image', 'celebrity_id'])

# Make sure every image name is a plain string so it can be joined onto the folder path
labels_df['image'] = labels_df['image'].astype(str)

# Display the first few rows to verify
print(labels_df.head())

# Warn about every labelled image that is missing from the images folder
for image_name in labels_df['image']:
    if not os.path.exists(os.path.join(image_folder, image_name)):
        print(f"Warning: {image_name} not found in {image_folder}")


In [None]:
# Side length, in pixels, of the square that every image is resized to
# (e.g., 128 -> 128x128). Passed as `img_size` to preprocess_images below.
IMG_SIZE = 128

def preprocess_images(image_folder, labels_df, img_size=128):
    """Load, resize and normalise every image listed in ``labels_df``.

    Args:
        image_folder: Directory that contains the image files.
        labels_df: DataFrame with an ``'image'`` column (file names) and a
            ``'celebrity_id'`` column (labels).
        img_size: Side length of the square each image is resized to.

    Returns:
        A ``(image_data, image_labels)`` tuple of NumPy arrays. Images are
        float32 with values scaled to [0, 1], in the channel order produced
        by ``cv2.imread``. Images that cannot be read, or that raise while
        being processed, are reported on stdout and skipped.
    """
    pixels = []
    identities = []
    target_shape = (img_size, img_size)

    for _, record in tqdm(labels_df.iterrows(), total=labels_df.shape[0]):
        file_name = record['image']
        full_path = os.path.join(image_folder, file_name)

        try:
            raw = cv2.imread(full_path)

            # cv2.imread signals an unreadable file by returning None
            if raw is None:
                print(f"Image {file_name} not found or corrupted.")
                continue

            resized = cv2.resize(raw, target_shape)

            # Scale pixel values to [0, 1] and keep image and label in lockstep
            pixels.append(resized.astype('float32') / 255.0)
            identities.append(record['celebrity_id'])
        except Exception as e:
            print(f"Error processing image {file_name}: {e}")

    return np.array(pixels), np.array(identities)

# Run the preprocessing pipeline over every labelled image
image_data, image_labels = preprocess_images(
    image_folder=image_folder,
    labels_df=labels_df,
    img_size=IMG_SIZE,
)

# Report the resulting array shapes as a sanity check
print(f"Processed image data shape: {image_data.shape}")
print(f"Processed image labels shape: {image_labels.shape}")


In [None]:
import imgaug.augmenters as iaa

# Augmenters, listed in the order they are applied to each image
augmenters = [
    iaa.Fliplr(0.5),                    # mirror horizontally with probability 0.5
    iaa.Affine(rotate=(-25, 25)),       # rotate by an angle sampled from [-25, 25] degrees
    iaa.Multiply((0.8, 1.2)),           # scale pixel values by a factor in [0.8, 1.2] (brightness)
    iaa.GaussianBlur(sigma=(0, 1.0)),   # blur with sigma sampled from [0, 1.0]
]

# Chain the augmenters into a single pipeline
seq = iaa.Sequential(augmenters)

# Run the pipeline over the whole preprocessed image batch
augmented_images = seq(images=image_data)

In [None]:
from keras.applications import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.models import Model

# Load VGG16 with ImageNet weights and without its classification head,
# so the network outputs convolutional feature maps.
base_model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.output)

# Number of images pushed through the network per predict() call
FEATURE_BATCH_SIZE = 64


def extract_features(img_array):
    """Return VGG16 feature maps for a batch of images.

    Args:
        img_array: Array of shape (batch, height, width, 3) holding images as
            produced earlier in this notebook: OpenCV channel order (BGR),
            float values scaled to [0, 1].

    Returns:
        The output of ``model.predict`` for the batch.
    """
    # VGG16's preprocess_input expects RGB images with values in [0, 255].
    # Reverse the channel axis (BGR -> RGB) and undo the [0, 1] scaling; clip
    # because brightness augmentation can push values past the valid range.
    # This also creates a fresh array, so preprocess_input (which may write
    # over a float input in place) cannot corrupt the caller's images.
    rgb_images = np.clip(
        np.asarray(img_array, dtype='float32')[..., ::-1] * 255.0, 0.0, 255.0
    )
    return model.predict(preprocess_input(rgb_images), verbose=0)


# Extract features for the augmented images in mini-batches rather than with
# one predict() call per image, which is far slower and floods the output.
features = []
for start in range(0, len(augmented_images), FEATURE_BATCH_SIZE):
    batch = np.asarray(augmented_images[start:start + FEATURE_BATCH_SIZE])
    batch_features = extract_features(batch)
    # Keep one entry per image with a leading singleton axis, matching the
    # (N, 1, ...) layout that the next cell squeezes away.
    features.extend(batch_features[:, np.newaxis])

# Convert to NumPy array
features = np.array(features)

In [None]:
# Drop the singleton axis at position 1 that is left over from feature extraction
features_squeezed = np.squeeze(features, axis=1)

# Collapse all remaining per-image axes into one flat feature vector per image
features_flattened = features_squeezed.reshape((features_squeezed.shape[0], -1))

# Report the final (number of images, features per image) shape
print(features_flattened.shape)

In [None]:
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection

# Connect to Milvus
connections.connect("default", host='127.0.0.1', port='19530')

# Take the vector dimension from the data instead of hard-coding it, so the
# schema stays correct if the image size or the feature extractor changes.
feature_dim = int(features_flattened.shape[1])

# Define schema: an integer primary key plus the feature vector itself
fields = [
    FieldSchema(name='image_id', dtype=DataType.INT64, is_primary=True),
    FieldSchema(name='feature_vector', dtype=DataType.FLOAT_VECTOR, dim=feature_dim),
]

schema = CollectionSchema(fields, description="Celeb Look-Alike Model")

# Create the collection (or open it if it already exists with this schema).
# NOTE(review): re-running this cell against an existing collection inserts the
# same IDs again; drop the collection first if a clean rebuild is intended.
collection = Collection(name='celebrity_look_alike', schema=schema)

# Insert in small batches to keep each request well below Milvus' message size limit
BATCH_SIZE = 500

for start in range(0, len(features_flattened), BATCH_SIZE):
    end = min(start + BATCH_SIZE, len(features_flattened))
    batch_data = [
        list(range(start, end)),                 # row indices double as primary keys
        features_flattened[start:end].tolist(),  # matching slice of feature vectors
    ]
    collection.insert(batch_data)

# Seal the inserted data so that num_entities reflects every inserted row
collection.flush()

# The collection needs an index on the vector field before it can be loaded and
# searched; IVF_FLAT with the L2 metric matches the nprobe/L2 search parameters
# used later in this notebook.
if not collection.has_index():
    collection.create_index(
        field_name='feature_vector',
        index_params={
            "index_type": "IVF_FLAT",
            "metric_type": "L2",
            "params": {"nlist": 128},
        },
    )




In [None]:
from pymilvus import connections, Collection

# Open the default connection to the local Milvus server
connections.connect(alias="default", host='127.0.0.1', port='19530')

# Get a handle on the existing collection by name
collection_name = "celebrity_look_alike"
collection = Collection(name=collection_name)

# Report the collection's name and how many entities it currently holds
collection_info = collection.num_entities
print(f"Collection name: {collection.name}")
print(f"Number of entities: {collection_info}")


In [None]:
from pymilvus import MilvusException

# Load the collection into memory. load() blocks until loading has finished and
# raises on failure, so success is detected through the exception rather than
# through an `is_loaded` attribute (the ORM Collection does not document one).
try:
    collection.load()
except MilvusException as exc:
    print("Failed to load the collection.")
    print(f"Reason: {exc}")
else:
    print("Collection is loaded. Proceeding with search...")

    # Use the first stored feature vector as an example query
    search_vector = features_flattened[0].tolist()
    search_params = {
        "metric_type": "L2",
        "params": {"nprobe": 10}
    }

    # Perform the search: the 10 nearest neighbours of the query vector
    results = collection.search(data=[search_vector], anns_field='feature_vector', param=search_params, limit=10)

    # One result set is returned per query vector
    for result in results:
        print("Search results:", result)
