In [None]:
import numpy as np
import os
from multiprocessing import Pool, cpu_count
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
import tensorflow as tf

def extract_features(image_path):
    """Run one saved image array through the module-level ``model``.

    The array stored at ``image_path`` is loaded with ``np.load``, given a
    leading axis of length one, passed through VGG16's ``preprocess_input``
    and finally handed to ``model.predict``.

    Args:
        image_path: Path of the ``.npy`` file to load.
            (Presumably a 224x224x3 image to match the model built in
            ``main`` -- TODO confirm against the preprocessing step.)

    Returns:
        A ``(image_path, features)`` tuple, where ``features`` is whatever
        ``model.predict`` returned for the single-image batch.
    """
    loaded = np.load(image_path)
    single_batch = np.expand_dims(loaded, axis=0)
    model_input = preprocess_input(single_batch)
    # ``model`` is a module global; it is assigned inside ``main``.
    return image_path, model.predict(model_input)

def process_batch(batch):
    """Extract features for every path in ``batch``.

    Args:
        batch: Iterable of image paths, each passed to ``extract_features``.

    Returns:
        A dict mapping each path (as returned by ``extract_features``) to
        its extracted features. An empty batch yields an empty dict.
    """
    # extract_features returns (path, features) pairs, which is exactly the
    # shape the dict constructor consumes.
    return dict(extract_features(path) for path in batch)

def chunkify(lst, n):
    """Deal the items of ``lst`` into ``n`` chunks in round-robin order.

    Chunk ``k`` holds the items at positions ``k``, ``k + n``, ``k + 2n``, ...
    so chunk sizes differ by at most one. When ``n`` exceeds ``len(lst)``
    the trailing chunks are empty.

    Args:
        lst: Sliceable sequence to split.
        n: Number of chunks to produce.

    Returns:
        A list of ``n`` slices of ``lst``.
    """
    chunks = []
    for offset in range(n):
        chunks.append(lst[offset::n])
    return chunks

def _build_feature_model():
    """Return the headless ImageNet VGG16 network used for feature extraction."""
    base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    return Model(inputs=base.input, outputs=base.output)


def _init_worker_model():
    """Pool initializer: make sure this worker has a module-level ``model``.

    Workers started with the ``spawn`` method re-import this module instead
    of inheriting the parent's memory, so the ``model`` global assigned in
    ``main`` does not exist there and ``extract_features`` would raise
    ``NameError``. Workers that already have the global (e.g. forked ones)
    are left untouched.
    """
    global model
    if 'model' not in globals():
        model = _build_feature_model()


def main():
    """Extract VGG16 features for every ``.npy`` file under the input directory.

    Builds the module-level ``model``, collects all ``.npy`` paths below the
    hard-coded ``input_dir``, splits them across worker processes with
    ``chunkify`` and ``process_batch``, merges the per-worker dicts and
    prints how many images were processed.

    Returns:
        A dict mapping each file path to its extracted features (empty when
        no ``.npy`` files were found).
    """
    input_dir = '/Users/hardikchhipa/Desktop/Data manupulations projects/heart_Echo/Dataset of ECHO/HMC-QU/per-processed_imgs_A4C'
    global model
    model = _build_feature_model()

    # Get all .npy file paths
    file_paths = []
    for root, _, files in os.walk(input_dir):
        file_paths.extend(
            os.path.join(root, name) for name in files if name.endswith('.npy')
        )

    if not file_paths:
        # Nothing to do: skip starting a pool of workers altogether.
        print("No features extracted. Check the preprocessing and feature extraction steps.")
        return {}

    # One chunk per worker; never start more workers than there are files,
    # so no worker is handed an empty chunk.
    num_workers = min(cpu_count(), len(file_paths))
    file_chunks = chunkify(file_paths, num_workers)

    # Use multiprocessing to process batches in parallel. The initializer
    # guarantees each worker has a model even when it was not inherited.
    with Pool(num_workers, initializer=_init_worker_model) as pool:
        results = pool.map(process_batch, file_chunks)

    # Combine results from all processes
    features_dict = {}
    for result in results:
        features_dict.update(result)

    # Check if features_dict is populated
    if not features_dict:
        print("No features extracted. Check the preprocessing and feature extraction steps.")
    else:
        print(f"Extracted features for {len(features_dict)} images.")
    return features_dict

# Run the extraction only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


## With TQDM

In [None]:
import numpy as np
import os
from multiprocessing import Pool, cpu_count
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
import tensorflow as tf
from tqdm import tqdm

def extract_features(image_path):
    """Compute features for the image array saved at ``image_path``.

    Loads the ``.npy`` file, adds a leading axis of length one, applies
    VGG16's ``preprocess_input`` and calls ``predict`` on the module-level
    ``model``.

    Args:
        image_path: Path of the ``.npy`` file to load.
            (Presumably a 224x224x3 image to match the model built in
            ``main`` -- TODO confirm against the preprocessing step.)

    Returns:
        The tuple ``(image_path, features)``, with ``features`` being the
        value returned by ``model.predict``.
    """
    global model  # only read here; ``main`` is what assigns it
    prepared = preprocess_input(np.expand_dims(np.load(image_path), axis=0))
    return image_path, model.predict(prepared)

def process_batch(batch):
    """Map every image path in ``batch`` to its extracted features.

    Args:
        batch: Iterable of image paths handed to ``extract_features``.

    Returns:
        A dict keyed by the path returned from ``extract_features`` with the
        corresponding features as values; empty for an empty batch.
    """
    extracted = (extract_features(path) for path in batch)
    return {returned_path: features for returned_path, features in extracted}

def chunkify(lst, n):
    """Split ``lst`` into ``n`` interleaved chunks.

    The chunk at index ``start`` is the slice ``lst[start::n]``, i.e. every
    ``n``-th item beginning at ``start``. Chunks may be empty when ``n`` is
    larger than ``len(lst)``.

    Args:
        lst: Sliceable sequence to split.
        n: Number of chunks to produce.

    Returns:
        A list containing ``n`` slices of ``lst``.
    """
    def strided_from(start):
        return lst[start::n]

    return list(map(strided_from, range(n)))

def _build_feature_model():
    """Return the headless ImageNet VGG16 network used for feature extraction."""
    base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    return Model(inputs=base.input, outputs=base.output)


def _init_worker_model():
    """Pool initializer: make sure this worker has a module-level ``model``.

    Workers started with the ``spawn`` method re-import this module instead
    of inheriting the parent's memory, so the ``model`` global assigned in
    ``main`` does not exist there and ``extract_features`` would raise
    ``NameError``. Workers that already have the global (e.g. forked ones)
    are left untouched.
    """
    global model
    if 'model' not in globals():
        model = _build_feature_model()


def main():
    """Extract VGG16 features for every ``.npy`` file, showing a progress bar.

    Builds the module-level ``model``, collects all ``.npy`` paths below the
    hard-coded ``input_dir``, splits them across worker processes with
    ``chunkify`` and ``process_batch``, and merges each worker's dict as soon
    as it arrives while advancing a tqdm bar by the number of images in it.

    Returns:
        A dict mapping each file path to its extracted features (empty when
        no ``.npy`` files were found).
    """
    input_dir = '/Users/hardikchhipa/Desktop/Data manupulations projects/heart_Echo/Dataset of ECHO/HMC-QU/per-processed_imgs_A4C'
    global model
    model = _build_feature_model()

    # Get all .npy file paths
    file_paths = []
    for root, _, files in os.walk(input_dir):
        file_paths.extend(
            os.path.join(root, name) for name in files if name.endswith('.npy')
        )

    if not file_paths:
        # Nothing to do: skip starting a pool of workers altogether.
        print("No features extracted. Check the preprocessing and feature extraction steps.")
        return {}

    # One chunk per worker; never start more workers than there are files,
    # so no worker is handed an empty chunk.
    num_workers = min(cpu_count(), len(file_paths))
    file_chunks = chunkify(file_paths, num_workers)

    # Use multiprocessing to process batches in parallel, merging each
    # chunk's results as they complete (completion order is arbitrary).
    features_dict = {}
    with Pool(num_workers, initializer=_init_worker_model) as pool:
        with tqdm(total=len(file_paths)) as pbar:
            for result in pool.imap_unordered(process_batch, file_chunks):
                features_dict.update(result)
                # Chunks can differ in size, so advance by what this chunk
                # actually contained rather than by the first chunk's length.
                pbar.update(len(result))

    # Check if features_dict is populated
    if not features_dict:
        print("No features extracted. Check the preprocessing and feature extraction steps.")
    else:
        print(f"Extracted features for {len(features_dict)} images.")
    return features_dict

# Run the extraction only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
