In [None]:
import numpy as np
import os
from skimage import io, img_as_ubyte
from skimage.transform import resize
import h5py
from skimage.filters import roberts, sobel
from skimage.color import rgb2lab, rgb2gray
import cv2
from skimage.filters.rank import entropy
from skimage.morphology import disk
from PIL import Image
from extraction_backup import extract_features
import h5py

In [None]:

# Extract hand-crafted features for every image of the selected tomato classes
# and append them to an HDF5 feature store.
#
# Layout of the feature file:
#   dataset_1 -> stacked outputs of extract_features(), one row per image
#   dataset_2 -> the image path of each row, stored as fixed-length bytes
#
# The cell is safe to re-run: images whose path is already present in
# dataset_2 are skipped, so repeated runs do not append duplicate rows.
base_path = "PlantVillage"
tomato_diseases = [
    "Tomato_Early_blight", "Tomato_healthy",
    "Tomato_Leaf_Mold"
]
feature_file = "CustomFeatures.h5"

# Paths already stored by a previous run (as bytes, matching dataset_2).
already_stored = set()
if os.path.exists(feature_file):
    with h5py.File(feature_file, 'r') as h5f:
        if 'dataset_2' in h5f:
            already_stored = set(h5f['dataset_2'][:].tolist())

feats = []
names = []

for disease in tomato_diseases:
    path = os.path.join(base_path, disease)
    # os.listdir() returns entries in arbitrary order; sort so that the row
    # order of the saved features is reproducible between runs.
    for im in sorted(os.listdir(path)):
        img_path = os.path.join(path, im)

        # Sub-directories cannot be read as images.
        if not os.path.isfile(img_path):
            continue

        # Already extracted by an earlier run of this cell.
        if img_path.encode("utf-8") in already_stored:
            continue

        print(f"Extracting features from image - {img_path}")
        img = io.imread(img_path)

        # temp provision to handle 4-channel images: drop the 4th channel.
        # The ndim guard keeps a 2-D (grayscale) image that happens to be
        # four pixels wide from being mistaken for a 4-channel one.
        if img.ndim == 3 and img.shape[-1] == 4:
            img = img[:, :, :3]

        X = extract_features(img)
        feats.append(X)
        names.append(img_path)

feats = np.array(feats)
# Encode explicitly as UTF-8: identical bytes for ASCII paths, and non-ASCII
# paths no longer fail the implicit ASCII conversion of dtype='S'.
names = np.array([p.encode("utf-8") for p in names], dtype='S')

if len(names) == 0:
    # Nothing to add. Concatenating an empty batch with stored features would
    # fail on a dimension mismatch, and creating empty datasets would break
    # later appends, so leave the file untouched.
    print("No new images to process; feature file left unchanged.")
else:
    # Save features to hdf5 file incrementally
    print("Saving features to H5 file")

    with h5py.File(feature_file, 'a') as h5f:
        if 'dataset_1' in h5f:
            existing_feats = h5f['dataset_1'][:]
            existing_names = h5f['dataset_2'][:]
            all_feats = np.concatenate((existing_feats, feats), axis=0)
            # numpy widens the fixed-length bytes dtype to the longer of the
            # two inputs, so longer new paths are not truncated.
            all_names = np.concatenate((existing_names, names), axis=0)
            # NOTE(review): HDF5 generally does not reclaim the space of a
            # deleted dataset, so the file can grow on every append; run
            # h5repack if the file size matters.
            del h5f['dataset_1']
            del h5f['dataset_2']
            h5f.create_dataset('dataset_1', data=all_feats)
            h5f.create_dataset('dataset_2', data=all_names)
        else:
            h5f.create_dataset('dataset_1', data=feats)
            h5f.create_dataset('dataset_2', data=names)

    print("Features saved successfully!")

In [None]:
# Report the on-disk size of the feature store, in bytes.
file_size = os.stat('CustomFeatures.h5').st_size
print(file_size)