In [1]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import numpy as np
import pandas as pd
import cv2
import os
import h5py
import pickle

In [2]:
# Class names are the sub-directory names of jpg_category/.
# os.listdir returns entries in arbitrary order and also lists plain files
# (e.g. OS metadata files), which would break the per-class os.listdir in the
# feature-extraction loop, so keep directories only and sort them.
train_labels = sorted(
    entry
    for entry in os.listdir("jpg_category/")
    if os.path.isdir(os.path.join("jpg_category", entry))
)
train_labels

['Bluebell',
 'Buttercup',
 'ColtsFoot',
 'Cowslip',
 'Crocus',
 'Daisy',
 'Dandelion',
 'Dffodil',
 'Fritillary',
 'Iris',
 'LilyValley',
 'Pansy',
 'Snowdrop',
 'Sunflower',
 'Tigerlily',
 'Tulip',
 'Windflower']

In [3]:
# Build one feature vector per readable image:
#   * an 8x8x8 colour histogram over the three BGR channels (512 values),
#     normalised in place, followed by
#   * the 7 Hu moments of the grayscale image.
# image_features[i] and image_labels[i] always describe the same image.
image_features = []
image_labels = []
for label in train_labels:
    path = os.path.join("jpg_category", label)
    # os.listdir gives an arbitrary order; sort so the row order is reproducible.
    for x in sorted(os.listdir(path)):
        file = os.path.join(path, x)
        if not os.path.exists(file):
            print(f"تصویر یافت نشد: {file}")
            continue
        image = cv2.imread(file)
        # cv2.imread signals an unreadable/non-image file by returning None.
        if image is None:
            print(f"خطا در بارگذاری تصویر: {file}")
            continue
        # Resize to a common size so histogram counts and moments are comparable.
        image = cv2.resize(image, (500, 500))
        hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
        cv2.normalize(hist, hist)  # in-place normalisation of the histogram
        hist_flat = hist.flatten()
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Hu moments (shape descriptors), not hue values.
        hu_moments = cv2.HuMoments(cv2.moments(image_gray)).flatten()
        all_features = np.hstack([hist_flat, hu_moments])
        image_features.append(all_features)
        image_labels.append(label)

In [4]:
# Sanity check: (number of images, features per image).
np.asarray(image_features).shape

(1360, 519)

In [5]:
# Sanity check: the label count should match the number of feature rows.
np.asarray(image_labels).shape

(1360,)

In [6]:
# Turn the class-name strings into integer ids. The fitted encoder is kept in
# label_encod so it can be saved later and used to map ids back to names.
label_encod = LabelEncoder()
image_label_encod = label_encod.fit_transform(image_labels)
# Display the encoded labels as the cell output.
image_label_encod

array([ 0,  0,  0, ..., 16, 16, 16], dtype=int64)

In [7]:
# Rescale every feature column with a min-max scaler; the fitted scaler is
# kept in mms so the same scaling can be re-applied later.
# NOTE(review): the scaler is fitted on all samples; if a train/test split
# happens downstream, confirm that fitting before the split is intended.
mms = MinMaxScaler()
mms.fit(image_features)
image_features_mms = mms.transform(image_features)
image_features_mms

array([[1.97624573e-02, 5.78024168e-04, 0.00000000e+00, ...,
        3.86393234e-18, 5.20710681e-02, 1.98687454e-17],
       [1.74216708e-04, 0.00000000e+00, 0.00000000e+00, ...,
        3.86393622e-18, 5.20754831e-02, 1.98687346e-17],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        3.86393611e-18, 5.20756714e-02, 1.98687525e-17],
       ...,
       [8.78464639e-02, 4.14349259e-03, 0.00000000e+00, ...,
        3.86466489e-18, 5.24989973e-02, 1.98683835e-17],
       [4.34821308e-02, 1.58172996e-02, 1.84748745e-04, ...,
        3.86392312e-18, 5.21474623e-02, 1.98687803e-17],
       [2.38287393e-01, 7.91747487e-02, 7.36974103e-04, ...,
        3.86392990e-18, 5.20687142e-02, 1.98687461e-17]])

In [9]:
# Persist the encoded labels as dataset "target"; skipped when the file
# already exists so an earlier export is not overwritten.
# The context manager closes the file even if create_dataset raises, instead
# of relying on a later cell. The name h5_label stays bound (to the closed
# file) after the block.
os.makedirs("data", exist_ok=True)  # the output directory may not exist yet
if not os.path.exists("data/labels.h5"):
    with h5py.File("data/labels.h5", 'w') as h5_label:
        h5_label.create_dataset("target", data=np.array(image_label_encod))

In [11]:
# Persist the scaled feature matrix as dataset "features"; skipped when the
# file already exists so an earlier export is not overwritten.
# The context manager closes the file even if create_dataset raises, instead
# of relying on a later cell. The name h5_feature stays bound (to the closed
# file) after the block.
os.makedirs("data", exist_ok=True)  # the output directory may not exist yet
if not os.path.exists("data/features.h5"):
    with h5py.File("data/features.h5", 'w') as h5_feature:
        h5_feature.create_dataset("features", data=np.array(image_features_mms))

In [13]:
# Close whichever HDF5 handles exist in this session. A handle is only bound
# when its file did not exist yet, so on a re-run with the files already on
# disk the names are undefined and an unconditional .close() would raise
# NameError.
closed_handles = []
for handle_name in ("h5_label", "h5_feature"):
    handle = globals().get(handle_name)
    if handle is not None:
        handle.close()
        closed_handles.append(handle_name)
# Show which handles close() was called on.
closed_handles

In [15]:
# Pickle the fitted label encoder unless a saved copy is already on disk.
# Note the payload is a pickle stream even though the file name ends in .h5.
le_path = "data/le.h5"
le_already_saved = os.path.exists(le_path)
if not le_already_saved:
    with open(le_path, "wb") as le_file:
        pickle.dump(label_encod, le_file)

In [17]:
# Pickle the fitted min-max scaler unless a saved copy is already on disk.
# Note the payload is a pickle stream even though the file name ends in .h5.
mms_path = "data/mms.h5"
mms_already_saved = os.path.exists(mms_path)
if not mms_already_saved:
    with open(mms_path, "wb") as mms_file:
        pickle.dump(mms, mms_file)