In [None]:
import cv2
import numpy as np
import pandas as pd
import os
import glob
from skimage.measure import label, regionprops, regionprops_table
from skimage.color import label2rgb
from skimage.feature import graycomatrix, graycoprops

In [None]:

# Extract shape and colour features from every image of the "positive" class
# and write one row per image to positive.csv.

# Read folder
folder_path = r'D:\ICIIT2024\Dataset Rice seed\Huongthom\Huongthom\Huong_thom-1'

# Sort so the row order of the CSV is reproducible between runs
image_names = sorted(os.listdir(folder_path))

# Check images
if not image_names:
    print("No images found in the directory.")
else:
    if len(image_names) > 2:
        print(f"The names of the first three images in the directory are: {image_names[0]}, {image_names[1]}, {image_names[2]}")
    else:
        print("Not enough images to display three names.")

    features = []

    # Processing
    for image_name in image_names:
        image_path = os.path.join(folder_path, image_name)
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)

        # cv2.imread returns None (instead of raising) for unreadable or
        # non-image entries; report and skip them.
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue

        # Segment with a fixed threshold: pixels brighter than 128 become
        # foreground. NOTE(review): assumes the seed is brighter than the
        # background - confirm against the dataset.
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, img_bin = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
        label_img = label(img_bin)
        props = regionprops(label_img)

        if props:
            # Only the largest connected component is measured.
            largest_prop = max(props, key=lambda x: x.area)

            # bbox is (min_row, min_col, max_row, max_col), half-open.
            min_row, min_col, max_row, max_col = largest_prop.bbox
            length = max_col - min_col  # horizontal extent
            width = max_row - min_row   # vertical extent

            # Colour values of every pixel in the region, shape (n_pixels, 3).
            # OpenCV stores channels in B, G, R order, so unpack accordingly;
            # the original code wrote channel 0 (blue) into r_mean/rs and
            # channel 2 (red) into b_mean/bs.
            region_pixels = img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]]
            b_mean, g_mean, r_mean = region_pixels.mean(axis=0)

            # Statistics over all three channels pooled together.
            overall_mean = region_pixels.mean()
            centered = region_pixels - overall_mean

            features.append([
                image_name,
                largest_prop.area,
                length,
                width,
                length / width if width != 0 else 0,
                largest_prop.major_axis_length,
                largest_prop.minor_axis_length,
                largest_prop.convex_area,
                # regionprops' own boundary length. `coords` lists every
                # pixel of the region in raster order, not an ordered
                # contour, so passing it to cv2.arcLength does not yield a
                # perimeter (and OpenCV only accepts int32/float32 points).
                largest_prop.perimeter,
                r_mean,
                g_mean,
                b_mean,
                np.sqrt(r_mean),
                np.sqrt(g_mean),
                np.sqrt(b_mean),
                overall_mean,
                region_pixels.std(),
                np.sum(centered ** 2),  # un-normalised sum of squared deviations
                np.sum(centered ** 3),  # un-normalised sum of cubed deviations
                'positive',
            ])
        else:
            # No foreground region: keep the image in the table with NaN features
            features.append([image_name] + [np.nan] * 18 + ['positive'])

    # add and save
    df = pd.DataFrame(features, columns=['Name', 'area', 'length', 'width', 'length_width_ratio', 'major_axis_length',
                                         'minor_axis_length', 'convex_area', 'perimeter', 'r_mean', 'g_mean', 'b_mean',
                                         'rs', 'gs', 'bs', 'mean', 'std_dev', 'uniformity', 'third_moment', 'class'])
    df.to_csv('positive.csv', index=False)


In [None]:

# Extract shape and colour features from every image of the "negative" class
# and write one row per image to negative.csv.

# Read folder
# NOTE(review): this is the same directory the 'positive' cell reads
# (Huong_thom-1), so both classes are currently built from the same images.
# The negative class almost certainly needs its own folder - update this path
# before trusting negative.csv.
folder_path = r'D:\ICIIT2024\Dataset Rice seed\Huongthom\Huongthom\Huong_thom-1'

# Sort so the row order of the CSV is reproducible between runs
image_names = sorted(os.listdir(folder_path))

# Check images
if not image_names:
    print("No images found in the directory.")
else:
    if len(image_names) > 2:
        print(f"The names of the first three images in the directory are: {image_names[0]}, {image_names[1]}, {image_names[2]}")
    else:
        print("Not enough images to display three names.")

    features = []

    # Processing
    for image_name in image_names:
        image_path = os.path.join(folder_path, image_name)
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)

        # cv2.imread returns None (instead of raising) for unreadable or
        # non-image entries; report and skip them.
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue

        # Segment with a fixed threshold: pixels brighter than 128 become
        # foreground. NOTE(review): assumes the seed is brighter than the
        # background - confirm against the dataset.
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, img_bin = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
        label_img = label(img_bin)
        props = regionprops(label_img)

        if props:
            # Only the largest connected component is measured.
            largest_prop = max(props, key=lambda x: x.area)

            # bbox is (min_row, min_col, max_row, max_col), half-open.
            min_row, min_col, max_row, max_col = largest_prop.bbox
            length = max_col - min_col  # horizontal extent
            width = max_row - min_row   # vertical extent

            # Colour values of every pixel in the region, shape (n_pixels, 3).
            # OpenCV stores channels in B, G, R order, so unpack accordingly;
            # the original code wrote channel 0 (blue) into r_mean/rs and
            # channel 2 (red) into b_mean/bs.
            region_pixels = img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]]
            b_mean, g_mean, r_mean = region_pixels.mean(axis=0)

            # Statistics over all three channels pooled together.
            overall_mean = region_pixels.mean()
            centered = region_pixels - overall_mean

            features.append([
                image_name,
                largest_prop.area,
                length,
                width,
                length / width if width != 0 else 0,
                largest_prop.major_axis_length,
                largest_prop.minor_axis_length,
                largest_prop.convex_area,
                # regionprops' own boundary length. `coords` lists every
                # pixel of the region in raster order, not an ordered
                # contour, so passing it to cv2.arcLength does not yield a
                # perimeter (and OpenCV only accepts int32/float32 points).
                largest_prop.perimeter,
                r_mean,
                g_mean,
                b_mean,
                np.sqrt(r_mean),
                np.sqrt(g_mean),
                np.sqrt(b_mean),
                overall_mean,
                region_pixels.std(),
                np.sum(centered ** 2),  # un-normalised sum of squared deviations
                np.sum(centered ** 3),  # un-normalised sum of cubed deviations
                'negative',
            ])
        else:
            # No foreground region: keep the image in the table with NaN features
            features.append([image_name] + [np.nan] * 18 + ['negative'])

    # add and save
    df = pd.DataFrame(features, columns=['Name', 'area', 'length', 'width', 'length_width_ratio', 'major_axis_length',
                                         'minor_axis_length', 'convex_area', 'perimeter', 'r_mean', 'g_mean', 'b_mean',
                                         'rs', 'gs', 'bs', 'mean', 'std_dev', 'uniformity', 'third_moment', 'class'])
    df.to_csv('negative.csv', index=False)


In [None]:
# Load the two per-class feature tables written by the extraction cells.
df1, df2 = (pd.read_csv(csv_name) for csv_name in ('positive.csv', 'negative.csv'))

# Stack them into a single table with a fresh 0..n-1 index and save it.
result = pd.concat((df1, df2), ignore_index=True)
result.to_csv('.csv', index=False)

In [None]:
# Reload the combined table and drop exact duplicate rows.
# drop_duplicates() returns a new DataFrame rather than modifying in place,
# so the result must be kept; otherwise the shape shown below would still
# count the duplicates.
df3 = pd.read_csv('.csv').drop_duplicates()
df3.shape