In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from skimage.measure import shannon_entropy
from matplotlib import pyplot as plt

In [None]:
# Folder that holds the input images (per its name, the Plant Pathology 2020
# FGVC7 image set). NOTE(review): this is an absolute, machine-specific path;
# adjust it when running the notebook elsewhere.
ds_path = r"F:\Jup\Celebal Assignments\Untitled Folder\plant-pathology-2020-fgvc7\images"

# os.listdir() yields entries in arbitrary order and also returns non-image
# entries (thumbnail caches, sub-folders, ...). Sorting makes the row order of
# the resulting feature table reproducible, and the extension filter keeps
# files that OpenCV cannot decode out of the processing loop.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
img_files = sorted(
    entry for entry in os.listdir(ds_path)
    if entry.lower().endswith(IMAGE_EXTENSIONS)
)

In [None]:
def _find_center_contour(contours, width, height):
    """Return the index of the first contour that strictly contains the image centre.

    Returns None when no contour contains the centre point.
    """
    # pointPolygonTest takes a floating-point point; with measureDist=False it
    # returns +1 (inside), 0 (on the edge) or -1 (outside).
    center = (float(width // 2), float(height // 2))
    for position, contour in enumerate(contours):
        if cv2.pointPolygonTest(contour, center, False) > 0:
            return position
    return None


def _show_segmentation_steps(gray, blurred, thresholded, closed):
    """Display the four intermediate segmentation images side by side."""
    panels = [
        ('Grayscale', gray),
        ('Blurred', blurred),
        ('Otsu Thresholding', thresholded),
        ('Morphological Closing', closed),
    ]
    fig, axes = plt.subplots(1, 4, figsize=(15, 10))
    for ax, (title, image) in zip(axes, panels):
        ax.set_title(title)
        ax.imshow(image, cmap='gray')
    plt.show()
    # Close explicitly: one open figure per image would otherwise pile up in memory.
    plt.close(fig)


def create_dataset(img_files, ds_path, target_size=(7000, 4000), show_steps=True):
    """Extract colour and texture features from leaf images into a DataFrame.

    Each image is resized, segmented (grayscale -> Gaussian blur -> inverted
    Otsu threshold -> morphological closing -> contours), and the contour that
    contains the image centre is taken as the leaf. Pixels outside that contour
    are painted white before the colour statistics are computed.

    Parameters
    ----------
    img_files : iterable of str
        File names, relative to ``ds_path``.
    ds_path : str
        Directory that contains the images.
    target_size : tuple of int, optional
        ``(width, height)`` every image is resized to. The blur window (55x55)
        and closing kernel (50x50) are fixed and are not rescaled with this
        value. NOTE(review): presumably they were tuned for the default size.
    show_steps : bool, optional
        When True (default), plot the intermediate segmentation images for
        every file.

    Returns
    -------
    pandas.DataFrame
        One row per successfully segmented image with the columns
        ``mean_r, mean_g, mean_b, stddev_r, stddev_g, stddev_b, contrast,
        correlation, inverse_difference_moments, entropy``. Files that cannot
        be read, or in which no contour contains the image centre, are
        reported with ``print`` and skipped.
    """
    names = ['mean_r', 'mean_g', 'mean_b', 'stddev_r', 'stddev_g', 'stddev_b',
             'contrast', 'correlation', 'inverse_difference_moments', 'entropy']
    width, height = target_size
    # The structuring element does not depend on the image, so build it once.
    kernel = np.ones((50, 50), np.uint8)
    rows = []

    for file in img_files:
        imgpath = os.path.join(ds_path, file)
        main_img = cv2.imread(imgpath)
        if main_img is None:
            # cv2.imread signals an unreadable or non-image file by returning
            # None instead of raising; skip it rather than crash in cv2.resize.
            print(f"Could not read image file: {file}")
            continue

        resized_image = cv2.resize(main_img, (width, height))
        gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (55, 55), 0)
        _, im_bw_otsu = cv2.threshold(
            blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

        # [-2:] keeps this working on OpenCV 3 (three return values) and
        # OpenCV 4 (two return values).
        contours, _ = cv2.findContours(
            closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

        if show_steps:
            _show_segmentation_steps(gray, blurred, im_bw_otsu, closing)

        index = _find_center_contour(contours, width, height)
        if index is None:
            print(f"No valid contour found for file: {file}")
            continue

        # Filled white silhouette of the leaf on black, same shape as the image.
        mask = np.zeros_like(resized_image)
        cv2.drawContours(mask, [contours[index]], 0, (255, 255, 255), -1)
        final_img = cv2.bitwise_and(resized_image, mask)

        # Turn every pure-black pixel (the masked-out background, plus any
        # genuinely black leaf pixel) white.
        black_pixels = np.all(final_img == 0, axis=-1)
        final_img[black_pixels] = 255

        # OpenCV stores channels as BGR; convert so index 0 is red.
        img = cv2.cvtColor(final_img, cv2.COLOR_BGR2RGB)
        channels = [img[:, :, 0], img[:, :, 1], img[:, :, 2]]
        channel_means = [np.mean(channel) for channel in channels]
        channel_stds = [np.std(channel) for channel in channels]

        # NOTE(review): texture features are computed on the full resized
        # grayscale frame, not on the masked leaf - confirm this is intended.
        glcm = graycomatrix(gray, [1], [0], 256, symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        correlation = graycoprops(glcm, 'correlation')[0, 0]
        # The inverse difference moment is the GLCM 'homogeneity' property,
        # sum(P[i, j] / (1 + (i - j)**2)); 'ASM' is the angular second moment,
        # a different statistic.
        inverse_diff_moments = graycoprops(glcm, 'homogeneity')[0, 0]
        entropy = shannon_entropy(gray)

        rows.append(channel_means + channel_stds +
                    [contrast, correlation, inverse_diff_moments, entropy])
        print(f"Processed file: {file}")

    # Build the frame once; concatenating inside the loop is quadratic.
    return pd.DataFrame(rows, columns=names)


In [None]:
# Build the feature table in this cell: `dataset` is not assigned anywhere
# else in the notebook, so exporting it directly raises NameError on a fresh
# kernel (Restart & Run All).
dataset = create_dataset(img_files, ds_path)
dataset.to_csv("plant_village.csv")