# Preprocessing and Feature Extraction - Flavia dataset

The extracted features are saved to a file named "Flavia_features.csv".

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# path used later in this notebook lives under that mount point.
# This cell only works inside Google Colab (google.colab is Colab-specific).
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
!pip install mahotas


Collecting mahotas
[?25l  Downloading https://files.pythonhosted.org/packages/19/ad/553b246b0a35dccc3ed58dc8889a67124bf5ab858e9c6b7255d56086e70c/mahotas-1.4.11-cp37-cp37m-manylinux2010_x86_64.whl (5.7MB)
[K     |████████████████████████████████| 5.7MB 15.9MB/s 
Installing collected packages: mahotas
Successfully installed mahotas-1.4.11


In [None]:
import cv2
import os
import numpy as np
import pandas as pd
import mahotas as mt
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Directory on the mounted Google Drive that holds the Flavia leaf images.
ds_path = "/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/"
# Every entry of that directory, in whatever order os.listdir returns it
# (the order is unspecified); create_dataset() processes files in this order.
img_files = os.listdir(ds_path)

In [None]:
def _leaf_feature_vector(main_img, kernel):
    """Compute the shape, colour and texture features of one leaf image.

    Parameters
    ----------
    main_img : numpy.ndarray
        Image as returned by ``cv2.imread`` (BGR channel order).
    kernel : numpy.ndarray
        Structuring element used for the morphological closing step.

    Returns
    -------
    list
        17 values in the column order used by ``create_dataset``:
        area, perimeter, bounding-box width, bounding-box height,
        aspect ratio, rectangularity, circularity, per-channel mean (R, G, B),
        per-channel standard deviation (R, G, B), and four Haralick
        statistics (indices 1, 2, 4 and 8 of the direction-averaged vector).

    Raises
    ------
    ValueError
        If no contour is found, or the selected contour has zero area or a
        zero-height bounding box (the ratio features would be undefined).
    """
    # Preprocessing: grayscale -> blur -> inverted Otsu threshold -> closing.
    img = cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)
    gs = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gs, (25, 25), 0)
    _, im_bw_otsu = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

    # Shape features.
    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list on both.
    contours = cv2.findContours(
        closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(contours) == 0:
        raise ValueError("no contour found after thresholding")
    # The contour list is not ordered by size, so contours[0] may be a small
    # speck; take the contour enclosing the largest area instead.
    cnt = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    x, y, w, h = cv2.boundingRect(cnt)
    if area == 0 or h == 0:
        raise ValueError("largest contour is degenerate (zero area or height)")
    aspect_ratio = float(w) / h
    rectangularity = w * h / area
    circularity = (perimeter ** 2) / area

    # Colour features.
    # Channel values equal to 255 are replaced by 0 before the statistics are
    # taken. `img` is local and `gs` was derived from it above, so modifying
    # it in place here does not affect any other feature.
    red_channel = img[:, :, 0]
    green_channel = img[:, :, 1]
    blue_channel = img[:, :, 2]
    blue_channel[blue_channel == 255] = 0
    green_channel[green_channel == 255] = 0
    red_channel[red_channel == 255] = 0

    red_mean = np.mean(red_channel)
    green_mean = np.mean(green_channel)
    blue_mean = np.mean(blue_channel)

    red_std = np.std(red_channel)
    green_std = np.std(green_channel)
    blue_std = np.std(blue_channel)

    # Texture features: Haralick statistics averaged over all directions.
    textures = mt.features.haralick(gs)
    ht_mean = textures.mean(axis=0)
    contrast = ht_mean[1]
    correlation = ht_mean[2]
    inverse_diff_moments = ht_mean[4]
    entropy = ht_mean[8]

    return [
        area, perimeter, w, h, aspect_ratio, rectangularity, circularity,
        red_mean, green_mean, blue_mean, red_std, green_std, blue_std,
        contrast, correlation, inverse_diff_moments, entropy,
    ]


def create_dataset():
    """Extract one feature row per file in ``img_files`` and return them as a DataFrame.

    Reads the module-level ``ds_path`` (image directory) and ``img_files``
    (file names inside it). Files are processed in the order they appear in
    ``img_files``, and row ``i`` of the result belongs to ``img_files[i]``.
    For that reason a file that cannot be processed raises instead of being
    skipped, which would silently shift the remaining rows.

    Returns
    -------
    pandas.DataFrame
        One row per image and 17 feature columns, with a default
        0..n-1 integer index.

    Raises
    ------
    ValueError
        If a file cannot be decoded as an image, or no usable leaf contour
        can be extracted from it. The message names the offending path.
    """
    names = ['area','perimeter','physiological_length','physiological_width','aspect_ratio','rectangularity','circularity', \
             'mean_r','mean_g','mean_b','stddev_r','stddev_g','stddev_b', \
             'contrast','correlation','inverse_difference_moments','entropy'
            ]
    # The structuring element is identical for every image; build it once.
    kernel = np.ones((50, 50), np.uint8)

    # Collect plain rows and build the frame once at the end. Appending to a
    # DataFrame inside the loop copies the whole frame on every iteration and
    # relies on DataFrame.append, which no longer exists in pandas 2.x.
    records = []
    for file in img_files:
        imgpath = os.path.join(ds_path, file)
        print(imgpath)
        main_img = cv2.imread(imgpath)
        if main_img is None:
            # cv2.imread signals an unreadable/non-image file by returning None.
            raise ValueError("could not read image: " + imgpath)

        try:
            vector = _leaf_feature_vector(main_img, kernel)
        except ValueError as exc:
            raise ValueError("{}: {}".format(imgpath, exc)) from exc

        records.append(vector)
        print(file)
    return pd.DataFrame(records, columns=names)

In [None]:
# Run feature extraction over every file listed in img_files; the function
# prints each path as it goes, which produces the long log below.
dataset = create_dataset()

/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2474.jpg
2474.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2406.jpg
2406.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2227.jpg
2227.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2429.jpg
2429.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2473.jpg
2473.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2284.jpg
2284.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2321.jpg
2321.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2346.jpg
2346.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2452.jpg
2452.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2329.jpg
2329.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2462.jpg
2462.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2285.jpg
2285.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2327.jpg
2327.jpg
/content/drive/My Drive/Leaf_Classifier/Flavia_leaves/2402.jpg
2

In [None]:
# (rows, columns) of the extracted feature table: one row per processed image.
dataset.shape

(1907, 17)

In [None]:
# Sanity check that create_dataset() returned a pandas DataFrame.
type(dataset)

pandas.core.frame.DataFrame

In [None]:
# Persist the features to the current working directory.
# NOTE(review): to_csv writes the row index as an extra, unnamed first column
# by default; on re-read it shows up as "Unnamed: 0". Re-running this cell
# after the read-back cell below stacks another such column ("Unnamed: 0.1").
# Consider index=False once all consumers of the CSV are confirmed.
dataset.to_csv("Flavia_features.csv")

In [None]:
# Read the saved CSV back and preview the first rows as a round-trip check.
# NOTE(review): this rebinds `dataset` to the re-read frame, which carries the
# saved index as "Unnamed: ..." column(s); it is no longer the 17-column frame
# returned by create_dataset(). Confirm later cells expect that layout.
dataset = pd.read_csv("Flavia_features.csv")
dataset.head(5)

Unnamed: 0.1,Unnamed: 0,area,perimeter,physiological_length,physiological_width,aspect_ratio,rectangularity,circularity,mean_r,mean_g,mean_b,stddev_r,stddev_g,stddev_b,contrast,correlation,inverse_difference_moments,entropy
0,0,799164.5,3904.807676,1422,994,1.430584,1.768682,19.07933,38.733337,66.020932,16.765756,47.887843,77.688063,26.379715,11.212935,0.99874,0.69619,5.292527
1,0,48135.5,3524.454414,1378,919,1.499456,26.308691,258.058583,4.268456,5.384296,3.652107,25.85315,30.524472,24.021988,5.970259,0.993907,0.969667,0.663949
2,0,760065.5,3875.215624,1294,985,1.313706,1.676948,19.757897,171.516174,222.333394,150.031741,97.469089,37.794385,120.087013,36.289554,0.995576,0.647226,6.082266
3,0,110.5,49.698484,16,17,0.941176,2.461538,22.352392,39.01116,92.697098,7.116856,53.165925,105.731311,24.191019,19.532802,0.996752,0.633766,6.214738
4,0,847089.5,4046.651263,1473,957,1.539185,1.664123,19.331353,41.741931,70.821872,23.112604,48.610715,78.947987,30.832099,11.979339,0.998607,0.675842,5.472656
