**Creating Dataset from Images**

In [1]:
# Mount Google Drive at /content/drive so later cells can read and write files there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
cd /content/drive/MyDrive/Inter-IIT

/content/drive/MyDrive/Inter-IIT


In [3]:
import os
import cv2
import numpy as np
import pandas as pd

In [4]:
!pip install mahotas



In [5]:
pip install numpy --upgrade



In [6]:
import mahotas as mt
from matplotlib import pyplot as plt
%matplotlib inline

In [7]:
# Directory holding the leaf images. The trailing slash matters: form_dataset
# builds each file path by plain string concatenation (path + file).
path = r'/content/drive/MyDrive/Inter-IIT/Leaves/'
# os.listdir returns entries in arbitrary order; sort them so the dataset rows
# come out in the same order on every run.
img_folder = sorted(os.listdir(path))

In [8]:
# Flat list of inclusive (first, last) file-number bounds, two entries per class.
# form_dataset walks the list in steps of two and uses the pair's position
# (0, 1, 2, ...) as the class label, so the ORDER of the pairs is significant
# and must not be "tidied" into numeric order.
points_to_diff_category = [
    1001, 1059,  # label 0
    1060, 1122,  # label 1
    1552, 1616,  # label 2
    1123, 1194,  # label 3
    1195, 1267,  # label 4
    1268, 1323,  # label 5
    1324, 1385,  # label 6
    1386, 1437,  # label 7
    1497, 1551,  # label 8
    1438, 1496,  # label 9
    2001, 2050,  # label 10
    2051, 2113,  # label 11
    2114, 2165,  # label 12
    2166, 2230,  # label 13
    2231, 2290,  # label 14
    2291, 2346,  # label 15
    2347, 2423,  # label 16
    2424, 2485,  # label 17
    2486, 2546,  # label 18
    2547, 2612,  # label 19
    2616, 2675,  # label 20
    3001, 3055,  # label 21
    3056, 3110,  # label 22
    3111, 3175,  # label 23
    3176, 3229,  # label 24
    3230, 3281,  # label 25
    3282, 3334,  # label 26
    3335, 3389,  # label 27
    3390, 3446,  # label 28
    3447, 3510,  # label 29
    3511, 3563,  # label 30
    3566, 3621,  # label 31
]

In [10]:
def form_dataset():
    """Extract shape, colour and texture features for every image in ``img_folder``.

    Reads three module-level names: ``path`` (image directory, joined to each
    file name by plain concatenation), ``img_folder`` (the file names) and
    ``points_to_diff_category`` (flat list of inclusive ``first, last``
    file-number pairs; the position of the matching pair is the class label).

    Returns:
        pandas.DataFrame: one row per image, with the columns listed in
        ``attributes``. ``target`` is the class index, or ``''`` when the file
        number falls inside none of the ranges. ``length`` and ``width`` hold
        the bounding-box width and height respectively. ``rectangularity`` and
        ``circularity`` are NaN when the selected contour has zero area.
        Rows are indexed 0..n-1.

    Raises:
        ValueError: if an image cannot be read, if a file name does not start
            with an integer, or if no contour is found after thresholding.
    """
    attributes = [
        'target', 'area', 'perimeter', 'length', 'width', 'aspect_ratio',
        'rectangularity', 'circularity', 'red_mean', 'green_mean', 'blue_mean',
        'stddev_red', 'stddev_green', 'stddev_blue', 'contrast', 'correlation',
        'inverse_difference_moments', 'entropy',
    ]
    # Inclusive (first, last) bounds, one pair per class, in label order.
    class_ranges = list(zip(points_to_diff_category[0::2],
                            points_to_diff_category[1::2]))
    # Structuring element for the morphological closing; identical for every image.
    kernel = np.ones((50, 50), np.uint8)

    # Rows are collected in a list and turned into a DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole frame
    # on every call.
    rows = []
    for file in img_folder:
        imgpath = path + file
        main_img = cv2.imread(imgpath)
        if main_img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise ValueError("Could not read image: " + imgpath)
        img = cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        # Segmentation: blur, inverted Otsu threshold (dark pixels become
        # foreground), then closing to fill gaps in the mask.
        blur = cv2.GaussianBlur(gray, (25, 25), 0)
        _, im_bw_otsu = cv2.threshold(
            blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

        # Colour statistics. The channels are views into `img`, so the masking
        # below edits `img` in place; `gray` was computed above and is unaffected.
        # Saturated values (255) are zeroed per channel before averaging --
        # presumably to damp the white background; TODO confirm.
        red_channel = img[:, :, 0]
        green_channel = img[:, :, 1]
        blue_channel = img[:, :, 2]
        blue_channel[blue_channel == 255] = 0
        green_channel[green_channel == 255] = 0
        red_channel[red_channel == 255] = 0
        red_mean = np.mean(red_channel)
        green_mean = np.mean(green_channel)
        blue_mean = np.mean(blue_channel)
        red_std = np.std(red_channel)
        green_std = np.std(green_channel)
        blue_std = np.std(blue_channel)

        # Shape features. The order of the contours returned with RETR_TREE says
        # nothing about their size, so take the largest one by area rather than
        # contours[0] (assumes the leaf is the biggest foreground region).
        contours, _ = cv2.findContours(
            closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) == 0:
            raise ValueError("No contour found in image: " + imgpath)
        cnt = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(cnt)
        perimeter = cv2.arcLength(cnt, True)
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = float(w) / h
        # A degenerate (zero-area) contour would otherwise divide by zero.
        rectangularity = w * h / area if area else np.nan
        circularity = (perimeter ** 2) / area if area else np.nan

        # Texture features: average the Haralick features over the directions
        # returned by mahotas, then pick the four that are kept.
        textures = mt.features.haralick(gray)
        ht_mean = textures.mean(axis=0)
        contrast = ht_mean[1]
        correlation = ht_mean[2]
        inverse_diff_moments = ht_mean[4]
        entropy = ht_mean[8]

        # Class label: index of the first range containing the file number;
        # stays '' when no range matches.
        label_num = int(file.split(".")[0])
        label = ''
        for class_index, (first, last) in enumerate(class_ranges):
            if first <= label_num <= last:
                label = class_index
                break

        rows.append([
            label, area, perimeter, w, h, aspect_ratio, rectangularity,
            circularity, red_mean, green_mean, blue_mean, red_std, green_std,
            blue_std, contrast, correlation, inverse_diff_moments, entropy,
        ])

    return pd.DataFrame(rows, columns=attributes)

In [11]:
# Run the feature extraction over every file listed in img_folder (one row per image).
data = form_dataset()

In [12]:
# Sanity check: (number of images, number of columns); the column count should be
# 18, the length of the attribute list in form_dataset.
data.shape

(1854, 18)

In [13]:
# Write the feature table to Leaves_Data.csv in the current working directory.
# The DataFrame index is written as the first, unnamed column (to_csv defaults
# to index=True).
data.to_csv("Leaves_Data.csv")