In [1]:
from skimage.io import imread
from skimage.feature import greycomatrix, greycoprops
from sklearn import preprocessing
import numpy as np
import pandas as pd
import cv2 as cv
import os
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Root folder of the image data set; the cells below treat every entry inside it
# as a class-label folder that holds the images of that class.
input_data_dir = '4.input'

In [3]:
# Tally the files found in every class folder under the input directory.
labels = os.listdir(input_data_dir)
no_samples = sum(
    len(os.listdir(os.path.join(input_data_dir, class_dir)))
    for class_dir in labels
)
print('Number of samples : ',no_samples)

Number of samples :  220


In [4]:
# Column layout of the feature table: six GLCM texture properties, four contour
# shape measurements, and finally the integer class label.
cols = np.asarray([
    'Contrast',
    'Energy',
    'Homogeneity',
    'Correlation',
    'Dissimilarity',
    'ASM',
    'Area',
    'Perimeter',
    'Epsilon',
    'IsConvex',
    'Class',
])
no_cols = len(cols)

In [5]:
# Integer code stored in the 'Class' column for each class folder name.
labeling_class = dict(benign=0, malignant=1, no_tumor=2)

In [6]:
def _contour_features(gray):
    """Return ``(area, perimeter, epsilon, is_convex)`` for the first contour of ``gray``.

    ``gray`` is a single-channel image. It is binarised at 127 and its contours
    are retrieved with ``cv.RETR_LIST`` / ``cv.CHAIN_APPROX_SIMPLE``. ``epsilon``
    is 10% of the perimeter. When the thresholded image contains no contour the
    result is ``(0.0, 0.0, 0.0, False)`` rather than an ``IndexError``.
    """
    _, thresh = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; the contour list is always second to last.
    contours = cv.findContours(thresh, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return 0.0, 0.0, 0.0, False

    # NOTE(review): only the first contour returned is measured, which is not
    # necessarily the largest one in the image - confirm this is intended.
    cnt = contours[0]
    area = cv.contourArea(cnt)
    perimeter = cv.arcLength(cnt, True)
    epsilon = 0.1 * perimeter
    return area, perimeter, epsilon, cv.isContourConvex(cnt)


def _glcm_features(gray):
    """Return ``[contrast, energy, homogeneity, correlation, dissimilarity, ASM]``.

    The properties are read from a grey-level co-occurrence matrix built at
    distance 1 and angle 0 on a copy of ``gray`` rescaled to the integer
    range 0..11.
    """
    # MinMaxScaler treats every image column as a feature, so each column is
    # rescaled to [0, 11] independently before truncation to int.
    scaled = preprocessing.MinMaxScaler((0, 11)).fit_transform(gray).astype(int)
    glcm = greycomatrix(scaled, distances=[1], angles=[0], levels=256,
                        symmetric=False, normed=False)
    properties = ('contrast', 'energy', 'homogeneity',
                  'correlation', 'dissimilarity', 'ASM')
    # One distance and one angle were requested, so each property is a 1x1 array.
    return [greycoprops(glcm, prop)[0][0] for prop in properties]


def extract_feature():
    """Extract texture and shape features from every image and write ``data.csv``.

    Every sub-folder of ``input_data_dir`` is treated as a class whose name is
    looked up in ``labeling_class``. For each image the six GLCM properties and
    four contour measurements are computed and stored in one row, laid out as
    described by ``cols``.

    Side effects:
        Prints the list of class folders and the number of images in each one,
        then writes the feature table to ``data.csv`` (without the index).

    Returns:
        None.

    Raises:
        KeyError: if a class folder containing images is not a key of
            ``labeling_class``.
    """
    # Sorted so the CSV row order is reproducible (os.listdir order is
    # arbitrary); stray files next to the class folders are skipped.
    labels = sorted(
        entry for entry in os.listdir(input_data_dir)
        if os.path.isdir(os.path.join(input_data_dir, entry))
    )
    print(labels)

    # Rows are collected in a list so the table size always matches the images
    # actually read, independent of any previously computed sample count.
    rows = []
    for label in labels:
        label_dir = os.path.join(input_data_dir, label)
        image_names = sorted(os.listdir(label_dir))
        print(len(image_names))
        for image_name in image_names:
            img = imread(os.path.join(label_dir, image_name))
            # Images that are already single-channel need no colour conversion.
            if img.ndim == 3:
                img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)

            area, perimeter, epsilon, is_convex = _contour_features(img)
            rows.append(
                _glcm_features(img)
                + [area, perimeter, epsilon, is_convex, labeling_class[label]]
            )

    # reshape keeps the (0, n_columns) shape valid when no image was found.
    features = np.asarray(rows, dtype=float).reshape(-1, len(cols))
    df = pd.DataFrame(features, columns=cols)
    df['Class'] = df['Class'].astype(np.int64)
    df.to_csv("data.csv", index=False)

In [7]:
# Run the extraction over the whole input folder; the resulting feature table is
# written to data.csv.
extract_feature()

['benign', 'malignant', 'no_tumor']
16
138
66


In [8]:
# Load the feature table that extract_feature() wrote to data.csv.
data_as_frame = pd.read_csv('data.csv')

In [9]:
# Preview the first rows of the feature table as a sanity check.
data_as_frame.head()

Unnamed: 0,Contrast,Energy,Homogeneity,Correlation,Dissimilarity,ASM,Area,Perimeter,Epsilon,IsConvex,Class
0,0.458577,0.964199,0.984987,0.911907,0.070014,0.92968,2.0,5.656854,0.565685,1.0,0
1,0.331137,0.983411,0.991778,0.841892,0.045642,0.967097,0.0,0.0,0.0,0.0,0
2,0.397629,0.963333,0.985258,0.922795,0.064784,0.92801,2.0,5.656854,0.565685,1.0,0
3,0.247455,0.985431,0.992373,0.850843,0.037936,0.971074,0.0,0.0,0.0,0.0,0
4,0.302476,0.98199,0.991358,0.875217,0.045188,0.964305,0.0,2.828427,0.282843,0.0,0


All features have been extracted, saved to `data.csv`, and loaded back into a DataFrame.