### libraries

In [228]:
#thanks to https://github.com/ebenolson/tessa/blob/master/sfta.py

from cmath import e
from time import time
import cv2
import numpy as np
# from skimage.feature import graycomatrix, graycoprops
from skimage.measure import shannon_entropy as Entropy
from sklearn import svm
from sklearn.svm import SVC
import os
import utils
import pandas as pd
import numpy as np

### Extraction of SFTA

In [229]:
def mrange(start, step, end):
    """mimic behavior of MATLAB's range function
    """
    def gen(start, step, end):
        n = start
        while n <= end:
            yield n
            n = n+step
    return list(gen(start, step, end))


def findBorders(Im):
    I = np.pad(Im, [[1, 1], [1, 1]], 'constant', constant_values=1).astype('uint8')

    I2 = I[2:, 1:-1]+I[0:-2, 1:-1]+I[1:-1:, 2:]+I[1:-1:, 0:-2] + \
        I[2:, 2:]+I[2:, 0:-2]+I[0:-2, 2:]+I[0:-2, 0:-2]
    return Im * (I2 < 8)


def otsu(counts):
    p = counts*1.0/np.sum(counts)
    omega = np.cumsum(p)
    mu = np.cumsum(p*range(1, len(p)+1))
    mu_t = mu[-1]
    with np.errstate(divide='ignore', invalid='ignore'):
        sigma_b_squared = (mu_t * omega - mu)**2 / (omega * (1-omega))
    maxval = np.max(np.nan_to_num(sigma_b_squared))
    if np.isnan(sigma_b_squared).all():
        pos = 0
    else:
        pos = np.mean((sigma_b_squared == maxval).nonzero())+1
    return pos


def otsurec(I, ttotal):
    if I == []:
        T = []
    else:
        I = I.astype(np.uint8).flatten()

        num_bins = 256
        counts = np.histogram(I, range(num_bins))[0]

        T = np.zeros((ttotal, 1))

        def otsurec_helper(lowerBin, upperBin, tLower, tUpper):
            if ((tUpper < tLower) or (lowerBin >= upperBin)):
                return
            level = otsu(counts[int(np.ceil(lowerBin))-1:int(np.ceil(upperBin))]) + lowerBin

            insertPos = int(np.ceil((tLower + tUpper) / 2.))
            T[insertPos-1] = level / num_bins
            otsurec_helper(lowerBin, level, tLower, insertPos - 1)
            otsurec_helper(level + 1, upperBin, insertPos + 1, tUpper)

        otsurec_helper(1, num_bins, 1, ttotal)
    return [t[0] for t in T]


def hausDim(I):
    maxDim = np.max(np.shape(I))
    newDimSize = int(2**np.ceil(np.log2(maxDim)))
    rowPad = newDimSize - np.shape(I)[0]
    colPad = newDimSize - np.shape(I)[1]

    I = np.pad(I, ((0, rowPad), (0, colPad)), 'constant')

    boxCounts = np.zeros(int(np.ceil(np.log2(maxDim)))+1)
    resolutions = np.zeros(int(np.ceil(np.log2(maxDim)))+1)

    iSize = np.shape(I)[0]
    boxSize = 1
    idx = 0
    while boxSize <= iSize:
        boxCount = (I > 0).sum()
        idx = idx + 1
        boxCounts[idx-1] = boxCount
        resolutions[idx-1] = 1./boxSize

        boxSize = boxSize*2
        I = I[::2, ::2]+I[1::2, ::2]+I[1::2, 1::2]+I[::2, 1::2]
    D = np.polyfit(np.log(resolutions), np.log(boxCounts), 1)
    return D[0]


# from PIL import Image
# DF=4
def extractSFTA(filename, outputFileName, nt=4):
    I = cv2.imread(filename)
    # I = np.array(Image.fromarray(I).resize((I.shape[0]//DF, I.shape[1]// DF)))

    if len(np.shape(I)) == 3:
        I = np.mean(I, 2)
    # elif len(np.shape(I)) != 2:
    #     raise ImageDimensionError

    I = I.astype(np.uint8)

    T = otsurec(I, nt)
    dSize = len(T)*6
    D = np.zeros(dSize)
    pos = 0
    for t in range(len(T)):
        thresh = T[t]
        Ib = I > (thresh*255)
        Ib = findBorders(Ib)

        vals = I[Ib.nonzero()].astype(np.double)
        D[pos] = hausDim(Ib)
        pos += 1

        D[pos] = np.mean(vals)
        pos += 1

        D[pos] = len(vals)
        pos += 1

    T = T+[1.0, ]
    for t in range(len(T)-1):
        lowerThresh = T[t]
        upperThresh = T[t+1]
        Ib = (I > (lowerThresh*255)) * (I < (upperThresh*255))
        Ib = findBorders(Ib)

        vals = I[Ib.nonzero()].astype(np.double)
        D[pos] = hausDim(Ib)
        pos += 1

        D[pos] = np.mean(vals)
        pos += 1

        D[pos] = len(vals)
        pos += 1

    features = D
    features = features.flatten()
    features = features.reshape(1, -1)

    with open(outputFileName+'.csv', 'a') as csvfile:
        np.savetxt(csvfile, features, fmt='%f', delimiter=',')
        csvfile.close()
    
    # print('extracted sfta: ' , len(D))
    
    return D


# class SegmentationFractalTextureAnalysis(object):
#     """Computes features by applying multiple thresholds and caculating the fractal dimension
#         of the resulting binary images"""
#     def __init__(self, nt):
#         """Args:
#                 nt: the number of thresholds
#         """
#         super(SegmentationFractalTextureAnalysis, self).__init__()
#         self.nt = nt

#     def feature_vector(self, image):
#         """Returns the feature vector of an image
#         """
#         return sfta(image, self.nt)


### initialize output files

In [230]:
def intialize(): 
    
    # check if file exists delete it
    if os.path.isfile('femaleSFTA.csv'):
        os.remove('femaleSFTA.csv')

    if os.path.isfile("maleSFTA.csv"):
        os.remove('maleSFTA.csv')

    if os.path.isfile("testSFTA.csv"):
        os.remove('testSFTA.csv')
    if os.path.isfile("icdarSFTA.csv"):
        os.remove('icdarSFTA.csv')

    with open('femaleSFTA.csv', 'a') as csvfile:
        np.savetxt(csvfile, [], delimiter=',')
        csvfile.close()

    with open('maleSFTA.csv', 'a') as csvfile:
        np.savetxt(csvfile, [], delimiter=',')
        csvfile.close()

    with open('testSFTA.csv', 'a') as csvfile:
        np.savetxt(csvfile, [], delimiter=',')
        csvfile.close()

    with open('icdarSFTA.csv', 'a') as csvfile:
        np.savetxt(csvfile, [], delimiter=',')
        csvfile.close()


### extract features of ICDAR dataset

In [231]:
def extractICDAR():

    X_train=[]
    Y_train=[]

    # read csv file
    df = pd.read_csv('train_answers.csv')
    
    # get the labels
    icdar_classes = df['male'].values

    i = 0
    for filename in os.listdir('images_gender/images/train'):
        try:
            X_train.append(extractSFTA('images_gender/images/train/'+filename, 'icdarSFTA'))
            Y_train.append(icdar_classes[i//2])
            i+=1
            print('icdar X_train: ', (len(X_train),len(X_train[0])))
            print('icdar Y_train: ', (len(Y_train),1))
        except Exception as e:
            print(e)
            continue

    return X_train,Y_train

### extract features from CMP dataset

In [232]:
def extractCMP():

    X_train=[]
    Y_train=[]

    X_test=[]
    Y_test=[]

    for filename in os.listdir("Females/Females"):
        try:
            X_train.append(extractSFTA('Females/Females/'+filename, 'femaleSFTA'))
            Y_train.append(0)
            # print('cmp X_train: ', (len(X_train),len(X_train[0])))
            # print('cmp Y_train: ', (len(Y_train),1))
        except Exception as e:
            print(e)
            continue

    for filename in os.listdir("Males/Males"):
        try:
            X_train.append(extractSFTA('Males/Males/'+filename, 'maleSFTA'))
            Y_train.append(1)
            # print('cmp X_train: ', (len(X_train),len(X_train[0])))
            # print('cmp Y_train: ', (len(Y_train),1))
        except Exception as e:
            print(e)
            continue

    for filename in os.listdir('Unspecified/Unspecified'):
        X_test.append(extractSFTA('Unspecified/Unspecified/'+filename, 'testSFTA'))
        if filename.find('F')!=-1:
            Y_test.append(0)
        else:
            Y_test.append(1)
        # print('cmp X_test: ', (len(X_train),len(X_train[0])))
        # print('cmp Y_test: ', (len(Y_train),1))

    return X_train,Y_train,X_test,Y_test



### train models and predict

In [233]:
def MLmodels(X_train,Y_train,X_test,Y_test):

    # train the classifier and predict the test data
    print("Training the classifier...")
    start_time = time()

    clf = SVC(C=50000.0, class_weight='balanced', gamma=0.0001, kernel='rbf')
    # print(train_classes.shape)
    # svmTuner(features, train_classes)
    clf.fit(X_train, Y_train)

    end_time = time()
    print("Time taken to train the classifier: ",end_time-start_time, " seconds")


    # predict the test data
    print("Predicting the test data...")
    start_time = time()

    predicted_classes = clf.predict(X_test)
    
    end_time = time()
    print(predicted_classes,Y_test)
    print("Time taken to predict the test data: ",end_time-start_time, " seconds")

    # calculate the accuracy
    print("Calculating the accuracy...")
    print("Accuracy: ", utils.get_accuracy(Y_test, predicted_classes))


### main function

### ICDAR part

In [234]:
if __name__ == '__main__':
   
    print("Extracting SFTA features...")

    intialize()

    X_train=[]
    Y_train=[]

    X_test=[]
    Y_test=[]

    # time to extract features
    start_time = time()

    X1_train,Y1_train=[],[]#extractICDAR()
    


Extracting SFTA features...


### CMP part

In [235]:
    X2_train,Y2_train,X2_test,Y2_test=extractCMP()
    
    X_train=X1_train+X2_train
    Y_train=Y1_train+Y2_train

    X_test=X2_test
    Y_test=Y2_test

    end_time = time()
    print("Time taken to extract features: ", end_time-start_time, " seconds")    

  if I == []:


Time taken to extract features:  720.0524525642395  seconds


### Models part

In [236]:
    MLmodels(X_train,Y_train,X_test,Y_test)
    # print(X_train)
    # print(Y_train)
    # print(X_test)
    # print(Y_test)

Training the classifier...
Time taken to train the classifier:  0.012982845306396484  seconds
Predicting the test data...
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1] [0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1]
Time taken to predict the test data:  0.002969980239868164  seconds
Calculating the accuracy...
Accuracy:  0.54


### testing area

In [237]:
x=[1,3]
y=[[4,5],[6,7]]
# y.append(x)
print(x+y)
print(y+x)

# x=np.empty((0,2))
# print(x.shape)
# y=np.array([
#     [1,2],
#     [3,4]
# ])
# z=np.array([
#     [11,11],
#     [31,41]
# ])

# print(x,y)

# c=np.append(x,y,axis=0)
# print(c)

# c=np.append(c,z,axis=0)
# print(c.shape)



[1, 3, [4, 5], [6, 7]]
[[4, 5], [6, 7], 1, 3]
