In [39]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from skimage import feature, exposure
from matplotlib.pyplot import bar
from skimage.transform import resize
import sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from PIL import Image, ImageEnhance
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
from sklearn.neural_network import MLPClassifier
import os

In [2]:
# --- COLD histogram configuration -------------------------------------------
# Radial limits used to build the rho bin edges.
R_INNER = 5.0
R_OUTER = 35.0

# Histogram resolution: number of distance (rho) bins and angle bins.
N_RHO_BINS = 7
N_ANGLE_BINS = 12

# Derived values: total cells per histogram and the width of one angle bin
# in degrees.
BIN_SIZE = 360 // N_ANGLE_BINS
N_BINS = N_ANGLE_BINS * N_RHO_BINS

# Contour-point offsets k (3, 4, 5, 6, 7); one histogram is built per offset.
K_S = np.arange(3, 7 + 1)

In [13]:
def preprocess_image(img_file,show_images=True,sharpness_factor = 10, bordersize = 3,):
    """Load an image and turn it into a binary (black/white) image.

    Pipeline: sharpen -> upscale by 2 -> pad with a white border ->
    grayscale -> 3x3 Gaussian blur -> Otsu threshold.

    Parameters
    ----------
    img_file : str or file-like
        Anything accepted by ``PIL.Image.open``.
    show_images : bool, default True
        When true, plot the intermediate stages side by side in one figure.
    sharpness_factor : float, default 10
        Factor passed to ``ImageEnhance.Sharpness.enhance``.
    bordersize : int, default 3
        Width in pixels of the white border added on every side.

    Returns
    -------
    bw_image : numpy.ndarray
        Single-channel thresholded image produced by ``cv2.threshold``.
    orig_image : numpy.ndarray
        The sharpened, upscaled and bordered RGB image before grayscale
        conversion.
    """
    # The context manager guarantees the file handle is released. convert()
    # loads the pixel data and returns an image independent of the file, and
    # forcing RGB lets grayscale / palette / RGBA inputs go through the same
    # 3-channel path instead of failing in cvtColor.
    with Image.open(img_file) as im:
        rgb = im.convert("RGB")

    sharpened = ImageEnhance.Sharpness(rgb).enhance(sharpness_factor)
    # Double the size of the image.
    upscaled = sharpened.resize((rgb.width * 2, rgb.height * 2))

    bordered = cv2.copyMakeBorder(
        np.array(upscaled),
        top=bordersize,
        bottom=bordersize,
        left=bordersize,
        right=bordersize,
        borderType=cv2.BORDER_CONSTANT,
        value=[255, 255, 255]
    )
    orig_image = bordered.copy()

    # PIL delivers channels in RGB order, so RGB2GRAY (not BGR2GRAY) applies
    # the luminance weights to the correct channels.
    gray = cv2.cvtColor(bordered, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    # With THRESH_OTSU the threshold is chosen automatically; the 128 passed
    # here is ignored by OpenCV.
    _, bw_image = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    if show_images:
        # One panel per stage: successive plt.imshow calls on the same axes
        # would leave only the last stage visible.
        stages = [
            ("sharpened + upscaled", upscaled),
            ("grayscale + blur", gray),
            ("binarised (Otsu)", bw_image),
        ]
        _, axes = plt.subplots(1, len(stages), figsize=(15, 5))
        for ax, (title, stage) in zip(axes, stages):
            ax.imshow(stage, cmap='gray')
            ax.set_title(title)
            ax.axis('off')

    return bw_image, orig_image

In [9]:
def get_contour_pixels(bw_image,show_images=True):
    """Extract the contours of a binary image, largest one removed.

    Parameters
    ----------
    bw_image : numpy.ndarray
        Single-channel binary image passed to ``cv2.findContours``.
    show_images : bool, default True
        When true, draw the kept contours on a 3-channel copy of the image
        and plot it.

    Returns
    -------
    list of numpy.ndarray
        Contours (every boundary pixel, ``CHAIN_APPROX_NONE``) sorted by
        decreasing area, with the largest-area contour dropped.
    """
    # findContours returns (contours, hierarchy) on OpenCV 4 but
    # (image, contours, hierarchy) on OpenCV 3; indexing from the end picks
    # the contour list on both.
    contours = cv2.findContours(
        bw_image, cv2.RETR_TREE,
        cv2.CHAIN_APPROX_NONE
        )[-2]

    # Sort by area and discard the biggest contour.
    # NOTE(review): presumably that is the outline of the page/background
    # rather than ink -- confirm on a sample image.
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[1:]

    if show_images:
        # Build the 3-channel drawing canvas only when it is actually shown;
        # a new axis is added so the gray image can be stacked into 3 channels.
        canvas = np.repeat(bw_image[:, :, np.newaxis], 3, axis=2)
        for cnt in contours:
            cv2.drawContours(canvas, [cnt], 0, (255, 0, 0), 1)

        # New figure so the overlay does not draw over an earlier plot.
        plt.figure()
        plt.imshow(canvas)
    return contours

In [14]:
def get_cold_features(img_file, show_images=False ,approx_poly_factor = 0.01):
    """Compute the COLD feature vector of one image.

    Every contour is simplified to a polygon. For each offset ``k`` in
    ``K_S`` each polygon vertex is paired with the vertex ``k`` steps further
    along the (closed) polygon; the distance and angle of the connecting
    segment are accumulated in an ``N_RHO_BINS x N_ANGLE_BINS`` histogram,
    which is then normalised to sum to 1.

    Parameters
    ----------
    img_file : str or file-like
        Image passed on to ``preprocess_image``.
    show_images : bool, default False
        Forwarded to ``preprocess_image`` and ``get_contour_pixels``.
    approx_poly_factor : float, default 0.01
        Fraction of each contour's arc length used as the ``epsilon``
        tolerance of ``cv2.approxPolyDP``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(K_S) * N_BINS`` (the per-``k`` histograms
        flattened and concatenated). A histogram with no counted pair is
        left as all zeros instead of NaN.
    """
    bw_image, _ = preprocess_image(img_file,show_images)
    contours = get_contour_pixels(bw_image,show_images)

    # The polygon approximation does not depend on k, so it is computed once
    # per contour rather than once per (k, contour) pair.
    polygons = []
    for cnt in contours:
        epsilon = approx_poly_factor * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        if len(approx) > 0:
            # approxPolyDP returns shape (n, 1, 2); flatten to (n, 2) floats.
            polygons.append(approx.reshape(-1, 2).astype(float))

    # Bin edges are compared in linear space. The original compared
    # log10(rho) with log10(edge), which is the same ordering (log10 is
    # monotonic) but triggers a log10(0) warning for zero-length segments.
    rho_bin_edges = np.linspace(R_INNER, R_OUTER, N_RHO_BINS)
    feature_vectors = np.zeros((len(K_S), N_BINS))

    for j, k in enumerate(K_S):
        hist = np.zeros((N_RHO_BINS, N_ANGLE_BINS))
        for points in polygons:
            # Row i of the rolled array is vertex (i + k) % n.
            deltas = np.roll(points, -k, axis=0) - points
            dx, dy = deltas[:, 0], deltas[:, 1]

            # Angle shifted into [0, 360] degrees, and segment length.
            thetas = np.degrees(np.arctan2(dy, dx) + np.pi)
            rhos = np.sqrt(dy ** 2 + dx ** 2)

            # Number of edges strictly above each rho (0 .. N_RHO_BINS).
            edges_above = (rhos[:, np.newaxis] < rho_bin_edges[np.newaxis, :]).sum(axis=1)
            theta_bins = (thetas // BIN_SIZE).astype(int) % N_ANGLE_BINS

            # edges_above == 0 means rho >= R_OUTER. Indexing hist with
            # 0 - 1 = -1 would wrap around to the last row and mix those
            # pairs with the rho < R_INNER ones, so they are left out.
            in_range = edges_above > 0
            np.add.at(hist, (edges_above[in_range] - 1, theta_bins[in_range]), 1)

        total = hist.sum()
        if total > 0:
            feature_vectors[j] = (hist / total).ravel()
        # else: keep the zero row; dividing by 0 would fill it with NaN.

    return feature_vectors.flatten()

In [17]:
def _load_cold_features(root_dir):
    """Compute COLD features for every file found under ``root_dir``.

    Parameters
    ----------
    root_dir : str
        Directory walked recursively; every file in it is passed to
        ``get_cold_features``.

    Returns
    -------
    numpy.ndarray
        One row of features per file, in sorted path order.

    Raises
    ------
    FileNotFoundError
        If no file is found (for example a mistyped path), instead of
        returning an empty array that only fails later when concatenated.
    """
    features = []
    for dirname, dirnames, filenames in os.walk(root_dir):
        # Sort in place so os.walk visits sub-directories, and we visit
        # files, in a reproducible order.
        dirnames.sort()
        for filename in sorted(filenames):
            features.append(get_cold_features(os.path.join(dirname, filename)))
    if not features:
        raise FileNotFoundError("no files found under {!r}".format(root_dir))
    return np.array(features)


cold_detected_female = _load_cold_features('..//input//cmp23-handwritten-males-vs-females//Females//Females//')
cold_detected_male = _load_cold_features('..//input//cmp23-handwritten-males-vs-females//Males//Males//')

In [18]:
# Assemble the classifier inputs: male rows first, then female rows.
n_male = cold_detected_male.shape[0]
n_female = cold_detected_female.shape[0]

X = np.concatenate([cold_detected_male, cold_detected_female], axis=0)

# Labels: 1.0 for every male sample, 0.0 for every female sample.
Y = np.concatenate([np.ones(n_male), np.zeros(n_female)], axis=0)

In [19]:
# Shuffle with a fixed seed: an unseeded shuffle reorders the rows differently
# on every run, so the seeded split below would still select different samples.
X, Y = sklearn.utils.shuffle(X, Y, random_state=109)

# test_size=0.2 -> 80% training and 20% test.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=109)

In [34]:
# Linear SVM baseline. The solver can shuffle the data randomly, so the seed
# is fixed (same value as the train/test split) to make the fit repeatable.
model = LinearSVC(random_state=109)
model.fit(X_train, y_train)

In [35]:
# Score the linear SVM on the held-out split: the fraction of test samples
# whose predicted label matches the true label.
y_pred = model.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [36]:
# Random forest of 500 trees. Bootstrapping and feature sub-sampling are
# random, so the seed is fixed (same value as the train/test split) to make
# the reported accuracy repeatable.
model2 = RandomForestClassifier(
    n_estimators=500,
    max_features='sqrt',
    criterion='gini',
    n_jobs=-1,
    random_state=109,
)

# Train the model using the training set.
model2.fit(X_train, y_train)

In [37]:
# Score the random forest on the held-out split: the fraction of test samples
# whose predicted label matches the true label.
y_pred = model2.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [38]:
# k-nearest-neighbours classifier (k = 5); fit() returns the estimator itself,
# so construction and training are chained.
model3 = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Score on the held-out split: fraction of correctly labelled test samples.
y_pred = model3.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [46]:
# Small multi-layer perceptron: two hidden layers of two units each, trained
# with the L-BFGS solver and a fixed seed.
mlp_params = dict(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(2,2),
    random_state=1,
)
model4 = MLPClassifier(**mlp_params)
model4.fit(X_train, y_train)

# Score on the held-out split: fraction of correctly labelled test samples.
y_pred = model4.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))