In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from skimage.feature import local_binary_pattern # Local Binary Pattern function
from skimage import io
from scipy.stats import itemfreq # To calculate a normalized histogram 
from sklearn.preprocessing import normalize
from PIL import Image
import glob
import cv2
from natsort import natsorted, ns
import cvutils
import csvs
import os
import dlib
from sklearn.ensemble import IsolationForest
from IPython import display
from IPython.display import HTML
import progressbar
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

In [None]:
# Create list of image names and corresponding gender classifications
image_dic = pd.read_excel('../Datasets/labels_A.xlsx')
image_dic = image_dic[['img_name.jpg', 'gender']] # Choose columns which are of importance
df = pd.DataFrame(image_dic)
df.to_excel('../Datasets/source_genders/labels_A1.xlsx',index=False)

In [None]:
# Separate male and female images and corresponding labels into folders

source_genders = pd.read_excel('../Datasets/source_genders/labels_A1.xlsx')
source_images_file_paths = glob.glob ("../Datasets/source_images/*.jpg") #find all paths which match the given path
source_images_file_paths = natsorted(source_images_file_paths) #sort the list of file names such that the image list will be in the correct order

male_images = []
female_images = []

male_directory = "../Datasets/sorted_sets_A1/male/"
female_directory = "../Datasets/sorted_sets_A1/female/"

for file_path in source_images_file_paths:
    image = cv2.imread(file_path, cv2.COLOR_RGB2BGR) #read the image
    image_name = os.path.basename(file_path)
    image_label = source_genders[source_genders['img_name.jpg']==image_name]['gender'].iloc[0]
    if(image_label == 1):
        male_images.append(image)
        directory = ''.join([male_directory,os.path.basename(image_name)])
        cv2.imwrite(directory, image)
    else:
        female_images.append(image)
        directory = ''.join([female_directory,os.path.basename(image_name)])
        cv2.imwrite(directory, image)


In [10]:
# Detecting faces and cropping

genders = ["male", "female"] #Define genders

faceDet = cv2.CascadeClassifier("../OpenCV_FaceCascade/haarcascade_frontalface_default.xml")
faceDet_two = cv2.CascadeClassifier("../OpenCV_FaceCascade/haarcascade_frontalface_alt2.xml")
faceDet_three = cv2.CascadeClassifier("../OpenCV_FaceCascade/haarcascade_frontalface_alt.xml")
faceDet_four = cv2.CascadeClassifier("../OpenCV_FaceCascade/haarcascade_frontalface_alt_tree.xml")

net = cv2.dnn.readNetFromCaffe("../deep-learning-face-detection/deploy.prototxt.txt", "../deep-learning-face-detection/res10_300x300_ssd_iter_140000.caffemodel")
 
def detect_face_DNN(gender):
    files = glob.glob("../Datasets/sorted_sets_A1/%s/*.jpg" %gender) #Get list of all images with gender
    filenumber = 0
    for f in files:
        # load the input image and construct an input blob for the image
        # by resizing to a fixed 300x300 pixels and then normalizing it
        image = cv2.imread(f)
        (h, w) = image.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
        net.setInput(blob)
        detections = net.forward()
        print(detections.shape[2])
        # loop over the detections
        for i in range(0, detections.shape[2]):
            # extract the confidence (i.e., probability) associated with the
            # prediction
            confidence = detections[0, 0, i, 2]

            # filter out weak detections by ensuring the `confidence` is greater than the minimum confidence
            if confidence > 0.999:
                # compute the (x, y)-coordinates of the bounding box for the object
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                cropped = image[startY:endY, startX:endX] #Cut the frame to size
                try:
                    out = cv2.resize(cropped, (300, 300)) #Resize face so all images have same size
                    cv2.imwrite("../Datasets/A1_dataset_DNN/%s/%s.jpg" %(gender, filenumber), out) #Write image - don' need to worry about keeping track of labels associated because already sorted
                    filenumber += 1 #Increment image number
                except:
                    pass #If error, pass file

                # draw the bounding box of the face along with the associated
                # probability
                #text = "{:.2f}%".format(confidence * 100)
                #y = startY - 10 if startY - 10 > 10 else startY + 10
                #cv2.rectangle(image, (startX, startY), (endX, endY),
                 #   (0, 0, 255), 2)
                #cv2.putText(image, text, (startX, y),
                 #   cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

def detect_faces(gender):
    files = glob.glob("../Datasets/sorted_sets_A1/%s/*.jpg" %gender) #Get list of all images with gender
    filenumber = 0
    for f in files:
        frame = cv2.imread(f) #Open image
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) #Convert image to grayscale
        #Detect face using 4 different classifiers
        face = faceDet.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=10, minSize=(5, 5), flags=cv2.CASCADE_SCALE_IMAGE)
        face_two = faceDet_two.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=10, minSize=(5, 5), flags=cv2.CASCADE_SCALE_IMAGE)
        face_three = faceDet_three.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=10, minSize=(5, 5), flags=cv2.CASCADE_SCALE_IMAGE)
        face_four = faceDet_four.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=10, minSize=(5, 5), flags=cv2.CASCADE_SCALE_IMAGE)
        #Go over detected faces, stop at first detected face, return empty if no face.
        if len(face) == 1:
            facefeatures = face
        elif len(face_two) == 1:
            facefeatures = face_two
        elif len(face_three) == 1:
            facefeatures = face_three
        elif len(face_four) == 1:
            facefeatures = face_four
        else:
            facefeatures = ""
        #Cut and save face
        for (x, y, w, h) in facefeatures: #get coordinates and size of rectangle containing face
            #print ("face found in file: %s" %f)
            gray = gray[y:y+h, x:x+w] #Cut the frame to size
            try:
                out = cv2.resize(gray, (350, 350)) #Resize face so all images have same size
                cv2.imwrite("../Datasets/A1_dataset/%s/%s.jpg" %(gender, filenumber), out) #Write image - don' need to worry about keeping track of labels associated because already sorted
            except:
                pass #If error, pass file
        filenumber += 1 #Increment image number
for gender in genders:
    detect_face_DNN(gender) #Call function
    #detect_faces(gender)

200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200


In [36]:
# gender detection using fisher face and LBPH  

genders = ["male", "female"] #Define genders

c= 1405 #1000
gamma = 9.7 #10
tol = 0.01
clf = SVC(C=c, kernel='rbf', gamma = gamma, tol = tol)

fishface = cv2.face.FisherFaceRecognizer_create() #Initialize fisher face classifier
radius = 1    
no_neighbours = 8
LBPH = cv2.face.LBPHFaceRecognizer_create(radius, no_neighbours)

def get_files(gender): #Define function to get file list, randomly shuffle it and split 80/20
    files = glob.glob("../Datasets/A1_dataset_DNN/%s/*.jpg" %gender)
    return files

def get_features(image):
    M = 25
    N = 25
    tiles = [image[x:x+M,y:y+N] for x in range(0,image.shape[0],M) for y in range(0,image.shape[1],N)]
    radius = 3    
    no_points = 8*radius  # Number of points to be considered as neighbourers
    eps=1e-7
    global_hist_norm = []
    for tile in tiles:
        lbp = local_binary_pattern(tile, no_points, radius, method='uniform') # Uniform LBP is used
        (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, no_points + 3), range=(0, no_points + 2))
        hist = hist.astype("float")
        hist /= (hist.sum() + eps)
        hist_norm = hist
        global_hist_norm.append(hist_norm)
        #hist = itemfreq(lbp.ravel()) # Calculate the histogram -> why do we ravel
        #hist_norm = hist[:, 1]/sum(hist[:, 1]) # Normalize the histogram
    s = np.asarray(global_hist_norm).shape
    global_hist_norm = np.reshape(global_hist_norm,(s[0]*s[1]))
    return global_hist_norm

def SVM_paramter_tuning(X_train, y_train, X_test, y_test):
    
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-2, 1e-1, 1,1e1,1e2, 1e3],
                        'C': [1e-3, 1e-2, 1e-1, 1,1e1,1e2, 1e3], 'tol': [1e-5,1e-4,1e-3, 1e-2, 1e-1],
                        'decision_function_shape':['OVO', 'OVR']}]

    scores = ['precision', 'recall']

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()
        clf = GridSearchCV(SVC(), tuned_parameters, cv=5, scoring='%s_macro' % score)
        clf.fit(X_train, y_train)
        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()

def make_sets(t):
    image_inputs = []
    image_labels = []
    for gender in genders:
        image_files = get_files(gender)
        #Append data and generate labels 0-1
        for item in image_files:
            image = cv2.imread(item) #open image
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #convert to grayscale
            features = get_features(gray)
            if(t == 1):
                image_inputs.append(features) #append image array to training data list
                image_labels.append(genders.index(gender))
            else:
                image_inputs.append(gray) #append image array to training data list
                image_labels.append(genders.index(gender))
            
    training_data, prediction_data, training_labels, prediction_labels = train_test_split(image_inputs,image_labels)
    print(len(training_data))
    print(len(training_labels))
    print(len(prediction_data))
    print(len(prediction_labels))
    return training_data, training_labels, prediction_data, prediction_labels

def train_FF_model(training_data, training_labels):
    print ("training fisher face classifier")
    print ("size of training set is:", len(training_labels), "images")
    LBPH.train(training_data, np.asarray(training_labels))

def train_SVM_model(xTrain, yTrain):
    print ("training SVM classifier")
    print ("size of training set is:", len(yTrain), "images")
    clf.fit(xTrain, yTrain)

def run_FF_classifier(prediction_data, prediction_labels):
    print ("predicting classification set")
    cnt = 0
    correct = 0
    incorrect = 0
    pred_list = []
    for image in prediction_data:
        pred, conf = LBPH.predict(image)
        pred_list.append(pred)

    accuracy = accuracy_score(prediction_labels, pred_list)
    return accuracy

def run_SVM_classifier(xTest,yTest):
    yPredict = clf.predict(xTest)
    return accuracy_score(yTest,yPredict)

In [37]:
# Main

training_data_FF, training_labels_FF, prediction_data_FF, prediction_labels_FF = make_sets(0)
training_data_SVM, training_labels_SVM, prediction_data_SVM, prediction_labels_SVM = make_sets(1)

print(np.asarray(training_data_SVM).shape)
print(np.asarray(training_data_FF).shape)

#SVM_paramter_tuning(training_data_SVM, training_labels_SVM, prediction_data_SVM, prediction_labels_SVM)

train_FF_model(training_data_FF, training_labels_FF)
train_SVM_model(training_data_SVM, training_labels_SVM)
correct_FF = run_FF_classifier(prediction_data_FF, prediction_labels_FF)
correct_SVM = run_SVM_classifier(prediction_data_SVM, prediction_labels_SVM)
print ("FF got", correct_FF, "percent correct!")
print ("SVM got", correct_SVM, "percent correct!")

IndexError: tuple index out of range

In [29]:
reshaped = np.reshape(training_data_SVM, (np.asarray(training_data_SVM).shape[0], np.asarray(training_data_SVM).shape[1]*np.asarray(training_data_SVM).shape[2]))
print(np.asarray(reshaped)[:10])
print(np.asarray(training_data_SVM)[:10])

[[0.016  0.0096 0.0032 ... 0.     0.0016 0.064 ]
 [0.048  0.0096 0.0112 ... 0.0016 0.0112 0.1072]
 [0.0624 0.0176 0.0464 ... 0.008  0.0256 0.1312]
 ...
 [0.0608 0.0144 0.016  ... 0.0128 0.0256 0.1936]
 [0.056  0.0208 0.0128 ... 0.     0.     0.0624]
 [0.0384 0.0096 0.0256 ... 0.     0.0016 0.0816]]
[[[0.016  0.0096 0.0032 ... 0.0048 0.2656 0.072 ]
  [0.008  0.0064 0.0336 ... 0.0064 0.1744 0.1664]
  [0.0208 0.0096 0.0224 ... 0.0016 0.216  0.1488]
  ...
  [0.0688 0.0144 0.0208 ... 0.0048 0.0144 0.1888]
  [0.0208 0.0272 0.0256 ... 0.0064 0.008  0.1504]
  [0.0208 0.0112 0.0176 ... 0.     0.0016 0.064 ]]

 [[0.048  0.0096 0.0112 ... 0.0096 0.0192 0.1344]
  [0.0464 0.0208 0.0224 ... 0.0032 0.0112 0.1472]
  [0.0656 0.0256 0.0368 ... 0.008  0.0256 0.288 ]
  ...
  [0.056  0.0208 0.0304 ... 0.0048 0.0256 0.1968]
  [0.0544 0.0112 0.0208 ... 0.0016 0.0224 0.1792]
  [0.0224 0.0208 0.0336 ... 0.0016 0.0112 0.1072]]

 [[0.0624 0.0176 0.0464 ... 0.0064 0.0176 0.2544]
  [0.0832 0.0144 0.0384 ... 0.008 

In [None]:
# Create list of image names and corresponding gender classifications
image_dic = pandas.read_excel('../Datasets/labels_A.xlsx')
image_dic = image_dic[['img_name.jpg', 'gender']] # Choose columns which are of importance
df = pandas.DataFrame(image_dic)
image_dic_list = df.values.tolist()

# Create naturally sorted list of file paths for each image
file_paths = glob.glob ("../Datasets/img_A/*.jpg") #find all paths which match the given path
file_paths = natsorted(file_paths) #sort the list of file names such that the image list will be in the correct order


In [None]:
#just cropping - seems to have been aligned already
def detect_faces(image):
    # Create a face detector
    face_detector = dlib.get_frontal_face_detector()
    # Run detector and get bounding boxes of the faces on image.
    detected_faces = face_detector(image, 1)
    face_frames = [(x.left(), x.top(),
                    x.right(), x.bottom()) for x in detected_faces]
    return face_frames

# Set up progress bar
max_val = len(file_paths)
pb = display.ProgressBar(max_val)
pb.display()
i = 0

# initiate lists and counter
faces = []
imgs_used = []
imgs_not_used = []
face_counter = 0

for file_path in file_paths:
    # Load image
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) #read the image as grayscale
    # Detect faces
    detected_faces = detect_faces(image)
    # Find images where only one face is detected
    if len(detected_faces) == 1:
        # Crop faces
        for n, face_rect in enumerate(detected_faces):
            face = Image.fromarray(image).crop(face_rect)
            face_counter += 1
            faces.append(face)
        imgs_used.append(file_path)
    else:
        imgs_not_used.append(file_path)
    # Update progress bar
    pb.progress = i + 1
    i += 1


In [None]:
print(len(imgs_not_used))
print(type(faces[0]))

In [None]:
# Updating image label dictionary and path list

print("Original path list length:")
print(len(file_paths))
print("Number of images used:")
print(len(imgs_used))
print("Number of images not used:")
print(len(imgs_not_used))

# Removing unsued image paths from original image path list
for img_not_used in imgs_not_used:
    try:
        file_paths.remove(img_not_used)
    except:
       continue 
    
print("New path list length:")
print(len(file_paths))

# Removing name and label pairs which are not used from original image dictionary
for img_not_used in imgs_not_used:
    i = 0
    for image in image_dic_list:
        if image[0] == os.path.basename(img_not_used):
            del image_dic_list[i]
            break
        i += 1
        



In [None]:
print(image_dic_list[:10])

In [None]:
# Saving new image dictionary as CSV file
df = pandas.DataFrame(image_dic_list, columns= ['img_name.jpg', 'gender'])
export_csv = df.to_excel (r'../Datasets/labels_A_updated_2.xlsx')


In [None]:
# Saving cropped and gray-scale images to new directory for later use

new_directory = '../Datasets/cropped2_img_A/'
print("writing faces to disk...")
if os.path.exists(new_directory):
    print("Path already exists")
else:
    print('creating output directory: %s'%(new_directory))
    os.mkdir(new_directory)
    i = 0
    for face in faces:
        basewidth = 256
        wpercent = (basewidth / float(face.size[0]))
        hsize = int((float(face.size[1]) * float(wpercent)))
        face = face.resize((basewidth, hsize), Image.ANTIALIAS)
        face = face.crop(( 0, hsize - 255 , basewidth , hsize))
        face.save(''.join([new_directory,os.path.basename(imgs_used[i])]))
        i += 1
    print("wrote %d faces out of %d"%(len(faces), max_val))  


In [None]:
# Create updated list of image names and corresponding gender classifications
image_dic = pandas.read_excel('../Datasets/labels_A_updated.xlsx')
image_dic = image_dic[['img_name.jpg', 'gender']] # Choose columns which are of importance
df = pandas.DataFrame(image_dic)
image_dic_list = df.values.tolist()

# Create naturally sorted list of file paths for each cropped gray scale image image
updated_file_paths = glob.glob ("../Datasets/cropped2_img_A/*.jpg") #find all paths which match the given path
updated_file_paths = natsorted(updated_file_paths) #sort the list of file names such that the image list will be in the correct order

print(updated_file_paths[:10])
print(image_dic_list[:10])

In [None]:
width = 256
height = 255
changed = 0
for file in updated_file_paths:
    image = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
    if image.shape != (255,256):
        print(false)
print(changed)
image = cv2.imread(updated_file_paths[6], cv2.IMREAD_GRAYSCALE)
print(image.shape == (255,256))
plt.imshow(image)

In [None]:
# align cropped images

def detect_faces_for_alignement(image):
    # Create a face detector
    face_detector = dlib.get_frontal_face_detector()
    # Run detector and get bounding boxes of the faces on image.
    detected_faces = face_detector(image, 1)
    face_frames = [(x.left(), x.top(), x.right(), x.bottom()) for x in detected_faces]
    return detected_faces

faces = []
imgs_used_2 = []
imgs_not_used_2 = []
face_counter = 0
width = 256
height = 255
for file_path in updated_file_paths:    
    # Load image
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) #read the image as grayscale
    # Detect & predict faces
    detected_faces = detect_faces_for_alignement(image)
    if len(detected_faces) == 1:
        # align faces
        predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
        shape = predictor(image, detected_faces[0]) # get facial features
        shape = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])
        # center and scale face around mid point between eyes
        center_eyes = shape[27].astype(np.int)
        eyes_d = np.linalg.norm(shape[36]-shape[45])
        face_size_x = int(eyes_d * 2.)
        # rotate to normalized angle
        d = (shape[45] - shape[36]) / eyes_d # normalized eyes-differnce vector (direction)
        a = np.rad2deg(np.arctan2(d[1],d[0])) # angle
        scale_factor = float(256) / float(face_size_x * 2.) # scale to fit in output_size
        # rotation (around center_eyes) + scale transform
        M = np.append(cv2.getRotationMatrix2D((center_eyes[0], center_eyes[1]),a,scale_factor),[[0,0,1]], axis=0)
        # apply shift from center_eyes to middle of output_size 
        M1 = np.array([[1.,0.,-center_eyes[0]+width/2.],
                       [0.,1.,-center_eyes[1]+height/2.],
                       [0,0,1.]])
        # concatenate transforms (rotation-scale + translation)
        M = M1.dot(M)[:2]
        # warp
        face = cv2.warpAffine(image, M, (width, height), borderMode=cv2.BORDER_REPLICATE)
        #face = Image.fromarray(image_aligned)
        #face = face.resize((width,height), Image.ANTIALIAS)
        face = cv2.resize(face,(width,height))
        faces.append(face)
        face_counter += 1
        print(file_path)
        imgs_used_2.append(file_path)
    else:
        imgs_not_used_2.append(file_path)        
print(face_counter)
faces = np.asarray(faces)


In [None]:
print(len(imgs_used_2))
print(imgs_not_used_2[:10])
print(faces[0].shape)


In [None]:
#saving aligned gray and crapped
new_directory = '../Datasets/again_aligned_cropped_img_A/'
print("writing faces to disk...")
if os.path.exists(new_directory):
    print("Path already exists")
else:
    print('creating output directory: %s'%(new_directory))
    os.mkdir(new_directory)
    i = 0
    for face in faces:
        face = Image.fromarray(face).crop(( face.shape[1]/4, face.shape[0]/4 , face.shape[1]*(3/4) , face.shape[0]*(3/4)))
        #face = face.crop(( 0, hsize - 255 , basewidth , hsize))
        #cv2.imwrite(''.join([new_directory,os.path.basename(imgs_used_2[i])]), face)
        face.save(''.join([new_directory,os.path.basename(imgs_used_2[i])]))
        i += 1  


In [None]:
for img_not_used in imgs_not_used_2:
    i = 0
    for image in image_dic_list:
        if image[0] == os.path.basename(img_not_used):
            del image_dic_list[i]
            break
        i += 1
        
# Saving new image dictionary as CSV file
df = pandas.DataFrame(image_dic_list, columns= ['img_name.jpg', 'gender'])
export_csv = df.to_excel (r'../Datasets/again_aligned_labels_A_updated_2.xlsx')

In [None]:
# Create updated list of image names and corresponding gender classifications
image_dic = pandas.read_excel('../Datasets/labels_A_updated_2.xlsx')
image_dic = image_dic[['img_name.jpg', 'gender']] # Choose columns which are of importance
df = pandas.DataFrame(image_dic)
image_dic_list = df.values.tolist()

# Create naturally sorted list of file paths for each cropped gray scale image image
updated_file_paths = glob.glob ("../Datasets/cropped2_img_A/*.jpg") #find all paths which match the given path
updated_file_paths = natsorted(updated_file_paths) #sort the list of file names such that the image list will be in the correct order

print(updated_file_paths[:10])
print(len(updated_file_paths))
print(image_dic_list[:10])
print(len(image_dic_list))

In [None]:
#LBP on updated grayscale cropped images

images_list = []
image_path = []
image_inputs = []
image_labels = []

i = 0
for file_path in updated_file_paths:
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) #read the image as grayscale
    radius = 3    
    no_points = 8*radius  # Number of points to be considered as neighbourers
    eps=1e-7
    lbp = local_binary_pattern(image, no_points, radius, method='uniform') # Uniform LBP is used
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, no_points + 3), range=(0, no_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + eps)
    hist_norm = hist
    #hist = itemfreq(lbp.ravel()) # Calculate the histogram -> why do we ravel
    #hist_norm = hist[:, 1]/sum(hist[:, 1]) # Normalize the histogram
    images_list.append(image)    
    image_path.append(file_path)# Append image path    
    image_inputs.append(hist_norm)# Append histogram    
    image_labels.append(image_dic_list[i][1]) # Append class label
    i = i+1


In [None]:
# use raw pixels only as inputs

images_list = []
image_path = []
image_inputs = []
image_labels = []

i = 0
for file_path in updated_file_paths:
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) #read the image as grayscale
    features = np.reshape(image, (255*256))
    image_inputs.append(features)# Append    
    image_labels.append(image_dic_list[i][1]) # Append class label
    i = i+1
image_inputs = normalize(image_inputs, axis=1, norm='l1')



In [None]:
print(image_inputs[:1])
print(image_labels[:10])
np.arange(11, 17, 0.5).tolist()


In [None]:
# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(image_inputs,image_labels, test_size=0.5)

# Set the parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf'], 'gamma': np.arange(9,10,0.1).tolist(),
                     'C': np.arange(1400,1420,5).tolist(), 'tol': [1e-5,1e-4,1e-3, 1e-2, 1e-1]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000, 10000]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()
    clf = GridSearchCV(SVC(), tuned_parameters, cv=5, scoring='%s_macro' % score)
    clf.fit(X_train, y_train)
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()
    

In [None]:
# Split the data into training and testing(75% training and 25% testing data)
xTrain,xTest,yTrain,yTest=train_test_split(image_inputs,image_labels)

In [None]:
# Create adaboost classifer object
abc = AdaBoostClassifier(n_estimators=50,
                         learning_rate=1)
# Train Adaboost Classifer
model = abc.fit(xTrain, yTrain)

#Predict the response for test dataset
y_pred = model.predict(xTest)

print(accuracy_score(yTest,y_pred))

In [None]:
c=1405
gamma = 9.7
tol = 0.01
svc = SVC(C=c, kernel='rbf', gamma = gamma, tol = tol, probability = True)

# Create adaboost classifer object
abc =AdaBoostClassifier(n_estimators=50, base_estimator = svc, learning_rate=1)

# Train Adaboost Classifer
model = abc.fit(xTrain, yTrain)

#Predict the response for test dataset
y_pred = model.predict(xTest)

print(accuracy_score(yTest,y_pred))

In [None]:
gnb = GaussianNB()

#Train the model using the training sets
model = gnb.fit(xTrain, yTrain)

#Predict the response for test dataset
y_pred = model.predict(xTest)
print(accuracy_score(yTest,y_pred))

In [None]:
gnb = GaussianNB()

# Create adaboost classifer object
abc =AdaBoostClassifier(n_estimators=50, base_estimator = gnb, learning_rate=1)

# Train Adaboost Classifer
model = abc.fit(xTrain, yTrain)

#Predict the response for test dataset
y_pred = model.predict(xTest)

print(accuracy_score(yTest,y_pred))

In [None]:
lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]

for learning_rate in lr_list:
    gb_clf = GradientBoostingClassifier(n_estimators=20, learning_rate=learning_rate, max_features=2, max_depth=2, random_state=0)
    gb_clf.fit(xTrain, yTrain)

    print("Learning rate: ", learning_rate)
    print("Accuracy score (training): {0:.3f}".format(gb_clf.score(xTrain, yTrain)))
    print("Accuracy score (validation): {0:.3f}".format(gb_clf.score(xTest, yTest)))

In [None]:
xgb_clf = XGBClassifier()
model = xgb_clf.fit(np.asarray(xTrain), np.asarray(yTrain))
score = model.score(xTest, yTest)
print(score)


In [None]:
modelTxtc=1405
gamma = 9.7
tol = 0.01
clf = SVC(C=c, kernel='rbf', gamma = gamma, tol = tol)
clf.fit(xTrain, yTrain)
yPredict = clf.predict(xTest)
print(accuracy_score(yTest,yPredict))