In [1]:
import json
import pickle
import numpy as np
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import classification_report, accuracy_score

In [2]:
def get_class_dict(filename):
    """Load a label dictionary from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON content. `get_class` consumes it as a mapping from
        label to a sequence whose first two items are the inclusive lower
        and upper bound of that label's interval.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8 by specification; being explicit avoids decoding
    # with a platform-dependent locale encoding.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

def get_class(age, class_dict):
    """Return the label whose inclusive interval contains `age`.

    Intervals are checked in the dictionary's iteration order and the first
    match wins. Each value of `class_dict` is indexed at positions 0 (lower
    bound) and 1 (upper bound).

    Returns:
        The matching label, or the sentinel -1 when no interval contains
        `age`.
    """
    matching_labels = (
        label
        for label, bounds in class_dict.items()
        if bounds[0] <= age <= bounds[1]
    )
    return next(matching_labels, -1)

### HOG Experiment

In [3]:
# Label dictionary passed to get_class when building the label arrays.
binary_dict = get_class_dict('age_intervals_binary.json')

# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open('./dataframe.pkl', mode='rb') as hog_file:
    hog_df = pickle.load(hog_file)

# Split definition; later cells index with its "train" and "test" entries.
with open('initial_splits_binary.json') as splits_file:
    fold_data = json.load(splits_file)

In [4]:
# Build the full feature and label arrays once, then slice them per split,
# instead of rebuilding both arrays for train and again for test.
hog_features = np.array(list(hog_df['hog_features']))
hog_labels = np.array([get_class(age, binary_dict) for age in hog_df['age']])

# NOTE(review): the split entries are used as positional indices into the
# arrays above — presumably lists of integer row positions; confirm against
# the code that produced initial_splits_binary.json.
train_idx = fold_data["train"]
test_idx = fold_data["test"]

X_train = np.vstack(hog_features[train_idx])
y_train = hog_labels[train_idx]

X_test = np.vstack(hog_features[test_idx])
y_test = hog_labels[test_idx]

In [5]:
# Polynomial-kernel SVM on the HOG features; fit() returns the estimator,
# so construction and training can be chained.
svm_hog = SVC(kernel='poly', C=0.1, random_state=42).fit(X_train, y_train)
predictions = svm_hog.predict(X_test)

# Per-class precision / recall / F1 on the test split, four decimals.
report = classification_report(y_test, predictions, digits=4)
print('Final test metrics are: \n', report, sep='\n')

Final test metrics are: 

              precision    recall  f1-score   support

           0     0.6136    0.1441    0.2334       562
           1     0.9082    0.9894    0.9471      4809

    accuracy                         0.9009      5371
   macro avg     0.7609    0.5668    0.5902      5371
weighted avg     0.8774    0.9009    0.8724      5371



In [6]:
# Fraction of test samples whose predicted label equals the true label.
hog_accuracy = accuracy_score(y_test, predictions)
print('Accuracy: ', hog_accuracy)

Accuracy:  0.9009495438465835


### LBP Experiment

In [7]:
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open('dataframe_lbp_16p_2r.pkl', mode='rb') as lbp_file:
    lbp_df = pickle.load(lbp_file)

In [8]:
# Build the full feature and label arrays once, then slice them per split,
# instead of rebuilding both arrays for train and again for test.
lbp_features = np.array(list(lbp_df['lbp_16p_2r']))
lbp_labels = np.array([get_class(age, binary_dict) for age in lbp_df['age']])

# NOTE(review): the same fold_data loaded for the HOG experiment is reused
# here, which assumes lbp_df rows are in the same order as hog_df — confirm.
train_idx = fold_data["train"]
test_idx = fold_data["test"]

# These rebind the X_/y_ names used by the HOG experiment above.
X_train = np.vstack(lbp_features[train_idx])
y_train = lbp_labels[train_idx]

X_test = np.vstack(lbp_features[test_idx])
y_test = lbp_labels[test_idx]

In [9]:
# RBF-kernel SVM trained on the LBP features.
svm_lbp = SVC(C=1000, kernel='rbf', random_state=42)
svm_lbp.fit(X_train, y_train)
predictions = svm_lbp.predict(X_test)

# In the recorded run the model predicted no sample as class 0, so that
# class's precision is undefined and scikit-learn warned three times while
# building the report. zero_division=0 reports the same 0.0 values explicitly
# without the warnings; the all-zero row itself is the signal that the
# classifier collapsed to the majority class.
report = classification_report(y_test, predictions, digits=4, zero_division=0)
print('Final test metrics are: \n')
print(report)

Final test metrics are: 

              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       562
           1     0.8954    1.0000    0.9448      4809

    accuracy                         0.8954      5371
   macro avg     0.4477    0.5000    0.4724      5371
weighted avg     0.8017    0.8954    0.8459      5371



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
# Fraction of test samples whose predicted label equals the true label.
lbp_accuracy = accuracy_score(y_test, predictions)
print('Accuracy: ', lbp_accuracy)

Accuracy:  0.895363991807857
