In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances

In [2]:
def feature_initial(input_list, index, info=np.nan):
    """Build pairwise-distance features for the selected images.

    For every selected entry of ``input_list`` (an ``n x d`` array of
    fiducial points), the absolute difference between every unordered pair
    of points is computed separately for each coordinate column.  The
    resulting row lists all pair differences of the first column, then all
    pair differences of the second column, and so on, giving
    ``d * n * (n - 1) / 2`` features per image (e.g. 78 points with two
    coordinates yield 6006 features).

    Args:
        input_list: Sequence of fiducial-point arrays, one per image.  All
            selected entries must have the same shape.
        index: Iterable of integer positions selecting entries of
            ``input_list`` (and, positionally, rows of ``info``).
        info: Optional data frame with a ``'label'`` column.  Anything that
            is not a data frame containing that column (including the
            default ``np.nan``) means "no labels available".

    Returns:
        A ``pandas.DataFrame`` with columns ``feature0 .. featureK`` and,
        when labels are available, a trailing ``'labels'`` column holding
        ``info['label']`` at the selected positions.  An empty ``index``
        yields a frame with no rows and no feature columns.
    """
    def _coordinate_gaps(points):
        # For scalars the Euclidean distance is just |a - b|, so each
        # coordinate column can be handled with plain array arithmetic.
        coords = np.asarray(points, dtype=float)
        first, second = np.triu_indices(coords.shape[0], k=1)
        gaps = np.abs(coords[first] - coords[second])
        # Transpose before flattening so that all pairs of one coordinate
        # stay contiguous: [col0 pairs..., col1 pairs...].
        return gaps.T.ravel()

    # Materialise once: ``index`` is needed for both the features and the
    # labels, and may be a one-shot iterable.
    positions = list(index)

    if positions:
        features = np.array([_coordinate_gaps(input_list[i]) for i in positions])
    else:
        # Nothing selected: there is no image to infer the feature count from.
        features = np.empty((0, 0))

    colnames = ['feature' + str(i) for i in range(features.shape[1])]
    feature_frame = pd.DataFrame(features, columns=colnames)

    # Labels are optional.  Test for them explicitly rather than relying on
    # an exception: subscripting the np.nan default raises TypeError, which
    # the previous ``except ValueError`` never caught.
    if not isinstance(info, pd.DataFrame) or 'label' not in info.columns:
        return feature_frame

    label_frame = pd.DataFrame(list(info['label'].iloc[positions]), columns=['labels'])
    return pd.concat([feature_frame, label_frame], axis=1)