# Facial Landmark Analysis for Detecting Visual Impairment in Mobile LogMAR Test

In [None]:
import glob
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.lines as mlines
import seaborn as sns
import re

from scipy import stats
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc, RocCurveDisplay, roc_auc_score
import xgboost as xgb
import shap

In [None]:
# Opt in to pandas' future behaviour of not silently downcasting object results
# (relevant for the `.replace(...)` calls that encode the labels further down).
pd.set_option('future.no_silent_downcasting', True)

In [None]:
# Global plot configuration: matplotlib base font size and the resolution
# (dots per inch) passed to savefig when figures are written to disk.
plt.rcParams.update({'font.size': 12})
dpi = 300

## Study statistics

In [None]:
# Load the training data; the path is relative to the notebook's working directory.
df = pd.read_csv('../temp/train_data.csv')

In [None]:
# Summary statistics of the loaded frame.
df.describe()

In [None]:
# Numeric encodings of the readability label.
# Binary target: 'all' -> 0, 'some' and 'none' -> 1.
df['label_bin'] = df['label'].replace({'all': 0, 'some': 1, 'none': 1})
# Three-level target: 'all' -> 0, 'some' -> 1, 'none' -> 2.
df['label_mult'] = df['label'].replace({'all': 0, 'some': 1, 'none': 2})

In [None]:
# One row per subject: after sorting by logMAR ascending, keep the first
# (lowest-logMAR) row of every subject and only the questionnaire/test columns.
# 'index' is the original row label exposed by reset_index().
subject_columns = [
    'index', 'subject',
    'hasVisualAid', 'usesVisualAid', 'needsNewVisualAid', 'visualAidDiopters',
    'canReadCinemaSubtitles', 'canReadSmartphoneScreen', 'canReadRoadSigns',
    'wasEasy', 'distance', 'logMAR',
]
unique_subjects_df = (
    df.sort_values('logMAR')
    .drop_duplicates(subset='subject', keep='first')
    .reset_index()[subject_columns]
)

In [None]:
# Convert the diopter column to float, accepting a decimal comma ("1,5" -> 1.5).
# The string replacement is applied only when the column is not numeric yet:
# `.str` raises on a float column, which made this cell fail when it was re-run
# or when the CSV happened to contain no decimal commas.
diopters = unique_subjects_df['visualAidDiopters']
if not pd.api.types.is_numeric_dtype(diopters):
    diopters = diopters.str.replace(',', '.', regex=False)
unique_subjects_df['visualAidDiopters'] = diopters.astype(float)

In [None]:
# Number of subjects and, per questionnaire column, the sum of its values
# (i.e. the number of True answers if the columns are boolean).
n = len(unique_subjects_df)
hasVisualAidTrue = unique_subjects_df['hasVisualAid'].sum()
usesVisualAidTrue = unique_subjects_df['usesVisualAid'].sum()
needsNewVisualAidTrue = unique_subjects_df['needsNewVisualAid'].sum()
canReadCinemaSubtitlesTrue = unique_subjects_df['canReadCinemaSubtitles'].sum()
canReadSmartphoneScreenTrue = unique_subjects_df['canReadSmartphoneScreen'].sum()
canReadRoadSignsTrue = unique_subjects_df['canReadRoadSigns'].sum()
wasEasyTrue = unique_subjects_df['wasEasy'].sum()

In [None]:
def _report_yes_no(question, yes_count, yes_label='yes \t'):
    """Print one questionnaire line: the question, the yes count and the remaining no count."""
    print(question, yes_count, yes_label, n - yes_count, 'no')


# "<distance>m: <count>" for every test distance, most frequent first (value_counts order).
distance_counts = unique_subjects_df.distance.value_counts()
distances = ''.join(
    str(dist) + 'm: ' + str(distance_counts[dist]) + ' \t '
    for dist in distance_counts.index
)

print('Number of runs:\t\t', n)
print('Test conducted at \t', distances)

# NOTE(review): the label says "Range" but the printed values are mean ± population std.
diopters = unique_subjects_df.visualAidDiopters
print('Range of diopters:\t', np.round(np.mean(diopters), 2), '±', np.round(np.std(diopters), 2))

# NOTE(review): the "Do you have issues reading ..." questions are paired with the
# canRead* counts — confirm that True in those columns really means "yes, I have issues".
_report_yes_no('Do you have a visual aid?\t\t\t\t\t\t', hasVisualAidTrue)
_report_yes_no('Is this test being conducted with a visual aid?\t\t\t\t', usesVisualAidTrue)
_report_yes_no('Do you have issues reading on a smartphone screen?\t\t\t', canReadSmartphoneScreenTrue, 'yes \t\t')
_report_yes_no('Do you have issues reading cinema subtitles and signs on the subway?\t', canReadCinemaSubtitlesTrue)
_report_yes_no('Do you have issues reading road signs?\t\t\t\t\t', canReadRoadSignsTrue)
_report_yes_no('Was the eye test easy for you?\t\t\t\t\t\t', wasEasyTrue)
_report_yes_no('Do you think you need a (new) visual aid?\t\t\t\t', needsNewVisualAidTrue)

In [None]:
# Pairwise correlation of the per-subject columns, rounded to two decimals.
# NOTE(review): on pandas >= 2.0 `corr()` raises if any column is non-numeric
# (e.g. a string `subject` id); pass numeric_only=True if that applies — TODO confirm.
unique_subjects_df.corr().round(2)

## Data Analysis

In [None]:
# Keep only rows whose letter_row is greater than 1.
df = df.loc[df['letter_row'] > 1]

### Explorative Data Analysis and Statistical analysis

In [None]:
# Aggregate the "mean" blendshape columns into one value per facial region.
# For every region keyword, the columns whose name contains both 'mean' and the
# keyword are averaged within each (label, logMAR, suppress_squinting, letter_row,
# subject) group and then averaged across those columns.
group_keys = ['label', 'logMAR', 'suppress_squinting', 'letter_row', 'subject']
substring = 'mean'
df_list = []        # per-region frames restricted to suppress_squinting == False
region_frames = []  # per-region frames with both suppress_squinting settings
for k in ['jaw', 'eye', 'mouth', 'cheek', 'brow', 'nose']:
    filtered_columns = [col for col in df.columns if substring in col and k in col] + group_keys
    # The unnamed aggregated Series becomes column 0 after reset_index().
    temp = df[filtered_columns].groupby(group_keys).mean().mean(axis=1).reset_index()
    temp['feature'] = k
    df_list.append(temp[temp.suppress_squinting == False])
    region_frames.append(temp)

# Concatenate once instead of growing the frame inside the loop; this also avoids
# concatenating with an empty DataFrame, whose dtype handling pandas has deprecated.
plt_df = pd.concat(region_frames)
# reset_index() keeps each region's row number as an 'index' column.
plt_df = plt_df.reset_index().rename({0: 'value'}, axis=1)

In [None]:
# One histogram of the aggregated value per facial region, coloured by label.
for k in plt_df.feature.unique():
    plt.figure()
    temp = plt_df[plt_df.feature == k]
    sns.histplot(temp, x='value', hue='label').set_title(k)
    # Render the figure now; the previous bare plt.plot() call drew nothing.
    plt.show()

In [None]:
# Enlarge seaborn's text elements for the figures that follow.
sns.set_context("notebook", font_scale=1.5)

In [None]:
# Box plot of the aggregated value per facial region, split by label, saved to disk.
hue_order = ['all', 'some', 'none']
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(
    data=plt_df,
    x="feature",
    y="value",
    hue="label",
    hue_order=hue_order,
    ax=ax,
)
ax.legend(title='Label', loc='upper left')
fig.savefig('../media/boxplot_feature.png', dpi=dpi, bbox_inches='tight')

From the two plots above (histogram and boxplot) we can see that at least two requirements for ANOVA are fulfilled:
1. The samples originate from a normal distribution
2. The standard deviation is equal among the groups

In [None]:
# One-way ANOVA per facial region: does the aggregated value differ between
# the 'all', 'some' and 'none' label groups?
for k in plt_df.feature.unique():
    region = plt_df[plt_df.feature == k]
    samples = [
        region.loc[region.label == level, 'value']
        for level in ('all', 'some', 'none')
    ]
    anova = stats.f_oneway(*samples)
    print(k, "\t p-value:", np.round(anova.pvalue, 4))

In [None]:
# Aggregated feature value over logMAR: one panel per facial region,
# one line per suppress_squinting setting.
g = sns.FacetGrid(plt_df[(plt_df.letter_row > -1) & (plt_df.letter_row < 16)], col="feature", col_wrap=3)
g.map(sns.lineplot, "logMAR", 'value', "suppress_squinting")
g.set_xlabels('logMAR')

# Mean logMAR of the rows labelled 'none' / 'some' (suppress_squinting == False only),
# drawn as vertical reference lines in every panel.
not_suppressed = plt_df.suppress_squinting == False
border_none = np.mean(plt_df[not_suppressed & (plt_df.label == 'none')]['logMAR'])
border_some = np.mean(plt_df[not_suppressed & (plt_df.label == 'some')]['logMAR'])

# Iterate over the axes directly; the earlier zip with plt_df.groupby('feature')
# only supplied unused loop variables.
for ax in g.axes.flat:
    ax.set_xlim(1.1, -0.4)  # reversed axis: logMAR decreases from left to right
    ax.axvline(x=border_none, color='red', linestyle='--', linewidth=1)
    ax.axvline(x=border_some, color='orange', linestyle='--', linewidth=1)

# Proxy artists so the reference lines get their own legend.
red_line = mlines.Line2D([], [], color='red', linestyle='--', linewidth=1, label='no letters')
orange_line = mlines.Line2D([], [], color='orange', linestyle='--', linewidth=1, label='some letters')

g.fig.legend(handles=[red_line, orange_line], title="Mean logMAR of the event: Could read", bbox_to_anchor=(0.69, 1.15), loc='upper center', ncol=2)
# Legend entries of the hue variable, taken from the last panel.
handles, labels = ax.get_legend_handles_labels()
g.fig.legend(handles, labels, title="Suppress squinting", bbox_to_anchor=(0.24, 1.15), loc='upper center', ncol=3)

# Use the notebook-wide resolution setting instead of a second hard-coded value.
g.savefig('../media/change_feature.png', dpi=dpi)
plt.show()

In [None]:
# Order the label levels so the x axis runs all -> some -> none.
label_order = ["all", "some", "none"]
plt_df['label'] = pd.Categorical(plt_df['label'], categories=label_order, ordered=True)

# Aggregated feature value per label: one panel per facial region,
# one line per suppress_squinting setting.
g = sns.FacetGrid(plt_df[(plt_df.letter_row > 1) & (plt_df.letter_row < 16)], col="feature", col_wrap=3)
g.map(sns.lineplot, "label", 'value', "suppress_squinting")
g.set_xlabels('label')

# Read the legend entries from this grid's own last panel. Previously they came
# from a leftover `ax` variable that pointed into the figure of the preceding cell,
# so the cell only worked if that cell had been run first.
handles, labels = g.axes.flat[-1].get_legend_handles_labels()
g.fig.legend(handles, labels, title="Suppress squinting", bbox_to_anchor=(0.24, 1.15), loc='upper center', ncol=3)
# Save with the notebook-wide resolution setting and show the plot.
g.savefig('../media/change_feature_individual.png', dpi=dpi)
plt.show()

### Sensor Accuracy

In [None]:
def zscore_normalize(x):
    """Return `x` centred on its mean and scaled by its standard deviation.

    `x` must provide `.mean()` and `.std()`; which standard deviation is used
    (sample or population) is whatever `x.std()` computes for its type.
    """
    centre = x.mean()
    spread = x.std()
    return (x - centre) / spread

In [None]:
# Start from an empty frame; the recordings are appended to it in the next cell.
df_accuracy = pd.DataFrame()

In [None]:
# Read every CSV in ../temp_/ and append the rows to df_accuracy.
# The files are read in sorted order because glob returns paths in arbitrary
# order, which made the row order differ between machines and runs.
# A single concat replaces the repeated concat inside the loop.
accuracy_frames = [pd.read_csv(path) for path in sorted(glob.glob('../temp_/*.csv'))]
df_accuracy = pd.concat([df_accuracy] + accuracy_frames)

In [None]:
# Expose the row labels carried over from the individual CSV files as an 'index' column.
df_accuracy = df_accuracy.reset_index()

In [None]:
# The former row label serves as the time axis.
df_accuracy = df_accuracy.rename(columns={'index': 'time'})


def _select_action(frame, row, signal, action):
    """Return the rows of `frame` with letter_row == row and time > 10.

    Only `signal`, 'subject' and 'time' are kept; `signal` is renamed to
    'value' and a constant 'action' column is added.
    """
    mask = (frame.letter_row == row) & (frame.time > 10)
    selected = frame.loc[mask, [signal, 'subject', 'time']].rename(columns={signal: 'value'})
    selected['action'] = action
    return selected


browUp = _select_action(df_accuracy, 3, 'browOuterUp_R', 'brow raising')
mouthUp = _select_action(df_accuracy, 4, 'mouthSmile_R', 'mouth smiling')
squint = _select_action(df_accuracy, 2, 'cheekSquint_R', 'cheek squinting')

In [None]:
# Stack the three action signals into one long frame (columns: value, subject, time, action).
df_accuracy = pd.concat([browUp, mouthUp, squint])

In [None]:
# Z-score the signal values within each subject.
# NOTE(review): grouping is by subject only, so the three different action signals of a
# subject share one mean/std; the plot below groups by (subject, action) — confirm
# that normalising across actions is intended.
df_accuracy.loc[:, 'value'] = df_accuracy.loc[:, ['value', 'subject']].groupby('subject').transform(zscore_normalize)

In [None]:
# Larger text for the multi-panel figure below.
sns.set_context("notebook", font_scale=2.2)

In [None]:
# Time relative to the first sample of each (subject, action) recording, divided by 60.
# NOTE(review): the axis is labelled in seconds, so `time` is presumably a frame
# counter at 60 samples per second — TODO confirm.
elapsed = df_accuracy.groupby(['subject', 'action'])['time'].transform(lambda x: x - x.min())
df_accuracy['time_shifted'] = elapsed / 60.0

# One panel per (action, subject) combination; every panel gets its own x range.
g = sns.FacetGrid(
    df_accuracy,
    col="subject",
    row='action',
    sharex=False,
    height=5,
    aspect=0.8,
)
g.map(sns.lineplot, "time_shifted", "value")
g.set_titles("Subject {col_name}\n{row_name}")

# Label the first panel of every row with the action shown in that row.
for ax, action_name in zip(g.axes[:, 0], df_accuracy['action'].unique()):
    ax.set_ylabel(f'{action_name} signal')

g.set_xlabels('Time [s]')

# Extra vertical space so the two-line panel titles do not overlap the row above.
g.fig.subplots_adjust(hspace=0.45)

g.savefig('../media/predefined_actions.png', dpi=300)
plt.show()

### Facial Landmarks as a Predictor of Readability

In [None]:
# Display the frame used for the prediction analysis (rows with letter_row > 1).
df

In [None]:
# Number of distinct values in the 'index' column per three-level label.
df.groupby("label_mult")["index"].nunique()

In [None]:
# Per-subject ROC AUC of every blendshape statistic as a predictor of label_bin,
# computed separately for each statistic ("mode") and suppress_squinting setting.
df_result_ = pd.DataFrame()      # mean AUC per blendshape; one row per (mode, setting)
df_result_std = pd.DataFrame()
df_result_best = pd.DataFrame()  # best AUC per subject; one column per mode

for mode in ["std", "mean", "max", "min"]:
    # The loop variable is named `suppress` so that it no longer overwrites the
    # `squint` DataFrame created in the sensor-accuracy section.
    for suppress in [True, False]:
        train_df = df[df.suppress_squinting == suppress]
        # NOTE(review): plain substring match — every column whose name contains `mode` is used.
        columns_feature = [col for col in train_df.columns if mode in col]
        df_auc = pd.DataFrame(index=np.sort(train_df.subject.unique()), columns=columns_feature)
        for sub in df_auc.index:
            # Filter the subject's rows once instead of once per column.
            subject_rows = train_df[train_df.subject == sub]
            labels = subject_rows["label_bin"].astype(int).values
            for column in df_auc.columns:
                try:
                    # The score is stored directly; the former local named `auc`
                    # shadowed the `auc` function imported from sklearn.metrics.
                    df_auc.loc[sub, column] = roc_auc_score(labels, subject_rows[column].values)
                except ValueError:
                    # roc_auc_score rejects inputs it cannot score (for example a
                    # subject with a single class); record the score as missing.
                    df_auc.loc[sub, column] = np.nan
        # NOTE(review): this column is overwritten by the second pass of the inner loop,
        # so only the suppress_squinting == False result survives — confirm intent.
        df_result_best[mode] = df_auc.max(axis=1)
        heatmap_data = pd.DataFrame((df_auc[df_auc.sum(axis=0).sort_values().index]).astype(float).mean(axis=0)).transpose()
        heatmap_data.columns = heatmap_data.columns.str.replace(mode, '', regex=False)
        heatmap_data.index = [mode]
        df_result_ = pd.concat([df_result_, heatmap_data])

        # The 'squint' column means "squinting allowed", i.e. the negation of suppress_squinting.
        if suppress:
            df_auc_plt = df_auc
            df_auc_plt["squint"] = False
        else:
            df_auc_plt_ = df_auc
            df_auc_plt_["squint"] = True
    df_auc_plt = pd.concat([df_auc_plt, df_auc_plt_])

    # Order the columns by their summed AUC, drop the mode from the names and go long-form.
    df_auc_plt = df_auc_plt[df_auc_plt.sum(axis=0).sort_values().index]
    df_auc_plt.columns = df_auc_plt.columns.str.replace(mode, '', regex=False)
    df_auc_plt = df_auc_plt.melt(id_vars=['squint'], var_name='Blendshape', value_name='AUC Score')

    plt.figure(figsize=(40, 5))
    ax = sns.boxplot(data=df_auc_plt, x='Blendshape', y='AUC Score', hue='squint')
    plt.xticks(fontsize=30, rotation=90)
    plt.yticks(fontsize=30)
    plt.title(f"AUC scores for the blendshapes ({mode})", fontsize=30, pad=15)
    ax.legend(title='Squinting allowed', loc='lower right')
    # Use the notebook-wide resolution setting instead of a second hard-coded value.
    plt.savefig(f'../media/AUC_Blendshapes_{mode}.png', dpi=dpi, bbox_inches='tight')
    plt.show()

In [None]:
# Mean AUC per blendshape, columns ordered by their summed AUC, rounded to two decimals.
column_order = df_result_.sum(axis=0).sort_values().index
df_result_[column_order].round(2)

In [None]:
# Best per-subject AUC over all blendshapes, one column per statistic.
df_result_best