In [None]:



## Glasses

# %% md

### Frequency of visual aids

# %%
from utils.tables import write_csv


# Additional analysis

## Heatmap

## Exemplary trials for each position

## Median offset across calibration trials
Does data quality improve during calibration? 

In [None]:
# Restrict both the sample-level and the trial-level tables to the
# 'eyetracking-calibration' trials.
data_et_calibration = data_et[data_et['trial_type'] == 'eyetracking-calibration']
data_trial_calibration = data_trial[
    data_trial['trial_type'] == 'eyetracking-calibration'
]

# Median of 'offset' per (run_id, trial_index), stored as 'offset_median'.
grouped = (
    data_et_calibration
    .groupby(['run_id', 'trial_index'])['offset']
    .median()
    .rename('offset_median')
    .reset_index()
)

# Left join: calibration trials without any matching samples keep their row
# and receive NaN in 'offset_median'.
data_trial_calibration = data_trial_calibration.merge(
    grouped, on=['run_id', 'trial_index'], how='left'
)

In [None]:
# Keep the calibration trials whose 'chin' value equals the run's 'chinFirst'
# value (both 0 or both 1) -- presumably the chin condition each run started
# with; confirm against the study design.
first_condition_mask = (
    (
        (data_trial_calibration['chinFirst'] == 0) &
        (data_trial_calibration['chin'] == 0)
    ) |
    (
        (data_trial_calibration['chinFirst'] == 1) &
        (data_trial_calibration['chin'] == 1)
    )
)

# NOTE(review): 103 is handed to spaghettiPlot unchanged; its meaning is
# defined by that helper -- consider naming the constant.
spaghettiPlot(
    data_trial_calibration.loc[first_condition_mask, :],
    'withinTaskIndex', 'offset_median', 103)
plt.ylim(0, 1)
# The selection contains chin==0 AND chin==1 rows, so the title names the
# actual filter instead of claiming chin==0.
plt.title('Offset across calibration for chin==chinFirst', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('withinTaskIndex')
plt.ylabel('Offset')

plt.show()

## Median precision across calibration trials

In [None]:
# Keep the calibration trials whose 'chin' value equals the run's 'chinFirst'
# value (both 0 or both 1) -- presumably the chin condition each run started
# with; confirm against the study design.
first_condition_mask = (
    (
        (data_trial_calibration['chinFirst'] == 0) &
        (data_trial_calibration['chin'] == 0)
    ) |
    (
        (data_trial_calibration['chinFirst'] == 1) &
        (data_trial_calibration['chin'] == 1)
    )
)

# NOTE(review): 103 is handed to spaghettiPlot unchanged; its meaning is
# defined by that helper -- consider naming the constant.
spaghettiPlot(
    data_trial_calibration.loc[first_condition_mask, :],
    'withinTaskIndex', 'precision', 103)
plt.ylim(0, 1)
# The selection contains chin==0 AND chin==1 rows, so the title names the
# actual filter instead of claiming chin==0.
plt.title('Precision across calibration for chin==chinFirst', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('withinTaskIndex')
plt.ylabel('Precision')

plt.show()

## Categorical confounders

In [None]:
def getBoxPlots(outcome):
    """Draw box plots of `outcome` for each categorical predictor.

    One panel per predictor is drawn on a 2 x 4 grid, using the rows of the
    notebook-level `data_subject` frame. Above every box the number of rows
    in that category is printed as "n: <count>".

    Parameters
    ----------
    outcome : str
        Name of the `data_subject` column shown on the y-axis.
    """
    # NOTE(review): 'sight' appears twice, so the last two panels are
    # identical -- confirm which predictor was meant for the eighth panel.
    predictors = [
        'vertPosition', 'gender', 'ethnic',
        'degree', 'browser', 'glasses', 'sight', 'sight']

    fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(16, 10))
    fig.suptitle(outcome + ' for various categorical predictors', fontsize=20)
    plt.subplots_adjust(hspace=0.5)

    # The label height does not depend on the predictor: 10% above the
    # largest observed outcome value.
    max_value = data_subject[outcome].max()
    yPos = max_value + max_value * 0.1

    for panel, predictor in zip(ax.ravel(), predictors):
        # value_counts() sorts categories by descending frequency. The same
        # order is passed to seaborn so that the box at tick position k and
        # the k-th count label refer to the same category; without an
        # explicit order seaborn picks its own category order and the labels
        # can end up above the wrong box.
        counts = data_subject[predictor].value_counts()
        sns.boxplot(
            ax=panel, x=predictor, y=outcome, data=data_subject,
            order=counts.index.tolist())

        # Rotate the tick labels, then pass labelrotation=None for the y-axis
        # (presumably to undo the rotation there).
        panel.tick_params(labelrotation=45, labelsize=13)
        panel.tick_params(axis='y', labelrotation=None)

        # Boxes sit at x = 0, 1, ..., k-1 in `counts` order.
        for xPos, n in enumerate(counts.tolist()):
            panel.text(
                xPos, yPos, "n: " + str(n),
                verticalalignment='top',
                horizontalalignment='center', size=13, weight='normal')


getBoxPlots('offset')

In [None]:
# Box plots of the 'precision' column of data_subject for each categorical predictor.
getBoxPlots('precision')

In [None]:
# Box plots of the 'fps' column of data_subject for each categorical predictor.
getBoxPlots('fps')

## Correlation Plot

### Clean dataset

In [None]:
# Trial-level data joined with each run's age, reduced to the columns used
# by the correlation plots.
data_plot = (
    data_trial_fix
    .merge(data_subject.loc[:, ['run_id', 'age']], on='run_id', how='left')
    .loc[:, ['run_id', 'chin', 'x_pos', 'y_pos', 'withinTaskIndex', 'age', 'fps', 'offset', 'precision']]
)

# Rows with at least one missing value are reported and then removed.
has_missing = data_plot.isnull().any(axis=1)
null_data = data_plot.loc[has_missing, :]

if len(null_data) > 0:
    # size() counts every excluded row of a run. Counting the non-null
    # entries of 'chin' would under-report runs whose missing value is in
    # 'chin' itself.
    excluded_per_run = null_data.groupby('run_id').size().reset_index(name='n')
    print('! Attention ! Missing values')
    print(
        f"""Length of data raw: {len(data_plot)} \n"""
        f"""Excluded runs and trials: \n"""
        f"""{excluded_per_run} \n"""
    )
else:
    print('No missing data found')

data_plot = data_plot.loc[~has_missing, :]

print(
    f"""Length of data clean: {len(data_plot)} \n"""
)

### Plots

In [None]:
# Variables whose pairwise relationships are plotted.
correlation_columns = ['withinTaskIndex', 'age', 'fps', 'offset', 'precision']

sns.set()

# 'chin' is added only as the colouring variable; each panel gets one
# regression fit per listed chin value. The lower triangle is drawn
# (corner=True).
sns.pairplot(
    data_plot[[*correlation_columns, 'chin']],
    hue='chin',
    hue_order=[0.0, 1.0],
    kind='reg',
    corner=True,
)

In [None]:
# Column names for the correlation matrix: the two position columns followed
# by the pair-plot variables (kept as a NumPy array of names).
corr_columns = np.concatenate([['x_pos', 'y_pos'], correlation_columns])

# np.corrcoef treats rows as variables, hence the transpose.
corr_matrix = np.corrcoef(data_plot.loc[:, corr_columns].transpose())

# NOTE(review): `smg` is imported elsewhere in the notebook; presumably
# statsmodels' graphics API -- confirm.
smg.plot_corr(corr_matrix, xnames=corr_columns)
plt.show()

In [None]:
# NOTE(review): rcorr() is not a core pandas method; presumably it is registered by pingouin on import -- confirm the import exists above.
data_plot[corr_columns].rcorr()

In [None]:
# Same table with upper='n'; presumably the upper triangle then shows sample sizes (pingouin option) -- TODO confirm.
data_plot[corr_columns].rcorr(upper='n')

# Export

In [None]:
# Create the export folder together with any missing parent directory.
# exist_ok=True makes the call a no-op when the folder is already there, so
# the cell can be re-run and also works when './data_jupyter' does not exist
# yet (os.mkdir would raise FileNotFoundError in that case).
os.makedirs('./data_jupyter/fix_task', exist_ok=True)

# Write the three tables as CSV files with a header row and without the index.
data_et_fix.to_csv("data_jupyter/fix_task/data_et.csv", index=False, header=True)
data_trial_fix.to_csv("data_jupyter/fix_task/data_trial.csv", index=False, header=True)

data_subject.to_csv("data_jupyter/fix_task/data_subject.csv", index=False, header=True)

# Feedback

In [None]:
# Printed once execution reaches this last cell.
print('Success! Script ran through')