In [None]:
import pandas as pd
import seaborn as sns
import cv2
import matplotlib.pyplot as plt

# Analysis

In [None]:
# Location of the per-photo statistics CSV that the rest of the notebook analyses.
# NOTE(review): this is an absolute, machine-specific path, while the plot outputs
# in this notebook use paths relative to the working directory — consider making
# this relative/configurable so the notebook runs on other machines.
filepath = '/Users/danielfiuzadosil/Documents/GitHub_Repo/IPD_measuring_app/photos/output/photos_statistics_1259.csv'

In [None]:
# Read the CSV at `filepath` into the DataFrame `df` used by every later cell.
df = pd.read_csv(filepath)

In [None]:
# Row count first, then the column index as the cell's displayed value.
print(df.shape[0])
df.columns

In [None]:
# Number of rows for each distinct value of the 'target' column.
df['target'].value_counts()

In [None]:
# --- Derived features -------------------------------------------------------
# Image size in pixels and in megapixels.
df['resolution_px'] = df['img_height'] * df['img_width']
df['resolution_mpx'] = df['resolution_px'].div(10**6)
# Magnitude of the top-to-bottom angle.
df['top_to_bottom_angle_abs'] = df['top_to_bottom_angle'].abs()
# Combined left + right silhouette area, and its share of the whole image.
df['silhoutte'] = df['area_px_left_silhoutte'] + df['area_px_right_silhoutte']
df['area_face_to_size'] = df['silhoutte'] / df['resolution_px']
df['height_to_width_face_ratio'] = df['height_face_px'] / df['width_face_px']
# Strictly positive angles are kept; zero and negative angles are shifted by 180.
# Done column-wise instead of row by row: the previous loop read by position
# (.iloc[i]) but wrote by label (df.at[i, ...]), which is only correct when the
# index is the default 0..n-1 range, and it was slow on large frames.
df['top_to_bottom_angle_corrected'] = df['top_to_bottom_angle'].where(
    df['top_to_bottom_angle'] > 0,
    df['top_to_bottom_angle'] + 180,
)
# Signed deviation of the corrected angle from 90.
df['top_to_bottom_angle_offset'] = df['top_to_bottom_angle_corrected'] - 90

In [None]:
# Mean and standard deviation of the right-to-left silhouette area ratio,
# one value per output line.
print(
    df['area_right_to_left_silhoutte'].mean(),
    df['area_right_to_left_silhoutte'].std(),
    sep='\n',
)

In [None]:
# Rows whose right-to-left silhouette area ratio exceeds 16.
df.loc[df['area_right_to_left_silhoutte'].gt(16)]

In [None]:
# Only the ratio values (not the full rows) for the rows above the 16 threshold.
print(df.loc[df['area_right_to_left_silhoutte'] > 16, 'area_right_to_left_silhoutte'])

In [None]:
def plot_and_save_histogram(df, column, output_path = 'photos/output/plots/'):
    """Plot a density histogram with a KDE overlay for one column and save it as PDF.

    Parameters
    ----------
    df : DataFrame
        Data handed to seaborn.
    column : str
        Column drawn on the x axis; also used as the x label and the file stem.
    output_path : str, optional
        Directory that receives ``<column>.pdf``. It is created when missing.
        Defaults to the directory that was previously hard-coded.

    Returns
    -------
    None
        The figure is written to disk and is not closed here.
    """
    import os  # local import so this cell does not depend on the import cell

    # savefig raises if the target directory does not exist, so create it first.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    filepath = os.path.join(output_path, column + '.pdf')

    # Explicit figure/axes instead of relying on pyplot's "current figure" state.
    fig, ax = plt.subplots()
    sns.histplot(df, x = column, stat='density', multiple="stack", kde=False, bins=50, alpha = 0.3, ax=ax)
    sns.kdeplot(data=df, x= column, ax=ax)
    ax.set_xlabel(column)
    ax.set_ylabel("Density")
    fig.savefig(filepath, transparent=True, bbox_inches='tight')


In [None]:
def plot_and_save_histogram_with_category(df, column, category = 'target', output_path = 'photos/output/plots/'):
    """Plot per-category density histograms with KDE overlays and save them as PDF.

    Parameters
    ----------
    df : DataFrame
        Data handed to seaborn.
    column : str
        Column drawn on the x axis; also used as the x label and the file stem.
    category : str, optional
        Column used as the seaborn ``hue`` to split the distributions.
    output_path : str, optional
        Directory that receives ``<column>_good_vs_bad.pdf``. It is created when
        missing. Defaults to the directory that was previously hard-coded.

    Returns
    -------
    None
        The figure is written to disk and is not closed here.
    """
    import os  # local import so this cell does not depend on the import cell

    # savefig raises if the target directory does not exist, so create it first.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    # The file suffix is fixed regardless of which `category` column is used.
    filepath = os.path.join(output_path, column + '_good_vs_bad.pdf')

    # Explicit figure/axes instead of relying on pyplot's "current figure" state.
    fig, ax = plt.subplots()
    sns.histplot(df, x = column, hue=category, stat='density', multiple="stack", kde=False, bins=50, alpha = 0.3, ax=ax)
    sns.kdeplot(data=df, x= column, hue=category, ax=ax)
    ax.set_xlabel(column)
    ax.set_ylabel("Density")
    fig.savefig(filepath, transparent=True, bbox_inches='tight')

In [None]:
def get_percentiles(df, column):
    """Return the 5th, 25th, 50th, 75th and 95th percentiles of ``df[column]``.

    The result is a 5-tuple ordered from the lowest to the highest percentile,
    each entry obtained from ``Series.quantile`` on the selected column.
    """
    values = df[column]
    levels = (0.05, 0.25, 0.5, 0.75, 0.95)
    return tuple(values.quantile(level) for level in levels)

In [None]:
def plot_and_save_histogram_with_percentiles(df, column, output_path = 'photos/output/plots/'):
    """Plot a density histogram annotated with percentile markers and save it as PDF.

    Dotted vertical lines and text labels mark the 5th, 25th, 50th, 75th and
    95th percentiles of ``df[column]`` (as returned by ``get_percentiles``).

    Parameters
    ----------
    df : DataFrame
        Data handed to seaborn and to ``get_percentiles``.
    column : str
        Column drawn on the x axis; also used as the x label and the file stem.
    output_path : str, optional
        Directory that receives ``<column>_percentiles.pdf``. It is created when
        missing. Defaults to the directory that was previously hard-coded.

    Returns
    -------
    None
        The figure is written to disk and is not closed here.
    """
    import os  # local import so this cell does not depend on the import cell

    # savefig raises if the target directory does not exist, so create it first.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    fig, ax = plt.subplots()
    sns.histplot(df, x = column, stat='density', multiple="stack", kde=False, bins=50, alpha = 0.3, ax=ax)
    ax.set_xlabel(column)
    ax.set_ylabel("Density")

    # axvline's `ymax` is a fraction of the axes height (0 = bottom, 1 = top),
    # not a data value. The previous code passed `ylim_top * 0.8`, which made
    # the line length depend on the density scale instead of always reaching 80%.
    line_top_fraction = 0.8
    # ax.text uses data coordinates by default, so the label height is derived
    # from the current upper y limit (90% of it).
    pos_text = ax.get_ylim()[1] * 0.9

    quantiles = get_percentiles(df, column)
    labels = ("|5th", "|25th", "|50th", "|75th", "|95th")
    for quantile, label in zip(quantiles, labels):
        ax.axvline(quantile, alpha = 0.8, ymax = line_top_fraction, linestyle = ":", color = 'k')
        ax.text(quantile, pos_text, label, size = 10, alpha = 1)

    filepath = os.path.join(output_path, column + '_percentiles.pdf')
    fig.savefig(filepath, transparent=True, bbox_inches='tight')

In [None]:
# Save a percentile-annotated histogram for every feature of interest.
# One loop replaces fifteen copy-pasted calls; the order of the plots is unchanged.
# NOTE(review): 'area_silhoutte' is not created in the visible cells (the feature
# cell adds 'silhoutte') — confirm the column exists in the CSV.
for feature in (
    'height_to_width_face_ratio',
    'area_face_to_size',
    'resolution_mpx',
    'img_height',
    'img_width',
    'left_to_right_angle',
    'top_to_bottom_angle_corrected',
    'top_to_bottom_angle_offset',
    'nose_to_right_cheek_angle',
    'left_cheek_to_nose_angle',
    'ipd_px',
    'width_face_px',
    'height_face_px',
    'area_right_to_left_silhoutte',
    'area_silhoutte',
):
    plot_and_save_histogram_with_percentiles(df, feature)

In [None]:
# Save a per-category (default hue: 'target') histogram for every feature of interest.
# One loop replaces fifteen copy-pasted calls; the order of the plots is unchanged.
# NOTE(review): 'area_silhoutte' is not created in the visible cells (the feature
# cell adds 'silhoutte') — confirm the column exists in the CSV.
for feature in (
    'height_to_width_face_ratio',
    'area_face_to_size',
    'resolution_mpx',
    'img_height',
    'img_width',
    'left_to_right_angle',
    'top_to_bottom_angle_corrected',
    'top_to_bottom_angle_offset',
    'nose_to_right_cheek_angle',
    'left_cheek_to_nose_angle',
    'ipd_px',
    'width_face_px',
    'height_face_px',
    'area_right_to_left_silhoutte',
    'area_silhoutte',
):
    plot_and_save_histogram_with_category(df, feature)

In [None]:
# Save a plain histogram + KDE plot for every feature of interest.
# One loop replaces fifteen copy-pasted calls; the order of the plots is unchanged.
# NOTE(review): 'area_silhoutte' is not created in the visible cells (the feature
# cell adds 'silhoutte') — confirm the column exists in the CSV.
for feature in (
    'height_to_width_face_ratio',
    'area_face_to_size',
    'resolution_mpx',
    'img_height',
    'img_width',
    'left_to_right_angle',
    'top_to_bottom_angle_corrected',
    'top_to_bottom_angle_offset',
    'nose_to_right_cheek_angle',
    'left_cheek_to_nose_angle',
    'ipd_px',
    'width_face_px',
    'height_face_px',
    'area_right_to_left_silhoutte',
    'area_silhoutte',
):
    plot_and_save_histogram(df, feature)

In [None]:
from scipy.stats import pearsonr
def reg_coef(x,y,label=None,color=None,**kwargs):
    """Write the Pearson correlation of ``x`` and ``y`` in the middle of the current axes.

    The axes' frame, ticks and labels are switched off so only the ``r = ...``
    text remains. ``label``, ``color`` and any extra keyword arguments are
    accepted but not used.
    """
    current_axes = plt.gca()
    # Only the correlation coefficient is displayed; the p-value is discarded.
    corr, _p_value = pearsonr(x,y)
    text = 'r = {:.2f}'.format(corr)
    current_axes.annotate(
        text,
        xy=(0.5,0.5),
        xycoords='axes fraction',
        ha='center',
    )
    current_axes.set_axis_off()

In [None]:
# Switch read by the pairplot cell: the seaborn pairplot is only drawn and saved
# when this is set to True.
pairplot = False

In [None]:
# Draw and save the pairplot (coloured by 'target') only when the `pairplot`
# flag is enabled. Plain truthiness test instead of comparing with `== True`.
if pairplot:
    g = sns.pairplot(df, hue='target',plot_kws=dict(alpha=0.3))
    # # g.map_lower(sns.regplot)
    # g.map_upper(reg_coef)
    plt.savefig('photos/output/plots/pairplot.pdf')

In [None]:
# g = sns.pairplot(df, hue='target',plot_kws=dict(alpha=0.3))
# # # g.map_lower(sns.regplot)
# g.map_upper(reg_coef)
# plt.savefig('photos/output/plots/pairplot_with_coeff.pdf')