# Tratamiento del dataframe 
Se ha extraido de a base de datos un daframe en formato pickle

## Imports

In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sql_face.tables import *
import os
import plotly.express as px
import plotly.io as pio


## Leer dataframe del disco
se especifica directorio y fichero pkl. No necesitamos la base de datos.

In [2]:
home = os.path.expanduser("~")

input_dir = os.path.join(home, 'video_resources', 'sql_database')
df = pd.read_pickle(os.path.join(input_dir,'df_colab_20.pkl'))
output_dir = os.path.join(home, 'video_resources', 'sunburst_20')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)


In [3]:
df.columns

Index(['index', 'identity', 'pitch', 'angle_pitch', 'yaw', 'angle_yaw', 'roll',
       'angle_roll', 'source', 'gender', 'headgear', 'age', 'age_number',
       'glasses', 'type', 'beard', 'emotion', 'other_occlusions', 'race',
       'image_id', 'Detector', 'E_Model', 'Q_Model', 'Q_group', 'Quality'],
      dtype='object')

In [4]:
df.E_Model.value_counts()

QMagFace    484136
Name: E_Model, dtype: int64

### Quitamos los Embedding Models
y los registros repetidos

In [5]:
df2 = df.copy()
# #df2 = df.loc[df['type'] == 'enfsiImage']
# df2.drop('E_Model', axis = 1,inplace = True)
# df2 = df2.drop_duplicates(subset = ['image_id','Detector','Q_model'])
# df2 = df2.reset_index(drop=True)

In [6]:
len(df2)

484136

In [7]:

df2.Detector.value_counts()

mtcnn           229466
mtcnn_serfiq    158437
mediapipe        96233
Name: Detector, dtype: int64

In [8]:
df2.Q_Model.value_counts()

ser_fiq    286623
tface      197513
Name: Q_Model, dtype: int64

### Nos quedamos con Un solo detector y quality Model

In [9]:
df3 = df2.loc[(df2.Detector == 'mtcnn_serfiq') & (df2.Q_Model=='ser_fiq')]
df3.reset_index(drop=True, inplace = True)


In [10]:
df3

Unnamed: 0,index,identity,pitch,angle_pitch,yaw,angle_yaw,roll,angle_roll,source,gender,...,beard,emotion,other_occlusions,race,image_id,Detector,E_Model,Q_Model,Q_group,Quality
0,3882,060,Pitch.HALF_UP,,Yaw.FRONTAL,,Roll.FRONTAL,,SCFace,Gender.MALE,...,False,Emotion.NEUTRAL,,Race.WHITE,391,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.820794
1,3883,030,Pitch.HALF_DOWN,,Yaw.FRONTAL,,Roll.FRONTAL,,SCFace,Gender.MALE,...,False,Emotion.NEUTRAL,,Race.WHITE,392,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.LOW,0.022044
2,3884,047,Pitch.DOWN,,Yaw.FRONTAL,,Roll.FRONTAL,,SCFace,Gender.MALE,...,False,Emotion.NEUTRAL,,Race.WHITE,393,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.869096
3,3885,014,Pitch.FRONTAL,,Yaw.PROFILE,,,,SCFace,Gender.FEMALE,...,False,Emotion.NEUTRAL,,Race.WHITE,394,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.LOW,0.038777
4,3886,035,Pitch.FRONTAL,,Yaw.FRONTAL,,Roll.HALF_LEANING,,SCFace,Gender.MALE,...,True,Emotion.NEUTRAL,,Race.WHITE,395,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.841657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114996,317,2015-17,Pitch.FRONTAL,-3.240460,Yaw.PROFILE,-36.129102,Roll.FRONTAL,1.442869,ENFSI,Gender.FEMALE,...,,Emotion.NEUTRAL,,Race.WHITE,8589,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.808182
114997,318,2015-17,Pitch.FRONTAL,-6.424410,Yaw.PROFILE,25.806312,Roll.FRONTAL,0.167557,ENFSI,Gender.FEMALE,...,,Emotion.SAD,,Race.WHITE,8590,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.774945
114998,319,2015-17,Pitch.DOWN,22.755557,Yaw.PROFILE,21.899967,Roll.HALF_LEANING,10.146845,ENFSI,Gender.FEMALE,...,,Emotion.FEAR,,Race.WHITE,8591,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.840112
114999,320,2015-17,Pitch.FRONTAL,-7.860651,Yaw.FRONTAL,-7.253606,Roll.FRONTAL,2.571381,ENFSI,Gender.FEMALE,...,,Emotion.FEAR,,Race.WHITE,8592,mtcnn_serfiq,QMagFace,ser_fiq,QualityGroup.VERY_HIGH,0.875079


### Convertimos en string los Enums y nos quedamos con las columnas que queremos

In [11]:
#df3 = df2.copy()
#convertimos a String los Enum
variables = ['gender','yaw','pitch','emotion','race']#'Q_group','pitch','roll',
#df_plot = df.Quality
for var in variables:
    df3[var] = df3[var].apply(lambda x: x.value if x else None)


variables += ['source','Quality','Detector', 'E_Model','Q_Model']
df_plot = df3[variables]

In [12]:
df3.columns

Index(['index', 'identity', 'pitch', 'angle_pitch', 'yaw', 'angle_yaw', 'roll',
       'angle_roll', 'source', 'gender', 'headgear', 'age', 'age_number',
       'glasses', 'type', 'beard', 'emotion', 'other_occlusions', 'race',
       'image_id', 'Detector', 'E_Model', 'Q_Model', 'Q_group', 'Quality'],
      dtype='object')

In [13]:
df_plot.columns

Index(['gender', 'yaw', 'pitch', 'emotion', 'race', 'source', 'Quality',
       'Detector', 'E_Model', 'Q_Model'],
      dtype='object')

In [14]:
df_plot[['Q_Model','E_Model','Detector','source']].groupby(['Q_Model','E_Model','Detector']).value_counts()

Q_Model  E_Model   Detector      source    
ser_fiq  QMagFace  mtcnn_serfiq  ChokePoint    71565
                                 UTKface       23955
                                 XQLFW         13140
                                 SCFace         3841
                                 ForenFace      2342
                                 ENFSI           158
dtype: int64

## definimos la función de ploteo
-['gender', 'yaw', 'pitch', 'race']

In [15]:
def plot_sun(source,e_model):
    df_x = df_plot.loc[((df_plot.source==source) & (df_plot.E_Model==e_model))]
    df_x.dropna(inplace=True)
    fig = px.sunburst(df_x, path=['gender', 'yaw', 'pitch', 'race'],   color="Quality",
                  color_continuous_scale="Viridis",
                  range_color=[0, 1],title=f'{source}, Model {e_model}') #values="gender"#"Q_model", "Detector",
    #fig.update_layout(title=source)
    #img_data = pio.to_image(fig,format='png',scale=3)

    pio.write_image(fig,os.path.join(output_dir,source+"_sunburst_21.png"),format='png', scale=6)
    fig.show()

In [16]:
df_y = df_plot.loc[((df_plot.source=='UTKface') & (df_plot.E_Model=='QMagFace_SR'))]
df_y.to_excel(os.path.join(output_dir,'UTKFace_plot.xlsx'))

In [17]:
df_y.gender.value_counts()

Series([], Name: gender, dtype: int64)

## Definimos las sources que queremos y hacemos el plot (y se guarden en disco)

In [22]:
sources = [ 'ForenFace', 'ChokePoint', 'SCFace',  'XQLFW']

In [23]:
plot_sun('ForenFace','QMagFace_SR')

In [24]:
#for source in dict.keys():
for source in np.unique(df_plot.source):
    for model in np.unique(df_plot.E_Model):
        plot_sun(source,model)

### de aqui para abajo son pruebas


In [21]:
source = 'XQLFW'
df_plot = dict[source]
df_plot.dropna(inplace=True)

TypeError: 'type' object is not subscriptable

In [None]:

# Ajustar el tamaño de la figura
plt.figure(figsize=(10, 6))

# Crear el stripplot
sns.swarmplot(x='yaw', hue='emotion', y= 'Quality',  data=df_plot)#,jitter=True,dodge=True,
#sns.scatterplot(x='yaw2', hue='emotion2', y= 'Quality',  data=df2)#,dodge=True,
# Mostrar el gráfico
plt.show()


In [None]:
sns.swarmplot(x='race', hue='gender', y= 'Quality',  data=df_plot)#,jitter=True,dodge=True,

In [None]:
import plotly.express as px

fig = px.treemap(df_plot, path=['Q_group', 'Q_model', 'Detector', 'gender'], title="Mosaic Plot")
fig.show()

In [None]:


# # Tabla de contingencia para Quality_group y Detector
# contingency_table_detector = pd.crosstab(df_plot['Q_group'], df_plot['Detector'])

# # Tabla de contingencia para Quality_group y Quality_model
# contingency_table_quality_model = pd.crosstab(df_plot['Q_group'], df_plot['Q_model'])

# # Tabla de contingencia para Quality_group y yaw
# contingency_table_yaw = pd.crosstab(df_plot['Q_group'], df_plot['yaw'])

# # Tabla de contingencia para Quality_group y gender
# contingency_table_gender = pd.crosstab(df_plot['Q_group'], df_plot['gender'])


# # Tabla de contingencia para Quality_group y emotion
# contingency_table_emotion = pd.crosstab(df_plot['Q_group'], df_plot['emotion'])

# # Tabla de contingencia para Quality_group y race
# contingency_table_race = pd.crosstab(df_plot['Q_group'], df_plot['race'])


In [None]:

# def norm_table (df):
#     for col in df.columns:
#         df[col] = df[col]/sum(df[col])



In [None]:
# norm_table(contingency_table_quality_model)


In [None]:
# norm_table(contingency_table_detector)
# contingency_table_detector

In [None]:
# norm_table(contingency_table_gender)
# contingency_table_gender

In [None]:
# norm_table(contingency_table_yaw)
# contingency_table_yaw

In [None]:
# norm_table(contingency_table_race)
# contingency_table_race

In [None]:
# norm_table(contingency_table_emotion)
# contingency_table_emotion

In [None]:
# contingency_table_emotion.plot(kind='bar', subplots=False)
# plt.legend(loc='best')


In [None]:
# contingency_table_race.plot(kind='bar', subplots=False)
# plt.legend(loc='best')


In [None]:
# contingency_table_yaw.plot(kind='bar', subplots=False)
# plt.legend(loc='best')


In [None]:
import matplotlib.pyplot as plt

def plot_stacked_bar(contingency_table, title, xlabel, ylabel):
    ax = contingency_table.plot(kind='bar', stacked=True, figsize=(10, 7))
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(loc='best')
    plt.show()

# plot_stacked_bar(contingency_table_detector, 'Quality_group vs Detector', 'Quality_group', 'Frequency')
# plot_stacked_bar(contingency_table_quality_model, 'Quality_group vs Quality_model', 'Quality_group', 'Frequency')
# plot_stacked_bar(contingency_table_yaw, 'Quality_group vs yaw', 'Quality_group', 'Frequency')
# plot_stacked_bar(contingency_table_gender, 'Quality_group vs gender', 'Quality_group', 'Frequency')


In [None]:
df_plot.head()


In [None]:
import plotly.io as pio

#fig = ...  # tu figura de plotly aquí
#pio.write_image(fig, 'figure.png', format='png', scale=2) 

In [None]:
# Crear un gráfico sunburst con Plotly
fig1 = px.sunburst(df_plot1, path=['race','gender'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig2 = px.sunburst(df_plot1, path=['yaw','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig3 = px.sunburst(df_plot1, path=['Detector','yaw','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig4 = px.sunburst(df_plot1, path=['gender','race','yaw','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig5 = px.sunburst(df_plot1, path=['Detector','gender','yaw','race','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",

fig1.show()
fig2.show()
fig3.show()
fig4.show()
fig5.show()

In [None]:
# Crear un gráfico sunburst con Plotly
fig1 = px.sunburst(df_plot1, path=['race','gender'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig2 = px.sunburst(df_plot1, path=['yaw','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig3 = px.sunburst(df_plot1, path=['Detector','yaw','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig4 = px.sunburst(df_plot1, path=['gender','race','yaw','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",
fig5 = px.sunburst(df_plot1, path=['Detector','gender','yaw','race','emotion'],   color="Quality",
                  color_continuous_scale="Viridis") #values="gender"#"Q_model", "Detector",

fig1.show()
fig2.show()
fig3.show()
fig4.show()
fig5.show()

In [None]:
tabla1 =df_plot1.groupby(['Q_model','Detector',"gender",'race','yaw','emotion']).agg({"Quality":["count","mean"]})

In [None]:
type(tabla)

In [None]:
tabla1.to_excel("tabla_serfiq.xlsx")

In [None]:
df.angle_yaw

In [None]:
df_plot.yaw.value_counts()