# Max normalization (each column divided by its maximum value; no minimum is subtracted)

In [17]:
import pandas as pd
import numpy as np

# Data from the first table: difficulty index (DI).
# One row per (user, interaction type) pair: users 1-12 each appear twice,
# first with interaction type 'U' and then with 'A'.
difficulty_index_data = {
    'User': [user_id for user_id in range(1, 13) for _ in range(2)],
    'Interaction type': ['U', 'A'] * 12,
    # DI values, laid out one user per row as (U, A).
    'DI': [
        0.49, 0.27,  # user 1
        0.36, 0.31,  # user 2
        0.35, 0.52,  # user 3
        0.36, 0.15,  # user 4
        0.46, 0.27,  # user 5
        0.56, 0.37,  # user 6
        0.61, 0.31,  # user 7
        0.53, 0.19,  # user 8
        0.4, 0.24,   # user 9
        0.68, 0.22,  # user 10
        0.36, 0.3,   # user 11
        0.33, 0.24,  # user 12
    ],
}

# Data from the second table: number of attempts per task.
# 'Task' holds the ten task labels; every other key is 'U<n>_U' or 'U<n>_A'
# and holds ten values, one per task. The effort loops later in this file
# pair column 'U<n>_<type>' with the DI row for user <n> and that interaction
# type.
# NOTE: 'U11_A' is listed before 'U11_U', unlike every other user. The
# computations visible in this file look columns up by name, so only the
# column order of the resulting DataFrame is affected.
formatted_data_tries = {
    'Task': ['#1', '#2', '#3', '#4', '#5', '#6', '#7', '#8', '#9', '#10'],
    'U1_U': [17, 1, 4, 8, 3, 12, 2, 9, 3, 9],
    'U1_A': [1, 1, 4, 2, 1, 5, 1, 4, 2, 7],
    'U2_U': [4, 2, 4, 10, 1, 13, 2, 11, 2, 12],
    'U2_A': [2, 1, 8, 2, 3, 5, 1, 6, 2, 4],
    'U3_U': [8, 1, 4, 15, 1, 17, 1, 10, 4, 11],
    'U3_A': [1, 1, 3, 1, 1, 6, 1, 6, 2, 7],
    'U4_U': [26, 1, 4, 3, 1, 10, 3, 15, 3, 10],
    'U4_A': [2, 1, 6, 1, 1, 4, 2, 5, 4, 5],
    'U5_U': [19, 2, 9, 17, 2, 0, 3, 0, 4, 2],
    'U5_A': [3, 2, 6, 3, 1, 3, 1, 4, 2, 5],
    'U6_U': [10, 1, 5, 5, 3, 17, 2, 9, 4, 10],
    'U6_A': [2, 1, 3, 1, 3, 6, 1, 4, 2, 7],
    'U7_U': [9, 1, 5, 6, 7, 0, 4, 0, 5, 5],
    'U7_A': [1, 1, 4, 1, 1, 3, 2, 6, 2, 7],
    'U8_U': [7, 1, 0, 18, 3, 14, 5, 0, 2, 3],
    'U8_A': [2, 1, 5, 3, 1, 8, 3, 6, 1, 9],
    'U9_U': [9, 1, 4, 8, 1, 9, 2, 9, 4, 9],
    'U9_A': [1, 1, 4, 3, 1, 4, 1, 5, 2, 7],
    'U10_U': [14, 1, 6, 16, 2, 13, 3, 4, 4, 7],
    'U10_A': [1, 1, 5, 1, 3, 6, 1, 4, 2, 10],
    'U11_A': [3, 2, 7, 1, 1, 3, 1, 4, 2, 4],
    'U11_U': [3, 1, 3, 4, 1, 7, 3, 8, 2, 10],
    'U12_U': [6, 1, 4, 5, 1, 7, 3, 10, 2, 9],
    'U12_A': [2, 1, 6, 2, 1, 6, 1, 7, 2, 7]
}

# Second per-task measurement table, with the same layout as
# `formatted_data_tries`: a 'Task' label column plus one ten-value column per
# 'U<n>_U' / 'U<n>_A' key.
# NOTE(review): the file does not say what quantity these floats represent
# (they look like per-task durations) — confirm the meaning and units.
data = {
    'Task': ['#1', '#2', '#3', '#4', '#5', '#6', '#7', '#8', '#9', '#10'],
    'U1_U': [30.50, 13.50, 33.90, 22.93, 19.67, 36.96, 16.46, 24.87, 18.94, 34.14],
    'U1_A': [8.40, 6.50, 13.38, 10.97, 10.46, 13.51, 8.01, 18.61, 14.22, 28.56],
    'U2_U': [26.20, 8.99, 32.04, 30.39, 12.19, 42.89, 13.64, 43.37, 13.61, 48.88],
    'U2_A': [5.57, 7.46, 16.01, 6.19, 10.15, 10.87, 8.40, 21.40, 16.00, 18.96],
    'U3_U': [20.05, 13.46, 36.88, 43.16, 16.65, 36.03, 12.50, 45.08, 13.68, 28.00],
    'U3_A': [5.97, 7.97, 7.57, 8.33, 8.94, 23.81, 10.10, 30.35, 10.53, 20.89],
    'U4_U': [22.36, 12.07, 38.51, 16.36, 15.00, 27.37, 36.78, 30.00, 14.04, 36.94],
    'U4_A': [6.46, 7.01, 13.70, 12.69, 7.83, 18.90, 7.17, 16.47, 11.56, 23.29],
    'U5_U': [51.24, 9.40, 62.34, 73.04, 18.04, 46.19, 36.78, 55.52, 17.04, 49.04],
    'U5_A': [7.68, 9.12, 18.76, 22.48, 12.40, 21.30, 8.96, 27.37, 17.17, 35.58],
    'U6_U': [59.65, 10.75, 43.82, 22.64, 25.18, 58.87, 21.32, 46.93, 15.35, 75.45],
    'U6_A': [6.48, 7.82, 10.60, 15.05, 8.46, 22.80, 7.67, 20.54, 11.03, 30.43],
    'U7_U': [70.09, 12.90, 76.47, 28.37, 14.14, 26.44, 52.54, 25.37, 11.83, 49.79],
    'U7_A': [5.86, 9.97, 13.90, 16.39, 12.05, 16.29, 11.99, 18.10, 12.49, 42.40],
    'U8_U': [43.10, 10.13, 96.13, 74.80, 24.32, 105.51, 35.23, 46.23, 27.43, 64.38],
    'U8_A': [9.97, 6.90, 18.77, 13.03, 8.86, 22.01, 16.99, 33.32, 8.05, 37.19],
    'U9_U': [22.36, 12.97, 29.18, 23.47, 18.78, 22.18, 13.35, 43.22, 15.64, 44.99],
    'U9_A': [5.47, 7.39, 12.68, 8.68, 9.94, 20.50, 9.44, 20.73, 10.68, 28.40],
    'U10_U': [34.16, 12.33, 69.03, 64.53, 9.31, 71.67, 19.32, 26.56, 16.93, 57.93],
    'U10_A': [8.43, 7.26, 18.10, 19.42, 10.26, 28.47, 6.87, 23.36, 14.46, 44.47],
    'U11_U': [8.89, 4.29, 9.59, 5.17, 5.82, 10.33, 5.85, 10.72, 8.29, 20.00],
    'U11_A': [6.73, 7.37, 11.76, 5.71, 6.15, 10.04, 7.17, 15.43, 10.18, 14.90],
    'U12_U': [11.09, 6.39, 11.99, 7.24, 7.36, 14.42, 10.90, 17.65, 7.85, 21.01],
    'U12_A': [5.01, 8.15, 6.08, 6.43, 6.95, 12.03, 8.15, 18.43, 12.89, 24.50]
}

# Per-task 0/1 flags for every 'U<n>_U' / 'U<n>_A' column; the effort formula
# further down adds these flags to the scaled effort value.
# NOTE(review): the name suggests 1 marks a task left incomplete — confirm.
#
# Only the 'U<n>_U' columns contain any 1s, so those rows are spelled out
# (one row per user, one entry per task) and every 'U<n>_A' column is
# generated as all zeros.
incomplete_unadapted_flags = [
    [1, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # user 1
    [1, 0, 1, 0, 1, 1, 0, 1, 1, 1],  # user 2
    [0, 0, 1, 1, 0, 1, 0, 1, 0, 1],  # user 3
    [0, 0, 1, 0, 0, 1, 0, 1, 0, 1],  # user 4
    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # user 5
    [1, 1, 1, 1, 1, 1, 1, 1, 0, 1],  # user 6
    [1, 0, 1, 1, 0, 1, 1, 1, 0, 1],  # user 7
    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # user 8
    [0, 0, 1, 0, 0, 1, 0, 1, 0, 1],  # user 9
    [1, 0, 1, 1, 0, 1, 0, 1, 1, 1],  # user 10
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # user 11
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # user 12
]

incomplete_columns = {'Task': [f'#{task_number}' for task_number in range(1, 11)]}
for flag_user, flag_row in enumerate(incomplete_unadapted_flags, start=1):
    # Keep the original column order: unadapted first, then adapted.
    incomplete_columns[f'U{flag_user}_U'] = flag_row
    incomplete_columns[f'U{flag_user}_A'] = [0] * len(flag_row)

df_incomplete = pd.DataFrame(incomplete_columns)

# Difficulty-index table as a DataFrame (columns: User, Interaction type, DI).
df_di = pd.DataFrame(difficulty_index_data)

# Raw per-task tables, still carrying their 'Task' label column.
df_tries = pd.DataFrame(formatted_data_tries)
df_data = pd.DataFrame(data)

# Numeric views: the same tables without the 'Task' label column.
df_tries_numeric = df_tries.drop('Task', axis=1)
df_data_numeric = df_data.drop('Task', axis=1)

# Max scaling: divide every column by that column's own maximum, so the
# largest value in each column becomes 1.
df_tries_normalized = df_tries_numeric.div(df_tries_numeric.max(), axis='columns')
df_data_normalized = df_data_numeric.div(df_data_numeric.max(), axis='columns')

# Effort per task from the max-scaled tables.
#
# For every user and environment (A = adapted, U = unadapted):
#     effort = (scaled tries * scaled data) * DI + incomplete flag
# The result is stored under 'U<n>_A_Weighted' / 'U<n>_U_Weighted'.

# DI look-up keyed by (user, interaction type), built once instead of
# filtering df_di with boolean masks on every iteration.
di_lookup = df_di.set_index(['User', 'Interaction type'])['DI']

# Columns are gathered here and turned into a DataFrame in one step; 'Task'
# goes in first so that it ends up as the leading column.
effort_columns = {'Task': df_tries['Task']}

for user in range(1, 13):
    adapted_col, unadapted_col = f'U{user}_A', f'U{user}_U'

    # Difficulty index for the adapted and unadapted environments.
    di_adapted = di_lookup[(user, 'A')]
    di_unadapted = di_lookup[(user, 'U')]
    print(di_adapted, di_unadapted)

    for column, di_value in ((adapted_col, di_adapted), (unadapted_col, di_unadapted)):
        scaled_product = df_tries_normalized[column].mul(df_data_normalized[column])
        effort_columns[f'{column}_Weighted'] = scaled_product.mul(di_value).add(df_incomplete[column])

# Assemble the table and round the numeric columns to two decimals.
df_effort = pd.DataFrame(effort_columns).round(2)

0.27 0.49
0.31 0.36
0.52 0.35
0.15 0.36
0.27 0.46
0.37 0.56
0.31 0.61
0.19 0.53
0.24 0.4
0.22 0.68
0.3 0.36
0.24 0.33


# Z-score normalization

In [11]:
# Column-wise z-scores: subtract each column's mean and divide by its
# standard deviation (pandas' default for .std(), i.e. the sample std).
df_tries_zscore = df_tries_numeric.sub(df_tries_numeric.mean()).div(df_tries_numeric.std())
df_data_zscore = df_data_numeric.sub(df_data_numeric.mean()).div(df_data_numeric.std())

# DI laid out as a users x interaction-type grid for direct scalar look-ups.
di_table = df_di.pivot(index='User', columns='Interaction type', values='DI')

# Columns are gathered here and turned into a DataFrame in one step; 'Task'
# goes in first so that it ends up as the leading column.
zscore_effort_columns = {'Task': df_tries['Task']}

for user in range(1, 13):
    adapted_col = f'U{user}_A'
    unadapted_col = f'U{user}_U'

    # Difficulty index for the adapted and unadapted environments.
    di_adapted = di_table.at[user, 'A']
    di_unadapted = di_table.at[user, 'U']

    z_tries_adapted = df_tries_zscore[adapted_col]
    z_data_adapted = df_data_zscore[adapted_col]
    z_tries_unadapted = df_tries_zscore[unadapted_col]
    z_data_unadapted = df_data_zscore[unadapted_col]

    zscore_effort_columns.update({
        # Product of variables: z(tries) * z(data) * DI.
        f'U{user}_A_Product': z_tries_adapted * z_data_adapted * di_adapted,
        f'U{user}_U_Product': z_tries_unadapted * z_data_unadapted * di_unadapted,
        # Weighted index: (z(tries) + z(data)) * (1 + DI).
        f'U{user}_A_Weighted': (z_tries_adapted + z_data_adapted) * (1 + di_adapted),
        f'U{user}_U_Weighted': (z_tries_unadapted + z_data_unadapted) * (1 + di_unadapted),
    })

# Assemble the table and round the numeric columns to two decimals.
df_effort = pd.DataFrame(zscore_effort_columns).round(2)