### Extracción de Datos

In [798]:
# Importar librerías
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

In [799]:
# Extraer datos de tablas
products_df_raw = pd.read_csv('20230223_productos.csv')
sales_df = pd.read_csv('ventas.csv')

### Limpieza de Datos

#### Productos

In [800]:
# Definir columnas relevantes de la tabla de productos
relevant_columns = [
        "Material", "Productos_Por_Empaque", "MLSize", "Returnability", 
        "GlobalFlavor", "Container", "GlobalCategory", "BrandGrouper", 
        "Presentation", "Brand"
    ]

# Extraer solo columnas relevantes de la tabla de productos
products_df = products_df_raw[relevant_columns]

products_df.head()

Unnamed: 0,Material,Productos_Por_Empaque,MLSize,Returnability,GlobalFlavor,Container,GlobalCategory,BrandGrouper,Presentation,Brand
0,9465,6,600,NO RETORNABLE,LIMÓN,PLASTICO,AGUA,CIEL,600 ML NR,CIEL EXPRIM
1,14450,6,500,NO RETORNABLE,ARÁNDANO ACAÍ,PLASTICO,BEBIDAS EMERGENTES,POWERADE,500 ML NR PET,POWERADE FIT
2,148,24,500,RETORNABLE,UVA,VIDRIO,REFRESCOS,FANTA,500 ML RET,FANTA
3,1088,4,3800,NO RETORNABLE,SIN SABOR,PLASTICO,AGUA,SIERRAZUL,GALÓN 3.8 LTS.,SIERRAZUL AGUA PURIFICADA
4,2142,6,340,NO RETORNABLE,SIN SABOR,VIDRIO,AGUA,TOPO CHICO,12 OZ. NR VIDRIO,TOPO CHICO A.M.


### Transformación de Datos

#### Productos

##### Extraer gramos

In [801]:
# Función para convertir columna de presentación a indicador de gramos
def convert_presentation_to_grams(row):
    if row['MLSize'] == 0:
        if 'KG' in row['Presentation']:
            return int(float(row['Presentation'].split()[1]) * 1000)
        elif 'GR' in row['Presentation']:
            return int(row['Presentation'].split()[1])
    return 0

In [802]:
# Convertir toda la columna de presentación a indicador de gramos en tabla de productos
products_df['Presentation'] = products_df.apply(convert_presentation_to_grams, axis=1)

products_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  products_df['Presentation'] = products_df.apply(convert_presentation_to_grams, axis=1)


Unnamed: 0,Material,Productos_Por_Empaque,MLSize,Returnability,GlobalFlavor,Container,GlobalCategory,BrandGrouper,Presentation,Brand
0,9465,6,600,NO RETORNABLE,LIMÓN,PLASTICO,AGUA,CIEL,0,CIEL EXPRIM
1,14450,6,500,NO RETORNABLE,ARÁNDANO ACAÍ,PLASTICO,BEBIDAS EMERGENTES,POWERADE,0,POWERADE FIT
2,148,24,500,RETORNABLE,UVA,VIDRIO,REFRESCOS,FANTA,0,FANTA
3,1088,4,3800,NO RETORNABLE,SIN SABOR,PLASTICO,AGUA,SIERRAZUL,0,SIERRAZUL AGUA PURIFICADA
4,2142,6,340,NO RETORNABLE,SIN SABOR,VIDRIO,AGUA,TOPO CHICO,0,TOPO CHICO A.M.


##### One-Hot a Categóricas

In [803]:
# Definir columnas categóricas a codificar
categorical_columns = ["Returnability", "GlobalFlavor", "Container", "GlobalCategory", "BrandGrouper", "Brand"]

# Aplicar One-Hot Encoding a las columnas categóricas
onehot_encoder = OneHotEncoder(sparse_output=False, drop='first')
categorical_encoded = onehot_encoder.fit_transform(products_df[categorical_columns])

# Convertir los datos codificados a un DataFrame
categorical_encoded_df = pd.DataFrame(categorical_encoded)

# Eliminar las columnas categóricas originales de products_df y concatenar con las columnas codificadas
products_df = pd.concat([products_df.drop(columns=categorical_columns).reset_index(drop=True), categorical_encoded_df], axis=1)

products_df.head()

Unnamed: 0,Material,Productos_Por_Empaque,MLSize,Presentation,0,1,2,3,4,5,...,154,155,156,157,158,159,160,161,162,163
0,9465,6,600,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,14450,6,500,0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,148,24,500,0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1088,4,3800,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2142,6,340,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


##### Normalizar

In [804]:
# Escalar todas las columnas (numéricas y de One-Hot Encoding) para que estén en la misma escala
products_df.columns = products_df.columns.astype(str)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(products_df.drop(columns=['Material']))

##### Componentes Principales

In [805]:
# Aplicar PCA con 50 componentes
pca = PCA(n_components=50)
pca_data = pca.fit_transform(scaled_data)

# Calcular la varianza acumulada para cada número de componentes
cumulative_variance = np.cumsum(pca.explained_variance_ratio_) * 100

# Mostrar la variabilidad explicada
print("Varianza acumulada con 50 componentes: {:.2f}%".format(cumulative_variance[-1]))

Varianza acumulada con 50 componentes: 94.45%


In [806]:
# Crear un DataFrame final con el 'Material' como id y los componentes principales
products_pca_df = pd.DataFrame(pca_data)
products_pca_df.insert(0, 'product_id', products_df['Material'].values)
products_pca_df.head()

Unnamed: 0,product_id,0,1,2,3,4,5,6,7,8,...,40,41,42,43,44,45,46,47,48,49
0,9465,0.21119,-0.584249,-0.514844,0.24564,0.461752,0.141713,-0.346612,0.040673,0.411813,...,0.055065,-0.020577,-0.269467,-0.028368,0.083516,0.134845,0.073024,0.00784,-0.049117,0.007456
1,14450,0.686275,-0.398923,-0.57862,-0.190855,-0.191997,0.046877,-0.230071,0.119015,-0.057407,...,0.077161,-0.020709,0.000406,0.020466,0.071175,-0.025822,-0.05188,-0.022502,0.049764,0.002399
2,148,-0.508888,0.009802,1.388761,0.327996,-0.536266,-0.722467,-0.351427,0.55452,0.474337,...,-0.058504,-0.001113,0.098721,-0.242757,-0.093085,-0.049369,0.254121,-0.100427,-0.108341,-0.005811
3,1088,0.206965,-0.556361,-0.438045,0.322961,0.597207,-0.001358,-0.193802,0.021642,0.279174,...,0.068534,-0.099131,0.050758,-0.184602,-0.214173,0.192988,0.186431,0.34032,-0.161584,-0.036259
4,2142,0.449714,0.232054,0.481005,0.343943,0.830362,-0.025471,-0.433537,0.424208,0.789216,...,-0.109315,0.022771,0.090009,0.059466,-0.137904,-0.093743,-0.031717,-0.044904,-0.002086,0.027855


##### Matriz de Características

In [807]:
# Definir matriz, excluir columnas no relacionadas a las caracteristicas
feature_matrix = products_pca_df.drop(columns=['product_id']).values

#### Ventas

##### Convertir Calmonth a DateTime

In [808]:
sales_df['calmonth'] = pd.to_datetime(sales_df['calmonth'], format='%Y%m').dt.to_period('M')

sales_df.head()

Unnamed: 0,CustomerId,material,calmonth,uni_box
0,499920078,9151,2019-09,0.4364
1,499920078,2287,2019-09,3.1701
2,499920078,4526,2019-09,0.2818
3,499920078,14050,2019-09,0.2642
4,499920078,1333,2019-09,2.1134


##### Renombrar Columnas

In [809]:
sales_df = sales_df.rename(columns={
    'CustomerId': 'customer_id',
    'material': 'product_id',
    'calmonth': 'sale_date',
    'uni_box': 'sale_gallons'
})

sales_df.head()

Unnamed: 0,customer_id,product_id,sale_date,sale_gallons
0,499920078,9151,2019-09,0.4364
1,499920078,2287,2019-09,3.1701
2,499920078,4526,2019-09,0.2818
3,499920078,14050,2019-09,0.2642
4,499920078,1333,2019-09,2.1134


##### Agrupar por Mes

In [810]:
sales_df = sales_df.groupby(['customer_id', 'product_id', 'sale_date'], as_index=False)['sale_gallons'].sum()

sales_df.head()

Unnamed: 0,customer_id,product_id,sale_date,sale_gallons
0,499920078,1,2020-02,1.5005
1,499920078,24,2019-09,0.5284
2,499920078,24,2019-11,0.5284
3,499920078,24,2020-01,0.5284
4,499920078,24,2020-04,0.5284


##### Transponer Meses

In [811]:
# Crear una tabla dinámica para reorganizar los datos
pivot_df = sales_df.pivot_table(
    index=['customer_id', 'product_id'],  # Las combinaciones de cliente y producto como índice
    columns='sale_date',                   # Las fechas como columnas (enero, febrero, etc.)
    values='sale_gallons',                 # Los valores de venta
    aggfunc='sum',                         # Sumar las ventas en caso de múltiples registros
    fill_value=0                           # Rellenar con 0 en caso de que no haya ventas para un mes
)

# Restablecer el índice para tener un DataFrame limpio
sales_df = pivot_df.reset_index()

sales_df.head()

sale_date,customer_id,product_id,2019-09,2019-10,2019-11,2019-12,2020-01,2020-02,2020-03,2020-04,...,2022-03,2022-04,2022-05,2022-06,2022-07,2022-08,2022-09,2022-10,2022-11,2022-12
0,499920078,1,0.0,0.0,0.0,0.0,0.0,1.5005,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,499920078,24,0.5284,0.0,0.5284,0.0,0.5284,0.0,0.0,0.5284,...,0.5284,0.0,0.0,0.0,0.5284,0.0,0.5284,0.0,0.5284,0.5284
2,499920078,100,16.9072,8.4536,19.0206,16.9072,16.9072,14.7938,14.7938,25.3608,...,8.4536,8.4536,10.567,12.6804,8.4536,10.567,6.3402,8.4536,10.567,6.3402
3,499920078,101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,499920078,117,2.1134,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Calcular Frecuencia de Ventas por Producto

In [812]:
# Calculamos el número de meses con ventas para cada producto entre el total de meses
sale_frequency = (sales_df.iloc[:, 2:] > 0).sum(axis=1) / (sales_df.iloc[:, 2:]).shape[1]
sales_df.insert(2, "sale_frequency", sale_frequency)

sales_df.head()

sale_date,customer_id,product_id,sale_frequency,2019-09,2019-10,2019-11,2019-12,2020-01,2020-02,2020-03,...,2022-03,2022-04,2022-05,2022-06,2022-07,2022-08,2022-09,2022-10,2022-11,2022-12
0,499920078,1,0.025,0.0,0.0,0.0,0.0,0.0,1.5005,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,499920078,24,0.475,0.5284,0.0,0.5284,0.0,0.5284,0.0,0.0,...,0.5284,0.0,0.0,0.0,0.5284,0.0,0.5284,0.0,0.5284,0.5284
2,499920078,100,1.0,16.9072,8.4536,19.0206,16.9072,16.9072,14.7938,14.7938,...,8.4536,8.4536,10.567,12.6804,8.4536,10.567,6.3402,8.4536,10.567,6.3402
3,499920078,101,0.075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,499920078,117,0.025,2.1134,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Calcular Magnitud de Ventas por Producto

In [813]:
# Calculamos el total de galones vendidos por cada producto
sales_df['sales_per_product'] = sales_df.iloc[:, 3:].sum(axis=1)

# Calculamos la proporción de galones vendidos por producto
sales_df.insert(3, "sale_proportion", sales_df['sales_per_product'] / sales_df.groupby('customer_id')['sales_per_product'].transform('sum'))

# Eliminamos la columna temporal 'sales_per_product'
sales_df.drop(columns=['sales_per_product'], inplace=True)

sales_df.head()

sale_date,customer_id,product_id,sale_frequency,sale_proportion,2019-09,2019-10,2019-11,2019-12,2020-01,2020-02,...,2022-03,2022-04,2022-05,2022-06,2022-07,2022-08,2022-09,2022-10,2022-11,2022-12
0,499920078,1,0.025,0.000243,0.0,0.0,0.0,0.0,0.0,1.5005,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,499920078,24,0.475,0.001629,0.5284,0.0,0.5284,0.0,0.5284,0.0,...,0.5284,0.0,0.0,0.0,0.5284,0.0,0.5284,0.0,0.5284,0.5284
2,499920078,100,1.0,0.12136,16.9072,8.4536,19.0206,16.9072,16.9072,14.7938,...,8.4536,8.4536,10.567,12.6804,8.4536,10.567,6.3402,8.4536,10.567,6.3402
3,499920078,101,0.075,0.001028,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,499920078,117,0.025,0.000343,2.1134,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Limpiar Tabla

In [814]:
sales_df = sales_df[['customer_id', 'product_id', 'sale_frequency', 'sale_proportion']]

sales_df.head()

sale_date,customer_id,product_id,sale_frequency,sale_proportion
0,499920078,1,0.025,0.000243
1,499920078,24,0.475,0.001629
2,499920078,100,1.0,0.12136
3,499920078,101,0.075,0.001028
4,499920078,117,0.025,0.000343


### Modelación

#### Producto Nuevo

##### Definir Producto Nuevo

In [815]:
new_product_data = {
    'Material_desc': 'SPRITE 600 ML NR 24B', 
    'Productos_Por_Empaque': 24, 
    'BrandPresRet': 'SPRITE 600 ML NO RETORNABLE', 
    'ProdKey': 'SABORES INDIVIDUAL NO RETORNABLE', 
    'Brand': 'SPRITE', 
    'Presentation': '600 ML NR', 
    'MLSize': 600, 
    'Returnability': 'NO RETORNABLE', 
    'Pack': '600 ML', 
    'Size': 'INDIVIDUAL', 
    'Flavor': 'LIMA LIMON', 
    'Container': 'PLASTICO', 
    'Ncb': 0, 
    'ProductType': 'SABORES REGULAR', 
    'ProductCategory': 'REFRESCOS', 
    'SegAg': 'SABORES', 
    'SegDet': 'SABORES REGULAR', 
    'GlobalCategory': 'REFRESCOS', 
    'GlobalSubcategory': 'SABORES', 
    'BrandGrouper': 'SPRITE', 
    'GlobalFlavor': 'LIMA LIMÓN'
    }

new_product = pd.DataFrame([new_product_data])

new_product = products_df_raw[products_df_raw['Material'] == 9465].drop(columns=['Material'])

new_product.head()

Unnamed: 0,Material_desc,Productos_Por_Empaque,BrandPresRet,ProdKey,Brand,Presentation,MLSize,Returnability,Pack,Size,...,Container,Ncb,ProductType,ProductCategory,SegAg,SegDet,GlobalCategory,GlobalSubcategory,BrandGrouper,GlobalFlavor
0,CIEL EXPRIM LIMON 600 ML NR 6B,6,CIEL EXPRIM 600 ML NO RETORNABLE,AGUA SABORIZADA,CIEL EXPRIM,600 ML NR,600,NO RETORNABLE,600 ML,INDIVIDUAL,...,PLASTICO,1,AGUA SABORIZADA,AGUA,AGUA SABORIZADA,AGUA SABORIZADA,AGUA,AGUA SABORIZADA,CIEL,LIMÓN


##### Transformar Producto Nuevo a Formato de Tabla de Productos

In [816]:
# Extraer columnas relevantes
new_product = new_product[relevant_columns[1:]]

# Extraer gramos
new_product['Presentation'] = new_product.apply(convert_presentation_to_grams, axis=1)

# One-Hot
categorical_encoded = onehot_encoder.transform(new_product[categorical_columns])
categorical_encoded_df = pd.DataFrame(categorical_encoded)
new_product = pd.concat([new_product.drop(columns=categorical_columns).reset_index(drop=True), categorical_encoded_df], axis=1)

# Normalizar
new_product.columns = new_product.columns.astype(str)
scaled_data = scaler.transform(new_product)

# Componentes Principales
pca_data = pca.transform(scaled_data)
new_product = pd.DataFrame(pca_data)

new_product.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.21119,-0.584249,-0.514844,0.24564,0.461752,0.141713,-0.346612,0.040673,0.411813,-0.153756,...,0.054692,-0.021233,-0.268531,-0.02997,0.082954,0.134074,0.071758,0.008258,-0.047001,0.009347


#### Similitud Coseno

In [817]:
# Calcular similitud coseno entre el producto nuevo y todos los productos
similarities = cosine_similarity(new_product, feature_matrix)[0, :]

# Juntar product_id con su similitud
products_pca_df['product_similarity'] = similarities

products_similarities = products_pca_df[['product_id', 'product_similarity']]

products_similarities.head()

Unnamed: 0,product_id,product_similarity
0,9465,0.999998
1,14450,0.16489
2,148,-0.176713
3,1088,0.224954
4,2142,0.007484


##### Unir Tablas

In [818]:
merged_df = pd.merge(sales_df, products_similarities, on='product_id', how='left')

merged_df.head()

Unnamed: 0,customer_id,product_id,sale_frequency,sale_proportion,product_similarity
0,499920078,1,0.025,0.000243,-0.243338
1,499920078,24,0.475,0.001629,0.004828
2,499920078,100,1.0,0.12136,-0.243619
3,499920078,101,0.075,0.001028,-0.162359
4,499920078,117,0.025,0.000343,-0.194351


#### Unión de Indicadores

In [819]:
# Pesos para cada indicador
alpha = 0.2    # Peso para sale_frequency
beta = 0.2     # Peso para sale_volume
gamma = 0.6    # Peso para product_similarity

# Calcular la métrica de compatibilidad
merged_df['compatibility'] = (
    alpha * merged_df['sale_frequency'] +
    beta * merged_df['sale_proportion'] +
    gamma * merged_df['product_similarity']
)

merged_df.head()

Unnamed: 0,customer_id,product_id,sale_frequency,sale_proportion,product_similarity,compatibility
0,499920078,1,0.025,0.000243,-0.243338,-0.140954
1,499920078,24,0.475,0.001629,0.004828,0.098223
2,499920078,100,1.0,0.12136,-0.243619,0.078101
3,499920078,101,0.075,0.001028,-0.162359,-0.08221
4,499920078,117,0.025,0.000343,-0.194351,-0.111542


In [820]:
compatible_df = merged_df.drop(columns=['sale_frequency', 'sale_proportion', 'product_similarity'])
compatible_df.head()

Unnamed: 0,customer_id,product_id,compatibility
0,499920078,1,-0.140954
1,499920078,24,0.098223
2,499920078,100,0.078101
3,499920078,101,-0.08221
4,499920078,117,-0.111542


#### Ordenar por Compatibilidad

In [821]:
# Ordena el dataframe por 'compatibility' y 'customer_id' en orden descendente
sorted_df = compatible_df.sort_values(by=['compatibility', 'customer_id'], ascending=[False, True])

sorted_df.head()

Unnamed: 0,customer_id,product_id,compatibility
70685,500160639,9460,0.804415
99638,500281490,9460,0.77997
181158,510278187,9460,0.779869
101247,500283413,9460,0.775299
37166,500058196,9460,0.774932


#### Gestionar Clientes Duplicados 

In [822]:
### Aquí definimos el criterio de filtrado por ejemplo si queremos juntar los mejores 10 productos de cada cliente

# Por ahora solo dejamos el mejor producto por cliente
unique_df = sorted_df.drop_duplicates(subset='customer_id', keep='first')

# unique_df.head(50).customer_id.tolist()

unique_df.head()

Unnamed: 0,customer_id,product_id,compatibility
70685,500160639,9460,0.804415
99638,500281490,9460,0.77997
181158,510278187,9460,0.779869
101247,500283413,9460,0.775299
37166,500058196,9460,0.774932


### Validación

#### Comparación entre datos reales y predichos con ID

In [823]:
# Id de Producto
product_id = 9465

# Pesos de indicadores de valores reales
alpha = 0.5
beta = 0.5

product_df = sales_df[sales_df['product_id'] == product_id].copy()

product_df['success'] = (
        alpha * product_df['sale_frequency'] +
        beta * product_df['sale_proportion']
    )

product_success = product_df.sort_values(by='success', ascending=False)

product_success.head()

sale_date,customer_id,product_id,sale_frequency,sale_proportion,success
85258,500190103,9465,0.775,0.001345,0.388172
77874,500173894,9465,0.75,0.000696,0.375348
103382,500286253,9465,0.675,0.001729,0.338364
181162,510278187,9465,0.675,0.000565,0.337783
102193,500285250,9465,0.65,0.00121,0.325605


In [824]:
def compare_top_predictions(real_top, predicted_top, top_n):
    # Convierte la primera columna en listas de valores
    real_set = set(real_top.iloc[:top_n, 0].tolist())
    pred_set = set(predicted_top.iloc[:top_n, 0].tolist())
    
    # Cuenta los elementos en común entre ambos conjuntos
    match_count = len(real_set.intersection(pred_set))
    return match_count

In [825]:
# Top clientes predichos (ordenados por el puntaje predicho)
top1 = compare_top_predictions(product_success, unique_df, top_n=1)
top5 = compare_top_predictions(product_success, unique_df, top_n=5)
top10 = compare_top_predictions(product_success, unique_df, top_n=10)
top20 = compare_top_predictions(product_success, unique_df, top_n=20)
top50 = compare_top_predictions(product_success, unique_df, top_n=50)
top100 = compare_top_predictions(product_success, unique_df, top_n=100)

print(f"Precisión Top 1: {top1}/1 = {top1 / 1 * 100:.2f}%")
print(f"Precisión Top 5: {top5}/5 = {top5 / 5 * 100:.2f}%")
print(f"Precisión Top 10: {top10}/10 = {top10 / 10 * 100:.2f}%")
print(f"Precisión Top 20: {top20}/20 = {top20 / 20 * 100:.2f}%")
print(f"Precisión Top 50: {top50}/50 = {top50 / 50 * 100:.2f}%")
print(f"Precisión Top 100: {top100}/100 = {top100 / 100 * 100:.2f}%")

Precisión Top 1: 0/1 = 0.00%
Precisión Top 5: 1/5 = 20.00%
Precisión Top 10: 1/10 = 10.00%
Precisión Top 20: 7/20 = 35.00%
Precisión Top 50: 22/50 = 44.00%
Precisión Top 100: 42/100 = 42.00%
