In [1]:
import pandas as pd

def tipifica_variables(
    df: pd.DataFrame, umbral_categoria: int, umbral_continua: float
) -> pd.DataFrame:
    """
    Suggest a variable type for every column based only on its cardinality.

    Each column is classified by the first rule that matches:

    1. exactly 2 distinct values                      -> "Binaria"
    2. fewer than ``umbral_categoria`` distinct values -> "Categorica"
    3. distinct values / number of rows is at least
       ``umbral_continua``                            -> "Numerica Continua"
    4. anything else                                  -> "Numerica Discreta"

    The column dtype is never inspected, so a text column with many distinct
    values is still labelled "Numerica ..."; treat the result as a suggestion.

    Arguments:
    df (pd.DataFrame): Input dataframe.
    umbral_categoria (int): Cardinality below which a column is categorical.
    umbral_continua (float): Minimum ratio of distinct values to rows
        (0-1) for a column to be considered continuous.

    Returns:
    pd.DataFrame: One row per input column, with the columns
        "nombre_variable" and "tipo_sugerido". Both columns are present
        even when ``df`` has no columns.
    """
    n_rows = len(df)  # loop-invariant, computed once
    results = []

    # Iterating with items() yields one Series per column position, so
    # duplicated column labels are handled instead of returning a DataFrame.
    for col, serie in df.items():
        # nunique() ignores missing values by default.
        cardinality = serie.nunique()
        # Guard against a dataframe with columns but no rows, which would
        # otherwise raise ZeroDivisionError.
        cardinality_percentage = cardinality / n_rows if n_rows else 0.0

        if cardinality == 2:
            tipo = "Binaria"
        elif cardinality < umbral_categoria:
            tipo = "Categorica"
        elif cardinality_percentage >= umbral_continua:
            tipo = "Numerica Continua"
        else:
            tipo = "Numerica Discreta"

        results.append({"nombre_variable": col, "tipo_sugerido": tipo})

    # Explicit columns keep the output schema stable for an empty result.
    return pd.DataFrame(results, columns=["nombre_variable", "tipo_sugerido"])

# Usage example
if __name__ == "__main__":
    # Test thresholds: cardinality cut-off for categorical columns and
    # minimum distinct-values/rows ratio for continuous columns.
    umbral_categoria = 25
    umbral_continua = 0.1

    # Load the Excel file into a dataframe.
    file_path = "C:/Users/Jesus/Documents/DS_Online_Octubre24_Exercises/04_Machine_Learning/Sprint_10/Team_Challenge/nike_sales_2024.xlsx"
    df_nike = pd.read_excel(file_path)

    # Classify every column and show the suggested types.
    resultado = tipifica_variables(
        df=df_nike,
        umbral_categoria=umbral_categoria,
        umbral_continua=umbral_continua,
    )
    print(resultado)


           nombre_variable      tipo_sugerido
0                    Month  Numerica Discreta
1                   Region         Categorica
2            Main_Category         Categorica
3             Sub_Category  Numerica Discreta
4             Product_Line  Numerica Discreta
5               Price_Tier         Categorica
6               Units_Sold  Numerica Continua
7              Revenue_USD  Numerica Continua
8  Online_Sales_Percentage  Numerica Discreta
9             Retail_Price  Numerica Discreta
