In [4]:
import pandas as pd
import numpy as np
# Show every column when a DataFrame is displayed (no column limit).
pd.set_option('display.max_columns', None)

---

# Columnas Categóricas

In [5]:
# Load the HR CSV export, using its first column as the row index.
df = pd.read_csv("hr_raw_data_final.csv", index_col = 0)

In [20]:
# List the dtype pandas assigned to each column of df.
df.dtypes

age                          object
attrition                    object
businesstravel               object
dailyrate                   float64
department                   object
distancefromhome              int64
education                     int64
educationfield               object
employeecount                 int64
employeenumber                int64
environmentsatisfaction       int64
gender                        int64
hourlyrate                  float64
jobinvolvement                int64
joblevel                      int64
jobrole                      object
jobsatisfaction               int64
maritalstatus                object
monthlyincome                object
monthlyrate                  object
numcompaniesworked            int64
over18                       object
overtime                     object
percentsalaryhike             int64
performancerating            object
relationshipsatisfaction      int64
standardhours                object
stockoptionlevel            

In [4]:
def buscar_columnas_categoricas(df):
    """
    Find the categorical columns of a DataFrame.

    A column is treated as categorical when its dtype is either ``object``
    (the dtype pandas commonly uses for text read from a CSV) or the
    dedicated ``category`` dtype.

    Parameters:
    - df (pd.DataFrame): The DataFrame whose columns are inspected.

    Returns:
    - list: Names of the columns with ``object`` or ``category`` dtype,
      in the same order as they appear in ``df``.
    """
    # Include 'category' as well as 'object': columns that were already cast
    # to the pandas categorical dtype would otherwise be left out.
    columnas_categoricas = df.select_dtypes(include=['object', 'category']).columns.tolist()
    return columnas_categoricas


In [5]:
# Print the column names that buscar_columnas_categoricas returns for df.
print(f"Columnas categoricas: {buscar_columnas_categoricas(df)}")

Columnas categoricas: ['age', 'attrition', 'businesstravel', 'department', 'educationfield', 'jobrole', 'maritalstatus', 'monthlyincome', 'monthlyrate', 'over18', 'overtime', 'performancerating', 'standardhours', 'totalworkingyears', 'worklifebalance', 'yearsincurrentrole', 'sameasmonthlyincome', 'salary', 'roledepartament', 'remotework']


---

# Transformación de los NaN

In [6]:
def transformar_nan_sql(df, columna=None, valor_reemplazo='NULL'):
    """
    Replace NaN values with an SQL-friendly placeholder.

    The input DataFrame is never modified: both the single-column and the
    all-columns modes return a new DataFrame.

    Parameters:
    - df (pd.DataFrame): The DataFrame whose NaN values are replaced.
    - columna (optional): Label of the only column to process. When None
      (the default), NaN values are replaced in every column.
    - valor_reemplazo: Value written in place of NaN, e.g. 'NULL', '' or 0.

    Returns:
    - pd.DataFrame: A new DataFrame with the NaN values replaced.

    Raises:
    - ValueError: If ``columna`` is given but is not a column of ``df``.
    """
    # Compare against None explicitly so that falsy labels (0, "") still
    # select a single column instead of falling through to "all columns".
    if columna is None:
        return df.replace({np.nan: valor_reemplazo})

    if columna not in df.columns:
        raise ValueError(f"La columna '{columna}' no existe en el DataFrame.")

    # Work on a copy so the caller's DataFrame is left untouched, which is
    # also how the all-columns branch behaves.
    resultado = df.copy()
    resultado[columna] = resultado[columna].replace({np.nan: valor_reemplazo})
    return resultado

In [None]:
# Replace the NaN values of a single column ("department") with 'NULL'.
# The returned DataFrame is only displayed here; it is not assigned to a name.
transformar_nan_sql(df, columna="department", valor_reemplazo='NULL')

Unnamed: 0,age,attrition,businesstravel,dailyrate,department,distancefromhome,education,educationfield,employeecount,employeenumber,...,yearsatcompany,yearsincurrentrole,yearssincelastpromotion,yearswithcurrmanager,sameasmonthlyincome,datebirth,salary,roledepartament,numberchildren,remotework
0,51,No,,2015.722222,,6,3,,1,1,...,20,,15,15,"16280,83$",1972,"195370,00$",,,Yes
1,52,No,,2063.388889,,1,4,Life Sciences,1,2,...,33,,11,9,,1971,"199990,00$",,,1
2,42,No,travel_rarely,1984.253968,Research & Development,4,2,Technical Degree,1,3,...,22,,11,15,,1981,"192320,00$",ManaGER - Research & Development,,1
3,47,No,travel_rarely,1771.404762,,2,4,Medical,1,4,...,20,,5,6,"14307,50$",1976,"171690,00$",,,False
4,46,No,,1582.771346,,3,3,Technical Degree,1,5,...,19,,2,8,"12783,92$",1977,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1673,43,No,,488.944444,,-26,3,Medical,1,824,...,3,,1,2,"3949,17$",1980,,,,Yes
1674,47,No,,1973.984127,,26,4,,1,1087,...,5,,1,0,"15943,72$",1976,"191324,62$",,,False
1675,29,No,travel_rarely,290.035510,,15,3,,1,528,...,6,,1,5,,1994,"28111,13$",,,False
1676,47,No,travel_rarely,1032.487286,,4,3,Life Sciences,1,76,...,22,,14,10,"8339,32$",1976,"100071,84$",,,Yes


In [8]:
# Replace the NaN values in every column with 'NULL'.
# The returned DataFrame is only displayed here; it is not assigned to a name.
transformar_nan_sql(df, valor_reemplazo='NULL')

Unnamed: 0,age,attrition,businesstravel,dailyrate,department,distancefromhome,education,educationfield,employeecount,employeenumber,...,yearsatcompany,yearsincurrentrole,yearssincelastpromotion,yearswithcurrmanager,sameasmonthlyincome,datebirth,salary,roledepartament,numberchildren,remotework
0,51,No,,2015.722222,,6,3,,1,1,...,20,,15,15,"16280,83$",1972,"195370,00$",,,Yes
1,52,No,,2063.388889,,1,4,Life Sciences,1,2,...,33,,11,9,,1971,"199990,00$",,,1
2,42,No,travel_rarely,1984.253968,Research & Development,4,2,Technical Degree,1,3,...,22,,11,15,,1981,"192320,00$",ManaGER - Research & Development,,1
3,47,No,travel_rarely,1771.404762,,2,4,Medical,1,4,...,20,,5,6,"14307,50$",1976,"171690,00$",,,False
4,46,No,,1582.771346,,3,3,Technical Degree,1,5,...,19,,2,8,"12783,92$",1977,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1673,43,No,,488.944444,,-26,3,Medical,1,824,...,3,,1,2,"3949,17$",1980,,,,Yes
1674,47,No,,1973.984127,,26,4,,1,1087,...,5,,1,0,"15943,72$",1976,"191324,62$",,,False
1675,29,No,travel_rarely,290.035510,,15,3,,1,528,...,6,,1,5,,1994,"28111,13$",,,False
1676,47,No,travel_rarely,1032.487286,,4,3,Life Sciences,1,76,...,22,,14,10,"8339,32$",1976,"100071,84$",,,Yes


In [9]:
# Preview the first 20 rows of df.
df.head(20)

Unnamed: 0,age,attrition,businesstravel,dailyrate,department,distancefromhome,education,educationfield,employeecount,employeenumber,...,yearsatcompany,yearsincurrentrole,yearssincelastpromotion,yearswithcurrmanager,sameasmonthlyincome,datebirth,salary,roledepartament,numberchildren,remotework
0,51,No,,2015.722222,,6,3,,1,1,...,20,,15,15,"16280,83$",1972,"195370,00$",,,Yes
1,52,No,,2063.388889,,1,4,Life Sciences,1,2,...,33,,11,9,,1971,"199990,00$",,,1
2,42,No,travel_rarely,1984.253968,Research & Development,4,2,Technical Degree,1,3,...,22,,11,15,,1981,"192320,00$",ManaGER - Research & Development,,1
3,47,No,travel_rarely,1771.404762,,2,4,Medical,1,4,...,20,,5,6,"14307,50$",1976,"171690,00$",,,False
4,46,No,,1582.771346,,3,3,Technical Degree,1,5,...,19,,2,8,"12783,92$",1977,,,,0
5,48,No,,1771.920635,Research & Development,22,3,Medical,1,6,...,22,,4,7,"14311,67$",1975,,MANAger - Research & Development,,Yes
6,59,No,,1032.487286,,25,3,Life Sciences,1,7,...,21,,7,9,"8339,32$",1964,"100071,84$",,,True
7,42,No,travel_rarely,556.256661,,1,1,,1,8,...,20,,11,6,,1981,"53914,11$",,,0
8,41,No,,1712.18254,,2,5,,1,9,...,18,,11,8,"13829,17$",1982,"165950,00$",,,True
9,41,No,travel_frequently,1973.984127,,9,3,,1,10,...,18,,0,11,"15943,72$",1982,,,,0


In [None]:
# Helper that converts a number to its absolute value

def convertir_a_absoluto(numero):
    """
    Return the absolute value of ``numero``.

    Parameters:
    numero (int | float): Number to convert.

    Returns:
    int | float: The result of applying the built-in ``abs`` to ``numero``.
    """
    valor_absoluto = abs(numero)
    return valor_absoluto

# Example usage
print(convertir_a_absoluto(-5))      # abs(-5) -> prints 5
print(convertir_a_absoluto(-3.14))   # abs(-3.14) -> prints 3.14

In [None]:
# Helper that turns a float into a string

def float_a_string(dato):
    """
    Return the string form of ``dato``.

    Parameters:
    dato (float): Number to convert.

    Returns:
    str: The result of applying the built-in ``str`` to ``dato``.
    """
    texto = str(dato)
    return texto

# Example usage
resultado = float_a_string(123.456)
print(resultado)  # prints 123.456

In [None]:
# Check the type of `resultado` (str when the float_a_string example cell was the last to assign it).
type(resultado)

In [None]:
# Helper that turns a string into an int. Per the original note, it has not
# yet been adapted to the columns that actually need it.

def string_a_int(dato):
    """
    Return ``dato`` converted to an integer.

    Parameters:
    dato: Value to convert; it is passed unchanged to the built-in ``int``.

    Returns:
    int: The result of ``int(dato)``.
    """
    entero = int(dato)
    return entero

# Example usage
# NOTE: the argument below is a float literal, not a string; int() truncates it, so this prints 123.
resultado = string_a_int(123.456)
print(resultado)