In [18]:
# 1. Importar librerías y datos
import pandas as pd
from google.colab import files

# Upload a file from the local machine through the Colab file picker
uploaded = files.upload()

# If nothing was uploaded (e.g. the dialog was dismissed) the mapping is empty;
# fail with a clear message instead of an opaque IndexError on the lookup below.
if not uploaded:
    raise ValueError(
        "No se subió ningún archivo; vuelve a ejecutar la celda y selecciona un CSV."
    )

# Load the CSV using the name of the first uploaded file
uploaded_name = next(iter(uploaded))
df = pd.read_csv(uploaded_name)
print("Checkpoint inicial:")
print(df.head())

# 2. Drop the 'ID' column
df = df.drop(columns=["ID"])
print("Checkpoint 1 - Eliminada columna ID:")
print(df.head())


# 3. Process "Reason for Absence"
# Collapse the numeric reason codes into four 0/1 indicator columns, one per
# code range (both ends inclusive). A code outside every range (e.g. 0) gets
# 0 in all four indicators.
#
# The flags are built directly from the code ranges rather than by slicing
# get_dummies() output, so that:
#   * the result is always integer 0/1 (get_dummies defaults to bool dtype in
#     recent pandas versions),
#   * a range with no codes present in the data yields 0 instead of NaN,
#   * the new columns are named explicitly instead of by position.
reason_codes = df["Reason for Absence"]

reason_type_1 = reason_codes.between(1, 14).astype(int)
reason_type_2 = reason_codes.between(15, 17).astype(int)
reason_type_3 = reason_codes.between(18, 21).astype(int)
reason_type_4 = reason_codes.between(22, 28).astype(int)

# Replace the original column with the four grouped indicators, appended at
# the end of the frame in order.
df = df.drop(columns=["Reason for Absence"]).assign(
    Reason_1=reason_type_1,
    Reason_2=reason_type_2,
    Reason_3=reason_type_3,
    Reason_4=reason_type_4,
)

print("Checkpoint 2 - Procesado Reason for Absence:")
print(df.head())

# 4. Process the Date column
# Parse the date strings, reading the first number as the day
parsed_dates = pd.to_datetime(df["Date"], dayfirst=True)

# Swap the raw Date column for two derived features appended at the end:
# the month number and the day of the week (Monday = 0).
df = df.drop(columns=["Date"]).assign(
    Month=parsed_dates.dt.month,
    Day_of_Week=parsed_dates.dt.dayofweek,
)

print("Checkpoint 3 - Procesada columna Date:")
print(df.head())


# 5. Process the Education column
# Collapse education into a binary flag: code 1 -> 0, any higher code -> 1.
# NOTE(review): assumes the usual coding for this dataset (1 = high school,
# 2-4 = higher education) — confirm against the data dictionary.
# The previous test (x == 0) never matches when the codes start at 1, which
# turned the whole column into zeros.
df["Education"] = (df["Education"] > 1).astype(int)

print("Checkpoint 4 - Procesada columna Education:")
print(df["Education"].value_counts())


# 6. Final result
print("DataFrame final:")
print(df.head())
print("Dimensiones finales:", df.shape)

# Write the cleaned frame to disk (without the index column) and hand the
# file to the browser for download
cleaned_csv = "df-cleaned.csv"
df.to_csv(cleaned_csv, index=False)
files.download(cleaned_csv)


Saving Absenteeism-data.csv to Absenteeism-data (3).csv
Checkpoint inicial:
   ID  Reason for Absence        Date  Transportation Expense  \
0  11                  26  07/07/2015                     289   
1  36                   0  14/07/2015                     118   
2   3                  23  15/07/2015                     179   
3   7                   7  16/07/2015                     279   
4  11                  23  23/07/2015                     289   

   Distance to Work  Age  Daily Work Load Average  Body Mass Index  Education  \
0                36   33                  239.554               30          1   
1                13   50                  239.554               31          1   
2                51   38                  239.554               31          1   
3                 5   39                  239.554               24          1   
4                36   33                  239.554               30          1   

   Children  Pets  Absenteeism Time in Hours  

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>