In [16]:
import pandas as pd

In [None]:
# Load the employee table from the project-relative data folder.
employees_csv_path = "./data/empleados.csv"
employee_df = pd.read_csv(employees_csv_path)
# Preview the first five rows.
employee_df.head(n=5)

Unnamed: 0,ID_Empleado,Nombre,Departamento,Salario,Fecha_Ingreso
0,1,Ana,Ventas,50000,2015-06-01
1,2,Luis,TI,70000,2017-03-15
2,3,Pedro,Marketing,60000,2018-11-01
3,4,Marta,TI,75000,2020-01-10
4,5,Carlos,Ventas,52000,2016-05-01


In [18]:
# Load the bonus table from the project-relative data folder.
bonuses_csv_path = "./data/bonificaciones.csv"
bonuses_df = pd.read_csv(bonuses_csv_path)
# Preview the first five rows.
bonuses_df.head(n=5)

Unnamed: 0,ID_Empleado,Bonificacion
0,1,5000
1,2,8000
2,3,3000
3,4,10000
4,5,2500


In [19]:
#---------------------------------------------
# Aplicaciones de funciones personalizadas
#----------------------------------------------

def salary_year(salary, months=12):
    """Scale a per-period salary up to a yearly amount.

    Parameters
    ----------
    salary : number or array-like
        Salary paid per period. Any object that supports ``*`` with an
        integer works, including a pandas Series.
    months : int, default 12
        Number of pay periods in a year. The default keeps the original
        behaviour (``salary * 12``); pass another value for payrolls that
        use a different number of payments per year.

    Returns
    -------
    Same type as ``salary``
        ``salary * months``.
    """
    return salary * months

# Derive the yearly salary column by applying salary_year to every salary value.
monthly_salaries = employee_df["Salario"]
employee_df["Salario_Anual"] = monthly_salaries.apply(salary_year)
employee_df.head(n=5)

Unnamed: 0,ID_Empleado,Nombre,Departamento,Salario,Fecha_Ingreso,Salario_Anual
0,1,Ana,Ventas,50000,2015-06-01,600000
1,2,Luis,TI,70000,2017-03-15,840000
2,3,Pedro,Marketing,60000,2018-11-01,720000
3,4,Marta,TI,75000,2020-01-10,900000
4,5,Carlos,Ventas,52000,2016-05-01,624000


In [20]:
def if_seniority_is_greaterthan_six(hire_date, today=None, years=6):
    """Tell whether more than ``years`` calendar years have passed since hiring.

    The elapsed time is checked against the calendar anniversary of
    ``hire_date`` rather than by dividing a day count by 365. The day-count
    approach ignores leap days and reports True up to two days before the
    real anniversary.

    Parameters
    ----------
    hire_date : str or datetime-like
        Hiring date; anything ``pd.to_datetime`` can parse.
    today : str or datetime-like, optional
        Reference moment to measure against. When omitted, the current local
        time is used, so results change from run to run; pass a fixed value
        for reproducible output.
    years : int, default 6
        Seniority threshold in whole calendar years.

    Returns
    -------
    bool
        True when ``today`` is strictly later than ``hire_date`` plus
        ``years`` years, False otherwise.
    """
    reference = pd.to_datetime('today') if today is None else pd.to_datetime(today)
    # DateOffset(years=...) moves by calendar years, so leap days are handled.
    anniversary = pd.to_datetime(hire_date) + pd.DateOffset(years=years)
    return bool(reference > anniversary)

# Flag every employee by running the seniority check on their hire date.
hire_dates = employee_df["Fecha_Ingreso"]
employee_df["Antiguedad_mayor_6"] = hire_dates.apply(if_seniority_is_greaterthan_six)
employee_df.head(n=5)

Unnamed: 0,ID_Empleado,Nombre,Departamento,Salario,Fecha_Ingreso,Salario_Anual,Antiguedad_mayor_6
0,1,Ana,Ventas,50000,2015-06-01,600000,True
1,2,Luis,TI,70000,2017-03-15,840000,True
2,3,Pedro,Marketing,60000,2018-11-01,720000,True
3,4,Marta,TI,75000,2020-01-10,900000,False
4,5,Carlos,Ventas,52000,2016-05-01,624000,True


In [22]:
#---------------------------------------------
# pivot tables
#----------------------------------------------

# Mean salary per department, one row per 'Departamento' value.
pivot_department = employee_df.pivot_table(
    index='Departamento',
    values='Salario',
    aggfunc='mean',
)
pivot_department

Unnamed: 0_level_0,Salario
Departamento,Unnamed: 1_level_1
Marketing,59500.0
TI,72500.0
Ventas,50000.0


In [23]:
#---------------------------------------------
# merge and join
#----------------------------------------------
# Left merge: every employee row is kept and the bonus with the same
# 'ID_Empleado' is attached to it.
full_df = pd.merge(
    left=employee_df,
    right=bonuses_df,
    how='left',
    on='ID_Empleado',
)
full_df

Unnamed: 0,ID_Empleado,Nombre,Departamento,Salario,Fecha_Ingreso,Salario_Anual,Antiguedad_mayor_6,Bonificacion
0,1,Ana,Ventas,50000,2015-06-01,600000,True,5000
1,2,Luis,TI,70000,2017-03-15,840000,True,8000
2,3,Pedro,Marketing,60000,2018-11-01,720000,True,3000
3,4,Marta,TI,75000,2020-01-10,900000,False,10000
4,5,Carlos,Ventas,52000,2016-05-01,624000,True,2500
5,6,Laura,Marketing,59000,2019-07-23,708000,False,4000
6,7,José,Ventas,48000,2021-01-01,576000,False,1500


In [None]:
# Department names and the city each one is located in, position by position.
department_names = ['Ventas', 'TI', 'Marketing']
department_cities = ['Madrid', 'Barcelona', 'Valencia']
departments_data = {'Departamento': department_names, 'Ubicacion': department_cities}

departments_df = pd.DataFrame(departments_data)
departments_df

Unnamed: 0,Departamento,Ubicacion
0,Ventas,Madrid
1,TI,Barcelona
2,Marketing,Valencia


In [None]:
# Use 'Departamento' as the index of the departments DataFrame.
# The guard plus plain re-assignment (instead of inplace=True) makes re-running
# this cell a no-op; the in-place version raises KeyError on a second run
# because the column has already been moved into the index.
if departments_df.index.name != 'Departamento':
    departments_df = departments_df.set_index('Departamento')
departments_df

Unnamed: 0_level_0,Ubicacion
Departamento,Unnamed: 1_level_1
Ventas,Madrid
TI,Barcelona
Marketing,Valencia


In [28]:
# Index the employees by department so the join lines up with the index of
# departments_df, then attach the department columns to every employee row.
employees_by_department = employee_df.set_index('Departamento')
join_df = employees_by_department.join(departments_df, how='left')
join_df

Unnamed: 0_level_0,ID_Empleado,Nombre,Salario,Fecha_Ingreso,Salario_Anual,Antiguedad_mayor_6,Ubicacion
Departamento,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ventas,1,Ana,50000,2015-06-01,600000,True,Madrid
TI,2,Luis,70000,2017-03-15,840000,True,Barcelona
Marketing,3,Pedro,60000,2018-11-01,720000,True,Valencia
TI,4,Marta,75000,2020-01-10,900000,False,Barcelona
Ventas,5,Carlos,52000,2016-05-01,624000,True,Madrid
Marketing,6,Laura,59000,2019-07-23,708000,False,Valencia
Ventas,7,José,48000,2021-01-01,576000,False,Madrid


In [33]:
# Build the example DataFrame: one row per product with its unit price and quantity.
data = dict(
    producto=['Camisa', 'Pantalón', 'Zapatos', 'Sudadera', 'Sombrero'],
    precio=[20.0, 40.0, 50.0, 60.0, 15.0],
    cantidad=[2, 3, 1, 2, 5],
)

df = pd.DataFrame(data)
df

Unnamed: 0,producto,precio,cantidad
0,Camisa,20.0,2
1,Pantalón,40.0,3
2,Zapatos,50.0,1
3,Sudadera,60.0,2
4,Sombrero,15.0,5


In [36]:
def fun(num):
    """Classify a price as "Low", "Normal" or "High".

    Values up to and including 20 are "Low", values up to and including 40
    are "Normal", and everything else is "High".
    """
    # Upper bounds are checked in ascending order; the first one that holds wins.
    for upper_bound, label in ((20, "Low"), (40, "Normal")):
        if num <= upper_bound:
            return label
    return "High"

# Line total per product. Multiplying the two columns directly is vectorized
# and yields the same values as the previous row-wise apply(axis=1), without
# running a Python lambda once per row.
df['total'] = df['precio'] * df['cantidad']
# Label every unit price with the classification returned by fun.
df['clasificacion'] = df['precio'].apply(fun)


In [37]:
# Display the product table, now including the 'total' and 'clasificacion' columns.
df

Unnamed: 0,producto,precio,cantidad,total,clasificacion
0,Camisa,20.0,2,40.0,Low
1,Pantalón,40.0,3,120.0,Normal
2,Zapatos,50.0,1,50.0,High
3,Sudadera,60.0,2,120.0,High
4,Sombrero,15.0,5,75.0,Low
