# Proyecto: Definición y aplicación de funciones con 'lambda x: f(x)'

### En el presente script usamos algunas funciones como:
`.apply( func, parametros)` <- Aplica la función 'func' con sus 'parámetros'

`.apply( lambda x: f(x))` <- Define y aplica la función 'f(x)'

`.apply( lambda x: f(x) , axis=0)` <- Define y aplica la función 'f(x)' a cada columna

`.apply( lambda x: f(x) , axis=1)` <- Define y aplica la función 'f(x)' a cada renglón

`.applymap( lambda x: f(x))` <- Define y aplica la función 'f(x)' a todo el dataframe

In [1]:
import numpy as np

import pandas as pd

In [2]:
# Load the dataset from the local CSV file into a dataframe.
df = pd.read_csv(filepath_or_buffer='./db/Bikes/london_merged_new.csv')

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season,hour
0,182,3.0,2.0,93.0,6.0,3.0,0.0,1.0,3.0,0
1,138,3.0,2.5,93.0,5.0,1.0,0.0,1.0,3.0,1
2,134,2.5,2.5,96.5,0.0,1.0,0.0,1.0,3.0,2
3,72,2.0,2.0,100.0,0.0,1.0,0.0,1.0,3.0,3
4,47,2.0,0.0,93.0,6.5,1.0,0.0,1.0,3.0,4


In [3]:
# Define a helper function:
def func_1(x , a=0 , b=1):
    """Return ``x`` squared plus the product ``a * b``.

    Parameters
    ----------
    x : value supporting ``**`` and ``+`` (e.g. a number or a pandas Series)
        The quantity to square.
    a, b : optional
        Factors whose product is added to the square. With the defaults
        (``a=0``, ``b=1``) the result is simply ``x ** 2``.

    Returns
    -------
    The value of ``x ** 2 + a * b``.
    """
    squared = x ** 2
    offset = a * b
    return squared + offset

In [4]:
# Call the function directly on a dataframe column (the whole Series is
# passed in as `x`, so the arithmetic is evaluated on the column at once):
func_1(x=df['hour'], a=1, b=5)


0          5
1          6
2          9
3         14
4         21
        ... 
17409    366
17410    405
17411    446
17412    489
17413    534
Name: hour, Length: 17414, dtype: int64

In [5]:
# Apply the function to a dataframe column through '.apply()'.
# The extra positional arguments in `args` are forwarded to func_1 after the
# value itself, i.e. as a=1 and b=5.
df['hour'].apply(func_1, args=(1, 5))

0          5
1          6
2          9
3         14
4         21
        ... 
17409    366
17410    405
17411    446
17412    489
17413    534
Name: hour, Length: 17414, dtype: int64

In [6]:
# Apply the same function to several columns at once:

# Square each of the selected columns (axis=0 hands the lambda one column at a time)
df[['t1', 't2']].apply(lambda col: col ** 2, axis=0)

Unnamed: 0,t1,t2
0,9.00,4.00
1,9.00,6.25
2,6.25,6.25
3,4.00,4.00
4,4.00,0.00
...,...,...
17409,25.00,1.00
17410,25.00,1.00
17411,30.25,2.25
17412,30.25,2.25


In [7]:
# Compute the mean of the 'cnt' column
df['cnt'].mean()

1143.1016423567244

In [8]:
# Compute the mean of every column of the dataframe
# (axis='index' is the same as axis=0: the lambda receives one column at a time)
df.apply(lambda col: col.mean(), axis='index')

cnt             1143.101642
t1                12.468091
t2                11.520836
hum               72.324954
wind_speed        15.913063
weather_code       2.722752
is_holiday         0.022051
is_weekend         0.285403
season             1.492075
hour              11.513265
dtype: float64

In [9]:
# Compute the mean of the row at position 4:
df.iloc[4].mean()

15.75

In [10]:
# Compute the mean of every row of the dataframe
# (axis='columns' is the same as axis=1: the lambda receives one row at a time)
df.apply(lambda row: row.mean(), axis='columns')

0         29.30
1         24.75
2         24.25
3         18.40
4         15.75
          ...  
17409    117.30
17410     67.60
17411     47.45
17412     35.90
17413     27.10
Length: 17414, dtype: float64

In [11]:
# Absolute value of the difference between two columns (vectorized):
(df['t1'] - df['t2']).abs()

0        1.0
1        0.5
2        0.0
3        0.0
4        2.0
        ... 
17409    4.0
17410    4.0
17411    4.0
17412    4.0
17413    4.0
Length: 17414, dtype: float64

In [12]:
# Absolute value of the difference between two columns, this time row by row
# with .apply (each `row` is a Series indexed by column name):
df.apply(lambda row: np.absolute(row['t2'] - row['t1']), axis='columns')

0        1.0
1        0.5
2        0.0
3        0.0
4        2.0
        ... 
17409    4.0
17410    4.0
17411    4.0
17412    4.0
17413    4.0
Length: 17414, dtype: float64

In [13]:
# Standardize every column of the dataframe: subtract the column mean and
# divide by the column standard deviation. ddof=0 is spelled out because that
# is what np.std uses, whereas pandas' own default for .std() is ddof=1.
df_norm = df.apply(lambda col: (col - col.mean()) / col.std(ddof=0), axis='index')
df_norm

Unnamed: 0,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season,hour
0,-0.885745,-1.699331,-1.439290,1.444517,-1.255717,0.118427,-0.150161,1.582347,1.347711,-1.664802
1,-0.926295,-1.699331,-1.363703,1.444517,-1.382390,-0.735874,-0.150161,1.582347,1.347711,-1.520204
2,-0.929982,-1.789071,-1.363703,1.689054,-2.015755,-0.735874,-0.150161,1.582347,1.347711,-1.375605
3,-0.987120,-1.878811,-1.439290,1.933590,-2.015755,-0.735874,-0.150161,1.582347,1.347711,-1.231007
4,-1.010160,-1.878811,-1.741635,1.444517,-1.192381,-0.735874,-0.150161,1.582347,1.347711,-1.086408
...,...,...,...,...,...,...,...,...,...,...
17409,-0.093175,-1.340371,-1.590462,0.606105,0.391031,0.118427,-0.150161,-0.631973,1.347711,1.082572
17410,-0.554893,-1.340371,-1.590462,0.606105,0.644377,0.545577,-0.150161,-0.631973,1.347711,1.227170
17411,-0.742898,-1.250632,-1.514876,0.431436,1.024396,0.545577,-0.150161,-0.631973,1.347711,1.371769
17412,-0.847038,-1.250632,-1.514876,0.256767,0.897723,0.545577,-0.150161,-0.631973,1.347711,1.516368


In [14]:
# Check that the columns are standardized:

# Mean of every column; each one should be ~0.
# Rounding to 6 decimals removes floating-point noise. Rounding to the nearest
# integer (np.round without `decimals`) would also report 0 for any mean in
# (-0.5, 0.5) and therefore could not detect a failed standardization.
df_norm.apply( lambda x: np.round(np.mean(x), 6) , axis=0)

cnt             0.0
t1             -0.0
t2             -0.0
hum            -0.0
wind_speed      0.0
weather_code    0.0
is_holiday      0.0
is_weekend      0.0
season          0.0
hour           -0.0
dtype: float64

In [15]:
# Check that the columns are standardized:

# Standard deviation of every column; each one should be ~1.
# Rounding to 6 decimals removes floating-point noise. Rounding to the nearest
# integer (np.round without `decimals`) would also report 1 for any deviation
# in (0.5, 1.5) and therefore could not detect a failed standardization.
df_norm.apply( lambda x: np.round(np.std(x), 6) , axis=0)

cnt             1.0
t1              1.0
t2              1.0
hum             1.0
wind_speed      1.0
weather_code    1.0
is_holiday      1.0
is_weekend      1.0
season          1.0
hour            1.0
dtype: float64

In [16]:
# Apply a function element-wise to the whole dataframe:

# `DataFrame.applymap` is deprecated since pandas 2.1 in favour of the
# equivalent `DataFrame.map`. Use `.map` when this pandas version provides it
# and fall back to `.applymap` on older versions, so the cell runs without a
# deprecation warning either way.
(df.map if hasattr(df, 'map') else df.applymap)( lambda x: x/1000)

Unnamed: 0,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season,hour
0,0.182,0.0030,0.0020,0.0930,0.0060,0.003,0.0,0.001,0.003,0.000
1,0.138,0.0030,0.0025,0.0930,0.0050,0.001,0.0,0.001,0.003,0.001
2,0.134,0.0025,0.0025,0.0965,0.0000,0.001,0.0,0.001,0.003,0.002
3,0.072,0.0020,0.0020,0.1000,0.0000,0.001,0.0,0.001,0.003,0.003
4,0.047,0.0020,0.0000,0.0930,0.0065,0.001,0.0,0.001,0.003,0.004
...,...,...,...,...,...,...,...,...,...,...
17409,1.042,0.0050,0.0010,0.0810,0.0190,0.003,0.0,0.000,0.003,0.019
17410,0.541,0.0050,0.0010,0.0810,0.0210,0.004,0.0,0.000,0.003,0.020
17411,0.337,0.0055,0.0015,0.0785,0.0240,0.004,0.0,0.000,0.003,0.021
17412,0.224,0.0055,0.0015,0.0760,0.0230,0.004,0.0,0.000,0.003,0.022
