# How to write `Function`


In [1]:
def my_function(x, y):
    """Skeleton showing the basic shape of a function definition.

    Accepts two arguments, does nothing with them, and returns ``None``.
    """
    return None

In [10]:
def my_sq(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

In [5]:
my_sq(2)

4

In [11]:
assert my_sq(4) == 16

In [12]:
def avg_2(x, y):
    """Return the arithmetic mean of ``x`` and ``y``."""
    total = x + y
    return total / 2

In [13]:
avg_2(10, 20)

15.0

# Create DataFrame

In [4]:
import pandas as pd

In [5]:
# Small two-column frame used by the examples in this notebook.
df = pd.DataFrame(
    data={'a': [10, 20, 30], 'b': [20, 30, 40]},
)

In [6]:
df

Unnamed: 0,a,b
0,10,20
1,20,30
2,30,40


# How to use `apply()` in Pandas

### `Broadcasting` in Pandas

In [7]:
# Broadcasting in Pandas
df['a'] ** 2

0    100
1    400
2    900
Name: a, dtype: int64

In [11]:
my_sq

<function __main__.my_sq(x)>

In [19]:
# We can pass any function to apply(): either one we wrote ourselves or a pre-defined one.
# On a Series, apply() calls the function once for each element.
df['a'].apply(my_sq)

0    100
1    400
2    900
Name: a, dtype: int64

In [13]:
# We can also use a function with two or more arguments.
def my_exp(x, e):
    """Return ``x`` raised to the power ``e``."""
    return pow(x, e)

In [15]:
df['a'].apply(my_exp, e=4)

0     10000
1    160000
2    810000
Name: a, dtype: int64

In [20]:
def print_me(x):
    """Print ``x`` to standard output; nothing is returned."""
    print(x)
    return None

In [21]:
df.apply(print_me)

0    10
1    20
2    30
Name: a, dtype: int64
0    20
1    30
2    40
Name: b, dtype: int64


a    None
b    None
dtype: object

In [32]:
df

Unnamed: 0,a,b
0,10,20
1,20,30
2,30,40


In [26]:
def avg_3(x, y, z):
    """Return the arithmetic mean of the three arguments."""
    total = x + y + z
    return total / 3

In [25]:
# When apply() is called on a DataFrame, the function receives a whole column (a vector) as its argument, unlike on a Series, where it receives each element.

df.apply(avg_3)

TypeError: ("avg_3() missing 2 required positional arguments: 'y' and 'z'", 'occurred at index a')

In [27]:
import numpy as np

In [34]:
def avg_3_apply(col):
    """Return the mean of ``col`` as computed by ``np.mean``."""
    mean_value = np.mean(col)
    return mean_value

In [35]:
df.apply(avg_3_apply)

a    20.0
b    30.0
dtype: float64

In [40]:
df

Unnamed: 0,a,b
0,10,20
1,20,30
2,30,40


In [33]:
def avg_3_apply(col):
    """Average the entries stored under keys 0, 1 and 2 of ``col``.

    ``col`` must be indexable with 0, 1 and 2; exactly those three
    entries are read, so a shorter input fails on the lookup.
    """
    x, y, z = col[0], col[1], col[2]
    total = x + y + z
    return total / 3

In [29]:
df.apply(avg_3_apply)

a    20.0
b    30.0
dtype: float64

In [34]:
# This fails because with axis='columns' each row holds only 2 values (a and b), but the function reads 3 of them.
df.apply(avg_3_apply, axis='columns')

IndexError: ('index out of bounds', 'occurred at index 0')

In [41]:
df['a'].mean()

20.0

In [42]:
df['a'] + df['b']

0    30
1    50
2    70
dtype: int64

In [45]:
def avg_2_mod(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    The ``if`` test needs a single truth value, so ``x`` has to be a
    scalar here.

    Parameters
    ----------
    x, y : numbers to average.

    Returns
    -------
    ``np.nan`` if ``x == 20``, otherwise ``(x + y) / 2``.
    """
    if x == 20:
        # Use the canonical lowercase name: the np.NaN / np.NAN aliases
        # were removed in NumPy 2.0 and raise AttributeError there.
        return np.nan
    return (x + y) / 2

In [46]:
df

Unnamed: 0,a,b
0,10,20
1,20,30
2,30,40


In [47]:
# I want avg_2_mod() to receive the values element by element: x = 10, y = 20 first, and so on.

avg_2_mod(df['a'], df['b'])

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [48]:
import numpy as np

In [49]:
# Vectorize the function 'avg_2_mod' and save the result in the variable avg_2_mod_vec.
avg_2_mod_vec = np.vectorize(avg_2_mod)

In [51]:
df

Unnamed: 0,a,b
0,10,20
1,20,30
2,30,40


In [50]:
avg_2_mod_vec(df['a'], df['b'])

array([15., nan, 35.])

In [53]:
# We are using a decorator here.
@np.vectorize
def avg_2_mod(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    The body is written for one scalar pair; the ``np.vectorize``
    decorator wraps it so it can be called with array-like arguments
    and returns an array of results.

    Parameters
    ----------
    x, y : numbers to average (one pair per call of the wrapped body).

    Returns
    -------
    ``np.nan`` if ``x == 20``, otherwise ``(x + y) / 2``.
    """
    if x == 20:
        # Use the canonical lowercase name: the np.NaN / np.NAN aliases
        # were removed in NumPy 2.0 and raise AttributeError there.
        return np.nan
    return (x + y) / 2

In [54]:
avg_2_mod(df['a'], df['b'])

array([15., nan, 35.])

# `numba` Library

In [36]:
# The numba library is used for scientific computation and is really fast; it only takes NumPy arrays as arguments.
import numba

In [59]:
@numba.vectorize
def avg_2_mod_numba(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    The body is written for one scalar pair and wrapped with
    ``numba.vectorize``.

    Parameters
    ----------
    x, y : numbers to average (one pair per call of the wrapped body).

    Returns
    -------
    ``np.nan`` if ``x == 20``, otherwise ``(x + y) / 2``.
    """
    if x == 20:
        # Use the canonical lowercase name: the np.NaN / np.NAN aliases
        # were removed in NumPy 2.0 and raise AttributeError there.
        return np.nan
    return (x + y) / 2

In [60]:
avg_2_mod_numba(df['a'].values, df['b'].values)

array([15., nan, 35.])

# Measuring execution time: IPython cell magic `%%timeit`

In [62]:
%%timeit
avg_2(df['a'], df['b'])

445 µs ± 7.79 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [63]:
%%timeit
avg_2_mod(df['a'], df['b'])

211 µs ± 7.73 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [64]:
%%timeit
avg_2_mod_numba(df['a'].values, df['b'].values)

8.01 µs ± 226 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
