In [32]:
import re
import numpy as np
import pandas as pd

# Function definitions

Broken into tokens:

> <b>def </b>
- Tells Python that you're starting to create a function

> <b>function_name</b>
- you give that function a name for a future call

> <b>(</b>
- Opening parenthesis that starts the list of arguments (or variables) you'll use in that function. These help you parametrize code.

> <b>[OPTIONAL] arg1, arg2, arg3, ... </b>
- The arguments of that function

> <b>)</b>
- Closing parenthesis to say you're done declaring arguments

> <b>:</b>
- colon to say you're about to start a `code block`

> <b>code block</b>
- where you effectively do something with (or without) the arguments

> <b>return</b>
- The return statement signals that you're done with the function. Returning a value from it is optional

> <b>[OPTIONAL] something</b>
- The `something` you're allowed to return.



```python
def function_name(arg1):
    something = arg1 + 10
    return something
```

In [33]:
def function_name(arg1):
    """Return ``arg1`` increased by 10."""
    return arg1 + 10

function_name(30)

40

In [34]:
function_name(54)

64

# What is a lambda function?

https://realpython.com/python-lambda/


> Named after `lambda calculus`.

> Usually refers to `anonymous functions`

## Defining lambda functions

In [42]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [37]:
def half(x):
    """Return half of ``x`` using true division (ints give a float result)."""
    halved = x / 2
    return halved

In [38]:
half

<function __main__.half(x)>

In [51]:
x=10
x/2

5.0

In [44]:
f= lambda x : x/2

In [41]:
f(10)

5.0

In [45]:
lambda arg1 : arg1 + 10

<function __main__.<lambda>(arg1)>

In [46]:
(lambda arg1 : arg1 + 10)(30)

40

In [None]:
(lambda arg1 : arg1 + 10)(35)

In [47]:
function_name = lambda arg1 : arg1 + 10

In [48]:
function_name(30)

40

In [52]:
# map
example_list = [1,4,5,8]

In [53]:
def half(x):
    """Divide ``x`` by two and return the quotient."""
    quotient = x / 2
    return quotient

In [54]:
list(map(half, example_list))

[0.5, 2.0, 2.5, 4.0]

In [56]:
f=lambda x : x/2

In [57]:
list(map(f, example_list))

[0.5, 2.0, 2.5, 4.0]

## More than 1 argument

In [58]:
def my_sum(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total

In [59]:
add_args = lambda a, b : a + b
add_args(10, 20)

30

In [66]:
(lambda a, b=0 : a + b)(10,30)

40

In [62]:
from functools import reduce

In [63]:
example_list

[1, 4, 5, 8]

In [64]:
reduce(lambda acc, value : acc + value, example_list)

18

## Conditions

In [69]:
# Dividing by zero raises ZeroDivisionError. Catch and display it so the
# demonstration is still shown but the notebook can complete a full
# "Restart Kernel -> Run All" instead of stopping at this cell.
try:
    9/0
except ZeroDivisionError as exc:
    division_error = f"{type(exc).__name__}: {exc}"
    print(division_error)

ZeroDivisionError: division by zero

In [68]:
safe_div(9, 0)

0

In [70]:
example_tuples=[(10,20),(30,0),(60,30)]

In [67]:
def safe_div(num, denom):
    """
    Divide num by denom, guarding against a zero denominator.

    Returns num/denom when denom is non-zero; otherwise returns 0.
    """
    return num/denom if denom != 0 else 0

In [28]:
[(a/b if b != 0 else 0) for a,b in example_tuples]

[0.5, 0, 2.0]

# Applications



## map

In [74]:
example_list = [1,4,6,7,10,31,13]

In [75]:
def half(x):
    """Return ``x`` divided by 2 (true division)."""
    result = x / 2
    return result

In [76]:
list(map(half, example_list))

[0.5, 2.0, 3.0, 3.5, 5.0, 15.5, 6.5]

In [77]:
list(map(lambda x : x/2, example_list))

[0.5, 2.0, 3.0, 3.5, 5.0, 15.5, 6.5]

## filter

In [78]:
def check_if_even(x):
    """
    Return True when x is evenly divisible by 2, False otherwise.

    The comparison ``x % 2 == 0`` already evaluates to a boolean, so it is
    returned directly instead of being routed through an if/else.
    """
    return x % 2 == 0

In [79]:
example_list

[1, 4, 6, 7, 10, 31, 13]

In [80]:
list(filter(check_if_even, example_list))

[4, 6, 10]

In [90]:
type((lambda x : x % 2 == 0)(7))

bool

In [91]:
list(filter(lambda x : x % 2 == 0, example_list))

[4, 6, 10]

## Using lambdas to order stuff


In [105]:
school_dash = [ 'Art-History','Philosophy', 'Computer-Science', 'Calculus']

In [93]:
sorted(school_dash)

['Art-History', 'Calculus', 'Computer-Science', 'Philosophy']

In [94]:
def get_last_letter(word):
    """
    Return the last character of word.

    A slice is used rather than indexing with -1 so that an empty string
    yields '' instead of raising IndexError. This keeps the function usable
    as a sort key on lists that may contain empty strings.
    """
    return word[-1:]

In [95]:
get_last_letter('Computer-Science')

'e'

In [106]:
sorted(school_dash, key=get_last_letter)

['Computer-Science', 'Calculus', 'Art-History', 'Philosophy']

In [107]:
sorted(school_dash, key=lambda x : x[-1])

['Computer-Science', 'Calculus', 'Art-History', 'Philosophy']

# Ordering a dictionary by its values

In [108]:
my_dict = {'Andre': 80, 
           'Joao' : 90 , 
           'Pedro': 80, 
           'Carla': 70, 
           'Maria': 80, 
           'Aurora' : 80, 
           'Camila': 60}

In [109]:
my_dict

{'Andre': 80,
 'Joao': 90,
 'Pedro': 80,
 'Carla': 70,
 'Maria': 80,
 'Aurora': 80,
 'Camila': 60}

In [110]:
my_dict.items()

dict_items([('Andre', 80), ('Joao', 90), ('Pedro', 80), ('Carla', 70), ('Maria', 80), ('Aurora', 80), ('Camila', 60)])

In [112]:
sorted(my_dict.items(), key=lambda x : x[1],reverse=True)

[('Joao', 90),
 ('Andre', 80),
 ('Pedro', 80),
 ('Maria', 80),
 ('Aurora', 80),
 ('Carla', 70),
 ('Camila', 60)]

# Pandas apply

In [113]:
import pandas as pd

In [114]:
from tqdm.auto import tqdm
tqdm.pandas(desc="Applying transformation")

  from pandas import Panel


In [125]:
def create_sample_dataframe(n_rows=1000000, n_cols=1, seed=None):
    """
    Build a sample dataframe of numbers stored as comma-decimal strings.

    The frame has n_cols columns named ``column_0`` ... ``column_{n_cols-1}``
    filled with random floats, plus one ``CPF`` column of random integers
    drawn with ``randint(1, 999999999)``. Every value is then converted to a
    string with the dots (.) replaced by commas (,), to imitate a badly
    formatted numeric export.

    Parameters
    ----------
    n_rows : int
        Number of rows to generate.
    n_cols : int
        Number of random float columns (the ``CPF`` column is added on top).
    seed : int, optional
        When given, a dedicated ``np.random.RandomState(seed)`` is used so the
        output is reproducible. When None (default), numpy's global random
        functions are used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Frame with n_rows rows and n_cols + 1 columns of strings.
    """
    # The np.random module and a RandomState instance both expose
    # randint/random_sample, so either can act as the generator.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order (CPF first, then the float columns) is kept from the original
    # so results under a global np.random.seed() are unchanged.
    cpf = rng.randint(1, 999999999, size=n_rows)
    variables = {f'column_{col_number}': rng.random_sample(n_rows) for col_number in range(n_cols)}
    variables.update({'CPF': cpf})

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map, while older pandas only has applymap. Look the method up
    # on the class so that a column name can never shadow it.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
    return elementwise(pd.DataFrame(variables), lambda x : str(x).replace('.',','))

df = create_sample_dataframe()

In [127]:
df.head()

Unnamed: 0,column_0,CPF
0.0,2400466347556388,780496400
1.0,5561108752090058,642667419
2.0,5864634178504352,974232273
3.0,8081319121033047,952729533
4.0,4061799595188128,422399038


In [123]:
df.column_0

0         0,12350319221686001
1          0,8856616723948728
2          0,9898639855280966
3          0,9946506857816905
4           0,500726107313392
                 ...         
999995    0,45407200497528777
999996     0,4542445100508896
999997     0,3491812755558068
999998     0,8237531906351844
999999     0,9920388523434426
Name: column_0, Length: 1000000, dtype: object

In [130]:
df.index

Float64Index([     0.0,      1.0,      2.0,      3.0,      4.0,      5.0,
                   6.0,      7.0,      8.0,      9.0,
              ...
              999990.0, 999991.0, 999992.0, 999993.0, 999994.0, 999995.0,
              999996.0, 999997.0, 999998.0, 999999.0],
             dtype='float64', length=1000000)

In [126]:
# Cast the index labels to float with the vectorized Index.astype instead of
# wrapping float() in a lambda and mapping it label by label.
df.index = df.index.astype(float)

In [131]:
df['column_0'].progress_apply(lambda x : float(x.replace(',','.')))

HBox(children=(FloatProgress(value=0.0, description='Applying transformation', max=1000000.0, style=ProgressSt…




0.0         0.240047
1.0         0.556111
2.0         0.586463
3.0         0.808132
4.0         0.406180
              ...   
999995.0    0.570362
999996.0    0.901295
999997.0    0.008003
999998.0    0.425914
999999.0    0.234945
Name: column_0, Length: 1000000, dtype: float64

---

In [132]:
df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/62f97ae1f8896b6b8e4bb08dcf65a07e0502aa8a/data/2020/2020-05-05/user_reviews.tsv', sep='\t')
df.head()

Unnamed: 0,grade,user_name,text,date
0,4,mds27272,My gf started playing before me. No option to ...,2020-03-20
1,5,lolo2178,"While the game itself is great, really relaxin...",2020-03-20
2,0,Roachant,My wife and I were looking forward to playing ...,2020-03-20
3,0,Houndf,We need equal values and opportunities for all...,2020-03-20
4,0,ProfessorFox,BEWARE! If you have multiple people in your h...,2020-03-20


In [134]:
df.loc[4,'text']

'BEWARE!  If you have multiple people in your house that want to play this game they can not each have their own account (island).  ONLY ONE customizable island can be played per Switch.  Even if you buy two copies of the game you will still only be able to customize ONE island.  I have 3 kids that want to play this game and they each want to customize their own island - AND THEY CANT!BEWARE!  If you have multiple people in your house that want to play this game they can not each have their own account (island).  ONLY ONE customizable island can be played per Switch.  Even if you buy two copies of the game you will still only be able to customize ONE island.  I have 3 kids that want to play this game and they each want to customize their own island - AND THEY CANT!   This is absolutely unacceptable.  Maybe the dumbest thing Nintendo has ever done.  I have owned every Nintendo console since the original NES and I can not think of a worse decision by Nintendo.… Expand'

In [133]:
import re

In [135]:
x = 'BEWARE!  If you have multiple people in your house that want to play this game they can not each have their own account (island).  ONLY ONE customizable island can be played per Switch.  Even if you buy two copies of the game you will still only be able to customize ONE island.  I have 3 kids that want to play this game and they each want to customize their own island - AND THEY CANT!BEWARE!  If you have multiple people in your house that want to play this game they can not each have their own account (island).  ONLY ONE customizable island can be played per Switch.  Even if you buy two copies of the game you will still only be able to customize ONE island.  I have 3 kids that want to play this game and they each want to customize their own island - AND THEY CANT!   This is absolutely unacceptable.  Maybe the dumbest thing Nintendo has ever done.  I have owned every Nintendo console since the original NES and I can not think of a worse decision by Nintendo.… Expand'

In [144]:
re.findall('[A-Z]+ ?[A-Z]+!',x)

['BEWARE!', 'THEY CANT!', 'BEWARE!', 'THEY CANT!']

In [None]:
# How can I find the warning?


In [None]:
df['text_warning'] = df.text.apply(lambda x : re.findall('[A-Z]+!', x))

In [2]:
example_tuples=[(10,20),(30,0),(60,30)]

In [4]:
[i[1] for i in example_tuples]

[20, 0, 30]

In [5]:
list(map(lambda num, denom : num/denom if denom != 0 else 0,[i[0] for i in example_tuples],[i[1] for i in example_tuples]))

[0.5, 0, 2.0]