# MAP

In [1]:
def func(x):
    """Tell whether x is even, i.e. leaves no remainder when divided by 2."""
    remainder = x % 2
    return remainder == 0

In [2]:
func(2)

True

In [3]:
func(1)

False

In [4]:
l=[10,11,13,15,18]


In [5]:
[func(i) for i in l]

[True, False, False, False, True]

In [6]:
map(func, l)

<map at 0x7ffa296284f0>

In [7]:
list(map(func, l))

[True, False, False, False, True]

## `map` cannot be applied to a DataFrame; it can be applied to its columns (Series)

In [9]:
import numpy as np
import pandas as pd

In [10]:
df=pd.DataFrame([[1,2,3],[2,3,np.nan]])

In [11]:
df

Unnamed: 0,0,1,2
0,1,2,3.0
1,2,3,


In [12]:
def ff(x):
    """Convert a single value to a float and return it."""
    as_float = float(x)
    return as_float

In [13]:
df[0].map(ff)

0    1.0
1    2.0
Name: 0, dtype: float64

In [14]:
df[1].map(ff)

0    2.0
1    3.0
Name: 1, dtype: float64

In [20]:
dct={1:'apple',2:'plum'}

df[0].map(dct)

0    apple
1     plum
Name: 0, dtype: object

In [21]:
df[1].map(dct)


0    plum
1     NaN
Name: 1, dtype: object

## Filter

In [22]:
#map the function
list(map(func, l))

[True, False, False, False, True]

In [23]:
#use filter instead
list(filter(func, l))

[10, 18]

In [24]:
df.index=['mouse','cat']

In [25]:
df

Unnamed: 0,0,1,2
mouse,1,2,3.0
cat,2,3,


In [28]:
#axis=0 filters on the index (row labels); like= keeps the labels that contain the given substring
df.filter(like='at',axis=0)


Unnamed: 0,0,1,2
cat,2,3,


In [29]:
df.filter(like='mou',axis=0)


Unnamed: 0,0,1,2
mouse,1,2,3.0


In [30]:
#same with loc + str.contains
df.loc[df.index.str.contains('ouse')]

Unnamed: 0,0,1,2
mouse,1,2,3.0


In [31]:
df.filter(like='a',axis=0)

Unnamed: 0,0,1,2
cat,2,3,


In [36]:
#axis=0 stands for the index (rows); axis=1 would stand for columns. items= selects labels by exact match, in the order given
df.filter(items=['cat','mouse'],axis=0)

Unnamed: 0,0,1,2
cat,2,3,
mouse,1,2,3.0


In [37]:
#loc also selects by label; here it keeps all rows and picks columns 0 and 2
df.loc[:,[0,2]]

Unnamed: 0,0,2
mouse,1,3.0
cat,2,


## Lambda

In [38]:
# regular function 
def squared(x):
    """Return x raised to the second power."""
    return pow(x, 2)

In [39]:
#apply function to each element
list(map(squared,l))

[100, 121, 169, 225, 324]

In [40]:
#define a lambda function with a name
squared=lambda x: x**2

In [41]:
#apply lambda function
list(map(squared,l))

[100, 121, 169, 225, 324]

In [42]:
#apply lambda function without saving it
list(map(lambda x: x**2,l))

[100, 121, 169, 225, 324]

In [43]:
#apply lambda function to a number
(lambda x: x**2)(11)

121

In [44]:
#which could be done easier here:
squared(11)

121

In [45]:
# define a regular division function
def div(x,y):
    """Return the true division x/y.

    Raises:
        ValueError: if y equals 0.
    """
    # Guard clause: reject a zero divisor before attempting the division.
    if y==0:
        raise ValueError ('Do not divide by 0 you stupid idiot!')
    return x/y

In [48]:
div(2,0)

ValueError: Do not divide by 0 you stupid idiot!

In [49]:
#test 1
div(4,2)

2.0

In [50]:
#define a similar function with a conditional expression (the one-line if/else form also used inside comprehensions and generator expressions)
def div2(x,y):
    """Return x/y, or 0 when the divisor y is 0 (no exception is raised)."""
    if y!=0:
        quotient = x/y
    else:
        quotient = 0
    return quotient

In [51]:

#test 1
div2(4,2)

2.0

In [52]:
#test2
div2(4,0)

0

In [53]:
#lambda function
# Named lambda: 0 for a zero divisor, the true division x/y otherwise.
div3=lambda x,y: 0 if y==0 else x/y

In [54]:
div3(4,2)

2.0

In [55]:
div3(4,0)

0

## Function that creates functions

In [56]:
def generate_range(lower):
    """Build a one-argument function that produces ranges starting at `lower`.

    The returned lambda closes over `lower`; calling it with `upper`
    gives range(lower, upper).
    """
    range_from_lower = lambda upper: range(lower,upper)
    return range_from_lower

In [57]:
#func1 is a lambda function that will generate ranges starting from 1
func1=generate_range(1)

In [58]:
#range from 1 to 100
func1(100)

range(1, 100)

In [59]:
#func11 is a lambda function that will generate ranges starting from 11
func11=generate_range(11)

In [60]:
#range from 11 to 100
func11(100)

range(11, 100)

## Another case for lambdas 

In [61]:
list_of_topics=['python-programming','mysql','web-scrapping','tableau','data-viz','statistics','machine-learning','neural-nets']


In [62]:
# list comprehension to do text transformations
[' '.join(i.split('-')).title() for i in list_of_topics]

['Python Programming',
 'Mysql',
 'Web Scrapping',
 'Tableau',
 'Data Viz',
 'Statistics',
 'Machine Learning',
 'Neural Nets']

In [63]:
#same but simpler
[i.replace('-',' ').title() for i in list_of_topics]

['Python Programming',
 'Mysql',
 'Web Scrapping',
 'Tableau',
 'Data Viz',
 'Statistics',
 'Machine Learning',
 'Neural Nets']

In [64]:
#same using lambda instead of list comprehension
new_list_of_topics=list(map(lambda x: x.replace('-',' ').title(), list_of_topics))
new_list_of_topics

['Python Programming',
 'Mysql',
 'Web Scrapping',
 'Tableau',
 'Data Viz',
 'Statistics',
 'Machine Learning',
 'Neural Nets']

In [65]:
#dictionary with length of each topic
dct_of_topics={i:len(i) for i in new_list_of_topics}
dct_of_topics


{'Python Programming': 18,
 'Mysql': 5,
 'Web Scrapping': 13,
 'Tableau': 7,
 'Data Viz': 8,
 'Statistics': 10,
 'Machine Learning': 16,
 'Neural Nets': 11}

In [66]:
#approach we learnt previously
from operator import itemgetter
dict(sorted(dct_of_topics.items(), key=itemgetter(1)))

{'Mysql': 5,
 'Tableau': 7,
 'Data Viz': 8,
 'Statistics': 10,
 'Neural Nets': 11,
 'Web Scrapping': 13,
 'Machine Learning': 16,
 'Python Programming': 18}

In [67]:
#lambda approach
(sorted(dct_of_topics.items(),key=lambda x: x[1]))

[('Mysql', 5),
 ('Tableau', 7),
 ('Data Viz', 8),
 ('Statistics', 10),
 ('Neural Nets', 11),
 ('Web Scrapping', 13),
 ('Machine Learning', 16),
 ('Python Programming', 18)]

## MAP 

In [68]:
df=pd.read_csv('https://ironhack.school/asset-v1:IRONHACK+DAFT+201910_PAR+type@asset+block@IMDB-Movie-Data.csv')


In [69]:
df.head()


Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


In [70]:
df.Rating.max()


9.0

In [71]:
df.Rating.min()


1.9

In [72]:
#ratings are originally on a scale up to 10, so dividing by 2 rescales them to a scale up to 5
display((df.Rating/2).head())


0    4.05
1    3.50
2    3.65
3    3.60
4    3.10
Name: Rating, dtype: float64

In [73]:
#or use map
df.Rating.map(lambda x: x/2).head()

0    4.05
1    3.50
2    3.65
3    3.60
4    3.10
Name: Rating, dtype: float64

In [74]:
#takes Series as argument
def range_numbers(x):
    """Return the spread of x: its maximum minus its minimum.

    x must expose .max() and .min(); the notebook passes pandas columns
    through DataFrame.apply.
    """
    highest = x.max()
    lowest = x.min()
    return highest-lowest

In [78]:
df[['Rating']].apply(range_numbers)


Rating    7.1
dtype: float64

In [76]:
df[['Rating']].max()-df[['Rating']].min()


Rating    7.1
dtype: float64

In [79]:
# Keep only the numeric columns, as an independent copy.
# select_dtypes is the public pandas API for this (avoids private helpers such as _get_numeric_data).
df_n=df.select_dtypes(include='number').copy()

In [81]:
df_n

Unnamed: 0,Rank,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,2014,121,8.1,757074,333.13,76.0
1,2,2012,124,7.0,485820,126.46,65.0
2,3,2016,117,7.3,157606,138.12,62.0
3,4,2016,108,7.2,60545,270.32,59.0
4,5,2016,123,6.2,393727,325.02,40.0
...,...,...,...,...,...,...,...
995,996,2015,111,6.2,27585,,45.0
996,997,2007,94,5.5,73152,17.54,46.0
997,998,2008,98,6.2,70699,58.01,50.0
998,999,2014,93,5.6,4881,,22.0


In [82]:
def half(x):
    """Return x divided by 2 (true division)."""
    halved = x/2
    return halved

In [83]:
def missing_col(x):
    """Count the missing (NaN/None) entries of x via its .isna() mask."""
    missing_mask = x.isna()
    return missing_mask.sum()

In [84]:
display(df_n.apply(half).head())

Unnamed: 0,Rank,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,0.5,1007.0,60.5,4.05,378537.0,166.565,38.0
1,1.0,1006.0,62.0,3.5,242910.0,63.23,32.5
2,1.5,1008.0,58.5,3.65,78803.0,69.06,31.0
3,2.0,1008.0,54.0,3.6,30272.5,135.16,29.5
4,2.5,1008.0,61.5,3.1,196863.5,162.51,20.0


In [85]:
df_n.apply(lambda x: x/2).head()

Unnamed: 0,Rank,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,0.5,1007.0,60.5,4.05,378537.0,166.565,38.0
1,1.0,1006.0,62.0,3.5,242910.0,63.23,32.5
2,1.5,1008.0,58.5,3.65,78803.0,69.06,31.0
3,2.0,1008.0,54.0,3.6,30272.5,135.16,29.5
4,2.5,1008.0,61.5,3.1,196863.5,162.51,20.0


In [86]:
df_n.apply(missing_col)

Rank                    0
Year                    0
Runtime (Minutes)       0
Rating                  0
Votes                   0
Revenue (Millions)    128
Metascore              64
dtype: int64

In [87]:
df_n.apply(sum)

Rank                     500500.0
Year                    2012783.0
Runtime (Minutes)        113172.0
Rating                     6723.2
Votes                 169808255.0
Revenue (Millions)            NaN
Metascore                     NaN
dtype: float64

In [88]:
df_n.apply(sum, axis=1)


0      759627.23
1      488156.46
2      159949.42
3       63009.52
4      396242.22
         ...    
995          NaN
996     76319.04
997     73917.21
998          NaN
999     15573.94
Length: 1000, dtype: float64

## Select action movies

In [89]:
df.loc[df.Genre.str.contains('Action')].head()


Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0
5,6,The Great Wall,"Action,Adventure,Fantasy",European mercenaries searching for black powde...,Yimou Zhang,"Matt Damon, Tian Jing, Willem Dafoe, Andy Lau",2016,103,6.1,56036,45.13,42.0
8,9,The Lost City of Z,"Action,Adventure,Biography","A true-life drama, centering on British explor...",James Gray,"Charlie Hunnam, Robert Pattinson, Sienna Mille...",2016,141,7.1,7188,8.01,78.0
12,13,Rogue One,"Action,Adventure,Sci-Fi",The Rebel Alliance makes a risky move to steal...,Gareth Edwards,"Felicity Jones, Diego Luna, Alan Tudyk, Donnie...",2016,133,7.9,323118,532.17,65.0


In [90]:
df[(df.Genre=='Action')|(df.Genre=='Thriller')].head()


Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
123,124,Boyka: Undisputed IV,Action,In the fourth installment of the fighting fran...,Todor Chapkanov,"Scott Adkins, Teodora Duhovnikova, Alon Aboutb...",2016,86,7.4,10428,,
282,283,Death Proof,Thriller,Two separate sets of voluptuous women are stal...,Quentin Tarantino,"Kurt Russell, Zoë Bell, Rosario Dawson, Vaness...",2007,113,7.1,220236,,
289,290,Iris,Thriller,"Iris, young wife of a businessman, disappears ...",Jalil Lespert,"Romain Duris, Charlotte Le Bon, Jalil Lespert,...",2016,99,6.1,726,,
444,445,The Thinning,Thriller,"""The Thinning"" takes place in a post-apocalypt...",Michael J. Gallagher,"Logan Paul, Peyton List, Lia Marie Johnson,Cal...",2016,81,6.0,4531,,31.0
580,581,Kickboxer: Vengeance,Action,A kick boxer is out to avenge his brother.,John Stockwell,"Dave Bautista, Alain Moussi, Gina Carano, Jean...",2016,90,4.9,6809,131.56,37.0


In [102]:
# Without lambda (harder to read as a single boolean mask).
# Keep a movie when its Genre is exactly 'Action', OR when it lists exactly two
# genres, mentions 'Action', and every listed genre is Action/Thriller/Adventure.
genre_lists=df.Genre.str.split(',')                 # split once and reuse for both checks
allowed_genres={'Action','Thriller','Adventure'}
# allowed_genres.issuperset is a bound method, so no lambda is needed for the subset check.
dfn=df[(df.Genre=='Action')|((genre_lists.map(len)==2)&(df.Genre.str.contains('Action'))&(genre_lists.map(allowed_genres.issuperset)))]

In [103]:
dfn

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
17,18,Jason Bourne,"Action,Thriller",The CIA's most dangerous former operative is d...,Paul Greengrass,"Matt Damon, Tommy Lee Jones, Alicia Vikander,V...",2016,123,6.7,150823,162.16,58.0
123,124,Boyka: Undisputed IV,Action,In the fourth installment of the fighting fran...,Todor Chapkanov,"Scott Adkins, Teodora Duhovnikova, Alon Aboutb...",2016,86,7.4,10428,,
124,125,The Dark Knight Rises,"Action,Thriller",Eight years after the Joker's reign of anarchy...,Christopher Nolan,"Christian Bale, Tom Hardy, Anne Hathaway,Gary ...",2012,164,8.5,1222645,448.13,78.0
284,285,Hercules,"Action,Adventure","Having endured his legendary twelve labors, He...",Brett Ratner,"Dwayne Johnson, John Hurt, Ian McShane, Joseph...",2014,98,6.0,122838,72.66,47.0
344,345,Spider-Man 3,"Action,Adventure",A strange black entity from another world bond...,Sam Raimi,"Tobey Maguire, Kirsten Dunst, Topher Grace, Th...",2007,139,6.2,406219,336.53,59.0
368,369,The Amazing Spider-Man,"Action,Adventure",After Peter Parker is bitten by a genetically ...,Marc Webb,"Andrew Garfield, Emma Stone, Rhys Ifans, Irrfa...",2012,136,7.0,474320,262.03,66.0
410,411,Taken,"Action,Thriller",A retired CIA agent travels across Europe and ...,Pierre Morel,"Liam Neeson, Maggie Grace, Famke Janssen, Lela...",2008,93,7.8,502961,145.0,50.0
453,454,Olympus Has Fallen,"Action,Thriller",Disgraced Secret Service agent (and former pre...,Antoine Fuqua,"Gerard Butler, Aaron Eckhart, Morgan Freeman,A...",2013,119,6.5,214994,98.9,41.0
580,581,Kickboxer: Vengeance,Action,A kick boxer is out to avenge his brother.,John Stockwell,"Dave Bautista, Alain Moussi, Gina Carano, Jean...",2016,90,4.9,6809,131.56,37.0
624,625,Taken 3,"Action,Thriller",Ex-government operative Bryan Mills is accused...,Olivier Megaton,"Liam Neeson, Forest Whitaker, Maggie Grace,Fam...",2014,109,6.0,144715,89.25,26.0


In [104]:
# Lambda
# Each Genre string becomes a set of genres; the lambda then decides per movie.
# Inside the lambda the operands are plain Python bools, so `or`/`and` are used
# (the bitwise |/& operators are only needed for element-wise Series masks).
# A movie is kept when its genre set is exactly {'Action'}, or when it has two
# genres, includes 'Action', and is a subset of Action/Thriller/Adventure.
df[df.Genre.str.split(',').map(set).apply(lambda x:
                                          x=={'Action'} or
                                          (len(x)==2 and
                                           'Action' in x and
                                           x<={'Action','Thriller','Adventure'}))];

In [None]:
#Step-by-step conditions: each line is one boolean mask that the Action filter combines
df.Genre=='Action';                         #Genre string is exactly 'Action' (Action is the movie's only genre)
df.Genre.str.split(',').map(len)==2;        #movie lists exactly 2 genres
df.Genre.str.contains('Action');            #Genre string contains 'Action'
df.Genre.str.split(',').apply(set).map(lambda x: (x-set(['Action','Thriller','Adventure']))==set()); #every genre of the movie is one of Action, Thriller and Adventure