## Using a lambda function inside `map`

In [1]:
from pyspark import SparkConf, SparkContext

# Reuse the already-running SparkContext when this cell is re-executed:
# calling SparkContext(...) a second time in the same kernel raises
# "ValueError: Cannot run multiple SparkContexts at once".
# The app name only takes effect when a new context is actually created.
sc = SparkContext.getOrCreate(SparkConf().setAppName("UsingLambdaInsideMap"))

In [3]:
# Sample log of song titles. Some titles repeat, and "Nice for What" /
# "Nice for what" differ only in letter case.
log_of_songs = [
    "Despacito",
    "All the stars",
    "Nice for What",
    "Havana",
    "Radioactive",
    "Counting Stars",
    "Havana",
    "Despacito",
    "Nice for what",
]

In [6]:
# sc.parallelize() builds an RDD from the in-memory Python list.
distributed_songs_log = sc.parallelize(log_of_songs)

#### Creating a UDF that can be used inside the **map** function

In [7]:
def convert_song_to_lowercase(song):
    """Return ``song`` converted to lower case via its ``lower()`` method."""
    lowered_title = song.lower()
    return lowered_title


#### Now let's pass this function to the **map** function...

In [14]:
# Apply the named function to every element of the RDD, then pull the
# results back to the driver as a plain Python list.
dist_songs_log_revised = (
    distributed_songs_log
    .map(convert_song_to_lowercase)
    .collect()
)

In [12]:
distributed_songs_log.collect()                     # the original dataset remains intact: map() did not modify it

['Despacito',
 'All the stars',
 'Nice for What',
 'Havana',
 'Radioactive',
 'Counting Stars',
 'Havana',
 'Despacito',
 'Nice for what']

In [15]:
# Display the list collected from the map(convert_song_to_lowercase) call.
dist_songs_log_revised

['despacito',
 'all the stars',
 'nice for what',
 'havana',
 'radioactive',
 'counting stars',
 'havana',
 'despacito',
 'nice for what']

#### Another way to do the above manipulation is to wrap `str.lower()` in a lambda function inside **map**, rather than creating a UDF

In [19]:
# Same transformation as the named-function version, written inline as a lambda.
revising_song_log_with_lambda = (
    distributed_songs_log
    .map(lambda title: title.lower())
    .collect()
)

In [21]:
# Collect the source RDD again to confirm its elements are unchanged.
distributed_songs_log.collect()

['Despacito',
 'All the stars',
 'Nice for What',
 'Havana',
 'Radioactive',
 'Counting Stars',
 'Havana',
 'Despacito',
 'Nice for what']

In [20]:
# Display the list collected from the lambda-based map call.
revising_song_log_with_lambda

['despacito',
 'all the stars',
 'nice for what',
 'havana',
 'radioactive',
 'counting stars',
 'havana',
 'despacito',
 'nice for what']