# 

# <center>List comprehension, map, reduce, filter, apply

---

In [1]:
# Imports
import numpy as np
import pandas as pd

In [2]:
# Define empty dataset
df = pd.DataFrame({'col1': [1,2,3,4,5],
                     'col2': ['a','e','i','o','u']})

# View data
df

Unnamed: 0,col1,col2
0,1,a
1,2,e
2,3,i
3,4,o
4,5,u


---

## Loop for vs List Comprehension

In [3]:
# loop for
data = []

for i in range(10):
    data.append(i**2)

print(data)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [4]:
# [item for item in list]
data = [i**2 for i in range(10)]

print(data)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [5]:
# loop for
data = []

for v in df['col2']:
    data.append(str(v).upper())

print(data)

['A', 'E', 'I', 'O', 'U']


In [6]:
# [item for item in list]
data = [str(item).upper() for item in df['col2']]

print(data)

['A', 'E', 'I', 'O', 'U']


## List Comprehension with ONE condition

In [7]:
# loop for -- append even numbers
even = []
for i in range(20):
    if i % 2 == 0:
        even.append(i)

print(even)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]


In [8]:
# [item for item in list if condition]
even = [i for i in range(20) if i % 2 == 0]

print(even)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]


## List Comprehension with TWO conditions

In [9]:
# loop for -- append if number is divisible by five and six

result = []

for i in range(100):
    if i % 5 == 0:
        if i % 6 == 0:
            result.append(i)

print(result)

[0, 30, 60, 90]


In [10]:
# [item for item in list if condition if condition]
result = [num for num in range(100) if num % 5 == 0 if num % 6 == 0]

print(result)

[0, 30, 60, 90]


## List Comprehension with elif

In [11]:
# loop for -- Append even numbers and if number is six multiply by 100

even_aj = []
for i in range(20):
    if i == 6:
        even_aj.append(i*100)
    elif i % 2 == 0:
        even_aj.append(i)

print(even_aj)

[0, 2, 4, 600, 8, 10, 12, 14, 16, 18]


In [12]:
# [item if item condition else something for item in list if condition]
even_aj = [i if i !=6 else i*100 
           for i in range(20) 
           if i % 2 == 0]

print(even_aj)

[0, 2, 4, 600, 8, 10, 12, 14, 16, 18]


## List Comprehension with if else

In [13]:
result = []

# loop for
for i in range(16):
    if i % 5 == 0:
        result.append(1)
    else:
        result.append(0)

print(result)

[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]


In [14]:
# [item if condition else something for item in list]
result = [1 if i % 5 == 0 else 0 for i in range(16)]

print(result)

[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]


## List Comprehension with nested loop

In [15]:
# Nasted Loop
rows_and_cols = [] 

for i in range(4):
    for j in range(3):
        rows_and_cols.append([i, j])
        
print(rows_and_cols)

[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2], [3, 0], [3, 1], [3, 2]]


In [16]:
# [[i,j] for i in list_i for j in list_j]
rows_and_cols = [[x, y] for x in range(4) for y in range(3)]

print(rows_and_cols)

[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2], [3, 0], [3, 1], [3, 2]]


## List comprehension with nasted loop and conditions

In [17]:
rows_and_cols = []

for x in range(4):
    for y in range(3):
        if y != 2:
            rows_and_cols.append([x, y])
        else:
            rows_and_cols.append([x*100, y*100])
            
print(rows_and_cols)
%timeit max(range(10))

[[0, 0], [0, 1], [0, 200], [1, 0], [1, 1], [100, 200], [2, 0], [2, 1], [200, 200], [3, 0], [3, 1], [300, 200]]
448 ns ± 6.84 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [18]:
# [[i,j] if condition else something for i in list_i for j in list_j]
rows_and_cols = [] 

# list comprehension
rows_and_cols = [
    [x, y]
    if y !=2 else [x*100, y*100]
    for x in range(4)
    for y in range(3)
]

print(rows_and_cols)

[[0, 0], [0, 1], [0, 200], [1, 0], [1, 1], [100, 200], [2, 0], [2, 1], [200, 200], [3, 0], [3, 1], [300, 200]]


## Loops and List Comp with map, reduce and filter

### map()

In [19]:
# Some data
list_1 = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121]
list_2 = []

In [20]:
# loop for
for i in list_1:
    list_2.append(i**(1/2))

print(list_2)

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]


In [21]:
# list comp
list_2 = [i**(1/2) for i in list_1]

print(list_2)

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]


In [22]:
# map
import math

list_2 = list(map(math.sqrt, list_1))

print(list_2)

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]


## reduce()

In [23]:
# Import reduce
from functools import reduce

# Some data
list_1 = range(1000)

In [24]:
# loop for
sum_values = 0

for i in list_1:
    sum_values += i
    
print(sum_values)

499500


In [25]:
# Function
def sumFunc(a,b):
    x = a + b
    return x

sum_values = reduce(sumFunc, list_1)

print(sum_values)

499500


In [26]:
# lambda
sum_values = reduce((lambda a,b: a + b), list_1)

print(sum_values)

499500


## filter()

In [27]:
# some data
df = pd.DataFrame([10, 4, -1, 3, 5, -9, -11], columns = ['col1'])

df

Unnamed: 0,col1
0,10
1,4
2,-1
3,3
4,5
5,-9
6,-11


In [28]:
# loop for
filter_values = []

for i in df['col1']:
    if i > 0:
        filter_values.append(i)
    
print(filter_values)

[10, 4, 3, 5]


In [29]:
# list comp
filter_values = [i for i in df['col1'] if i > 0]

print(filter_values)

[10, 4, 3, 5]


In [30]:
# filter
filter_values = list(filter((lambda x: x > 0), df['col1']))

print(filter_values)

[10, 4, 3, 5]


In [31]:
# apply
filter_values = df['col1'][df['col1'].apply(lambda x: x > 0)]

print(filter_values)

0    10
1     4
3     3
4     5
Name: col1, dtype: int64


# Runtime with different approaches

In [32]:
col_1 = range(101)

df = pd.DataFrame(col_1, columns = ['col_1'])

df

Unnamed: 0,col_1
0,0
1,1
2,2
3,3
4,4
...,...
96,96
97,97
98,98
99,99


In [33]:
# Loop for
col_2 = []
for i in df.col_1:
    col_2.append(np.sqrt(i).round(2))

print(col_2)
%timeit max(range(10))

[0.0, 1.0, 1.41, 1.73, 2.0, 2.24, 2.45, 2.65, 2.83, 3.0, 3.16, 3.32, 3.46, 3.61, 3.74, 3.87, 4.0, 4.12, 4.24, 4.36, 4.47, 4.58, 4.69, 4.8, 4.9, 5.0, 5.1, 5.2, 5.29, 5.39, 5.48, 5.57, 5.66, 5.74, 5.83, 5.92, 6.0, 6.08, 6.16, 6.24, 6.32, 6.4, 6.48, 6.56, 6.63, 6.71, 6.78, 6.86, 6.93, 7.0, 7.07, 7.14, 7.21, 7.28, 7.35, 7.42, 7.48, 7.55, 7.62, 7.68, 7.75, 7.81, 7.87, 7.94, 8.0, 8.06, 8.12, 8.19, 8.25, 8.31, 8.37, 8.43, 8.49, 8.54, 8.6, 8.66, 8.72, 8.77, 8.83, 8.89, 8.94, 9.0, 9.06, 9.11, 9.17, 9.22, 9.27, 9.33, 9.38, 9.43, 9.49, 9.54, 9.59, 9.64, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95, 10.0]
447 ns ± 5.68 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [34]:
# List comp
col_2 = [np.sqrt(i).round(2) for i in df.col_1]

print(col_2)
%timeit max(range(10))

[0.0, 1.0, 1.41, 1.73, 2.0, 2.24, 2.45, 2.65, 2.83, 3.0, 3.16, 3.32, 3.46, 3.61, 3.74, 3.87, 4.0, 4.12, 4.24, 4.36, 4.47, 4.58, 4.69, 4.8, 4.9, 5.0, 5.1, 5.2, 5.29, 5.39, 5.48, 5.57, 5.66, 5.74, 5.83, 5.92, 6.0, 6.08, 6.16, 6.24, 6.32, 6.4, 6.48, 6.56, 6.63, 6.71, 6.78, 6.86, 6.93, 7.0, 7.07, 7.14, 7.21, 7.28, 7.35, 7.42, 7.48, 7.55, 7.62, 7.68, 7.75, 7.81, 7.87, 7.94, 8.0, 8.06, 8.12, 8.19, 8.25, 8.31, 8.37, 8.43, 8.49, 8.54, 8.6, 8.66, 8.72, 8.77, 8.83, 8.89, 8.94, 9.0, 9.06, 9.11, 9.17, 9.22, 9.27, 9.33, 9.38, 9.43, 9.49, 9.54, 9.59, 9.64, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95, 10.0]
410 ns ± 25.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [35]:
# map
col_2 = np.around(list(map(np.sqrt, df.col_1)), 2)

print(col_2)
%timeit max(range(10))

[ 0.    1.    1.41  1.73  2.    2.24  2.45  2.65  2.83  3.    3.16  3.32
  3.46  3.61  3.74  3.87  4.    4.12  4.24  4.36  4.47  4.58  4.69  4.8
  4.9   5.    5.1   5.2   5.29  5.39  5.48  5.57  5.66  5.74  5.83  5.92
  6.    6.08  6.16  6.24  6.32  6.4   6.48  6.56  6.63  6.71  6.78  6.86
  6.93  7.    7.07  7.14  7.21  7.28  7.35  7.42  7.48  7.55  7.62  7.68
  7.75  7.81  7.87  7.94  8.    8.06  8.12  8.19  8.25  8.31  8.37  8.43
  8.49  8.54  8.6   8.66  8.72  8.77  8.83  8.89  8.94  9.    9.06  9.11
  9.17  9.22  9.27  9.33  9.38  9.43  9.49  9.54  9.59  9.64  9.7   9.75
  9.8   9.85  9.9   9.95 10.  ]
424 ns ± 21.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [36]:
# apply
col_2 = df.col_1.apply(lambda x: np.sqrt(x).round(2))

print(list(col_2))
%timeit max(range(10))

[0.0, 1.0, 1.41, 1.73, 2.0, 2.24, 2.45, 2.65, 2.83, 3.0, 3.16, 3.32, 3.46, 3.61, 3.74, 3.87, 4.0, 4.12, 4.24, 4.36, 4.47, 4.58, 4.69, 4.8, 4.9, 5.0, 5.1, 5.2, 5.29, 5.39, 5.48, 5.57, 5.66, 5.74, 5.83, 5.92, 6.0, 6.08, 6.16, 6.24, 6.32, 6.4, 6.48, 6.56, 6.63, 6.71, 6.78, 6.86, 6.93, 7.0, 7.07, 7.14, 7.21, 7.28, 7.35, 7.42, 7.48, 7.55, 7.62, 7.68, 7.75, 7.81, 7.87, 7.94, 8.0, 8.06, 8.12, 8.19, 8.25, 8.31, 8.37, 8.43, 8.49, 8.54, 8.6, 8.66, 8.72, 8.77, 8.83, 8.89, 8.94, 9.0, 9.06, 9.11, 9.17, 9.22, 9.27, 9.33, 9.38, 9.43, 9.49, 9.54, 9.59, 9.64, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95, 10.0]
413 ns ± 4.81 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


---