### https://www.jianshu.com/p/c52968ab9c3d

https://www.analyticsvidhya.com/blog/2016/01/python-tutorial-list-comprehension-examples/

# 应用1:平坦化矩阵（Flatten a Matrix）

In [1]:
def eg1_for(matrix):
    flat = []
    for row in matrix:
        for x in row:
            flat.append(x)
    return flat

def eg1_lc(matrix):
    return [x for row in matrix for x in row ]

In [2]:
matrix = [ range(0,5), range(5,10), range(10,15) ]
print ("Original Matrix: " + str(matrix))
print ("FOR-loop result: " + str(eg1_for(matrix)))
print ("LC result      : " + str(eg1_lc(matrix)))

Original Matrix: [range(0, 5), range(5, 10), range(10, 15)]
FOR-loop result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
LC result      : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]


# 应用二：将一个句子中的元音字母去掉

In [5]:
def eg2_for(sentence):
    vowels = 'aeiou'
    filtered_list = []
    for l in sentence:
        if l not in vowels:
            filtered_list.append(l)
    return ''.join(filtered_list)

def eg2_lc(sentence):
    vowels = 'aeiou'
    return ''.join([ l for l in sentence if l not in vowels])

In [7]:
sentence = 'My name is Aarshay Jain!'
print ("FOR-loop result: " + eg2_for(sentence))
print ("LC result      : " + eg2_lc(sentence))

FOR-loop result: My nm s Arshy Jn!
LC result      : My nm s Arshy Jn!


# 应用三：字典推导式（ Dictionary Comprehension）

In [8]:
def eg3_for(keys, values):
    dic = {}
    for i in range(len(keys)):
        dic[keys[i]] = values[i]
    return dic

def eg3_lc(keys, values):
    return { keys[i] : values[i] for i in range(len(keys)) }


In [10]:
country = ['India', 'Pakistan', 'Nepal', 'Bhutan', 'China', 'Bangladesh']
capital = ['New Delhi', 'Islamabad','Kathmandu', 'Thimphu', 'Beijing', 'Dhaka']
print ("FOR-loop result: " + str(eg3_for(country, capital)))
print ("LC result      : " + str(eg3_lc(country, capital)))

FOR-loop result: {'India': 'New Delhi', 'Pakistan': 'Islamabad', 'Nepal': 'Kathmandu', 'Bhutan': 'Thimphu', 'China': 'Beijing', 'Bangladesh': 'Dhaka'}
LC result      : {'India': 'New Delhi', 'Pakistan': 'Islamabad', 'Nepal': 'Kathmandu', 'Bhutan': 'Thimphu', 'China': 'Beijing', 'Bangladesh': 'Dhaka'}


# 应用4：读双重List

In [15]:
import pandas as pd
data = pd.read_csv("skills.csv")
print (data)

    personID                            skills
0          1      cricket;tabletennis;football
1          2             tabletennis;badminton
2          3                       tabletennis
3          4    cricket;tabletennis;volleyball
4          5               football;volleyball
5          6              tabletennis;football
6          7      cricket;volleyball;badminton
7          8                  football;cricket
8          9  badminton;tabletennis;volleyball
9         10                cricket;volleyball
10        11                football;badminton
11        12       cricket;volleyball;football
12        13               football;volleyball
13        14                         badminton
14        15      cricket;football;tabletennis


In [16]:
#Split text with the separator ';'
data['skills_list'] = data['skills'].apply(lambda x: x.split(';'))
print (data['skills_list'])

0         [cricket, tabletennis, football]
1                 [tabletennis, badminton]
2                            [tabletennis]
3       [cricket, tabletennis, volleyball]
4                   [football, volleyball]
5                  [tabletennis, football]
6         [cricket, volleyball, badminton]
7                      [football, cricket]
8     [badminton, tabletennis, volleyball]
9                    [cricket, volleyball]
10                   [football, badminton]
11         [cricket, volleyball, football]
12                  [football, volleyball]
13                             [badminton]
14        [cricket, football, tabletennis]
Name: skills_list, dtype: object


In [17]:
#Initialize the set
skills_unq = set()
#Update each entry into set. Since it takes only unique value, duplicates will be ignored automatically.
skills_unq.update( (sport for l in data['skills_list'] for sport in l) )
print (skills_unq)

{'football', 'volleyball', 'cricket', 'badminton', 'tabletennis'}


In [20]:
#Convert set to list:
skills_unq = list(skills_unq)
sport_matrix = [ [1 if skill in row else 0 for skill in skills_unq] for row in data['skills_list']  ]
print (sport_matrix,end="")

[[1, 0, 1, 0, 1], [0, 0, 0, 1, 1], [0, 0, 0, 0, 1], [0, 1, 1, 0, 1], [1, 1, 0, 0, 0], [1, 0, 0, 0, 1], [0, 1, 1, 1, 0], [1, 0, 1, 0, 0], [0, 1, 0, 1, 1], [0, 1, 1, 0, 0], [1, 0, 0, 1, 0], [1, 1, 1, 0, 0], [1, 1, 0, 0, 0], [0, 0, 0, 1, 0], [1, 0, 1, 0, 1]]

In [22]:
data = pd.concat([data, pd.DataFrame(sport_matrix,columns=skills_unq)],axis=1)
data

Unnamed: 0,personID,skills,skills_list,football,volleyball,cricket,badminton,tabletennis
0,1,cricket;tabletennis;football,"[cricket, tabletennis, football]",1,0,1,0,1
1,2,tabletennis;badminton,"[tabletennis, badminton]",0,0,0,1,1
2,3,tabletennis,[tabletennis],0,0,0,0,1
3,4,cricket;tabletennis;volleyball,"[cricket, tabletennis, volleyball]",0,1,1,0,1
4,5,football;volleyball,"[football, volleyball]",1,1,0,0,0
5,6,tabletennis;football,"[tabletennis, football]",1,0,0,0,1
6,7,cricket;volleyball;badminton,"[cricket, volleyball, badminton]",0,1,1,1,0
7,8,football;cricket,"[football, cricket]",1,0,1,0,0
8,9,badminton;tabletennis;volleyball,"[badminton, tabletennis, volleyball]",0,1,0,1,1
9,10,cricket;volleyball,"[cricket, volleyball]",0,1,1,0,0


# 应用5:为多项式回归创建一列权值（Creating powers of a columns for Polynomial regression）

In [23]:
data2 = pd.DataFrame([1,2,3,4,5], columns=['number'])
data2

Unnamed: 0,number
0,1
1,2
2,3
3,4
4,5


In [24]:
#Define the degree:
deg=6
#Create the matrix:
power_matrix = [ [i**p for p in range(2,deg+1) ] for i in data2['number'] ]  
print (power_matrix)


[[1, 1, 1, 1, 1], [4, 8, 16, 32, 64], [9, 27, 81, 243, 729], [16, 64, 256, 1024, 4096], [25, 125, 625, 3125, 15625]]


In [26]:
cols = ['power_%d'%i for i in range(2,deg+1)]
data2 = pd.concat([data2, pd.DataFrame(power_matrix,columns=cols)],axis=1)
data2

Unnamed: 0,number,power_2,power_3,power_4,power_5,power_6,power_2.1,power_3.1,power_4.1,power_5.1,power_6.1
0,1,1,1,1,1,1,1,1,1,1,1
1,2,4,8,16,32,64,4,8,16,32,64
2,3,9,27,81,243,729,9,27,81,243,729
3,4,16,64,256,1024,4096,16,64,256,1024,4096
4,5,25,125,625,3125,15625,25,125,625,3125,15625


# 应用6:过滤列名（Filtering column names）

In [27]:
cols = ['a', 'b', 'c', 'd', 
        'a_transform', 'b_transform', 'c_transform', 
        'd_power2', 'd_power3', 'd_power4', 'd_power5',
        'temp1', 'temp2']

In [28]:
col_set1 = [x for x in cols if x.endswith('transform')]
col_set2 = [x for x in cols if 'power' in x]
col_set3 = [x for x in cols if (x.endswith('transform')) | ('power' in x)]
col_set4 = [x for x in cols if x not in ['temp1','temp2']]
print ('Set1: ', col_set1)
print ('Set2: ', col_set2)
print ('Set3: ', col_set3)
print ('Set4: ', col_set4)

Set1:  ['a_transform', 'b_transform', 'c_transform']
Set2:  ['d_power2', 'd_power3', 'd_power4', 'd_power5']
Set3:  ['a_transform', 'b_transform', 'c_transform', 'd_power2', 'd_power3', 'd_power4', 'd_power5']
Set4:  ['a', 'b', 'c', 'd', 'a_transform', 'b_transform', 'c_transform', 'd_power2', 'd_power3', 'd_power4', 'd_power5']


END