# 列表推导式

In [3]:
# Even integers in [0, 1000), built with a filtered list comprehension.
result = [number for number in range(1000) if number % 2 == 0]
# result  (uncomment to display the whole list)

In [4]:
str_lst = ['welcome', 'to', 'python', 'data', 'analysis', 'course']
# Keep only the words longer than four characters and upper-case them.
result3 = [word.upper() for word in str_lst if len(word) > 4]
print(result3)

['WELCOME', 'PYTHON', 'ANALYSIS', 'COURSE']


# 字典推导式

In [5]:
# Dict comprehension: map each position 0..9 to the value found at that
# position when counting down from 9.
# The variable is deliberately not named ``dict``: binding that name would hide
# the built-in ``dict`` type for every later cell in the same kernel session.
index_to_value = {key: value for key, value in enumerate(reversed(range(10)))}
print(index_to_value)

{0: 9, 1: 8, 2: 7, 3: 6, 4: 5, 5: 4, 6: 3, 7: 2, 8: 1, 9: 0}


# 集合推导式

In [7]:
# Set comprehension: collect the integers 0..9 into a set.
set1 = {value for value in range(10)}
print(set1)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}


# 嵌套推导式

In [9]:
# Two consecutive runs of ten integers, used as input for the nested comprehension.
lists = [list(range(start, start + 10)) for start in (0, 10)]
print(lists)

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]]


In [15]:
# Flatten ``lists`` and keep only the even items with one nested comprehension.
evens = [item for lst in lists for item in lst if item % 2 == 0]
print(evens)
print(type(evens))

# Equivalent explicit loops. The loop variables mirror the comprehension
# (``lst`` for each inner list, ``item`` for each element) rather than reusing
# a single name for both nesting levels, which would rebind the outer loop
# target inside the inner loop.
a = []
for lst in lists:
    for item in lst:
        if item % 2 == 0:
            a.append(item)
print(a)
print(type(a))

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
<class 'list'>
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
<class 'list'>


# 多函数模式

In [20]:
# Sample price strings to be cleaned
str_lst = ['$1.23', '$1123.454', '$899.12312']
def remove_space(str):
    """
        Return the given text with every ' ' (space) character removed.
    """
    return str.replace(' ', '')

def remove_dollar(str):
    """
        Return the given text with every '$' removed; input that contains
        no '$' is returned as-is.
    """
    return str.replace('$', '') if '$' in str else str

def clean_str_lst(str_lst, operations):
    """
        Apply every callable in ``operations``, in order, to each item of
        ``str_lst`` and return the processed items as a new list.
    """
    def apply_all(text):
        # Feed the output of each operation into the next one.
        for transform in operations:
            text = transform(text)
        return text

    return [apply_all(entry) for entry in str_lst]

# The cleaning functions are passed as a list; clean_str_lst applies them to
# every string in the order given here.
clean_operations=[remove_space,remove_dollar]
result=clean_str_lst(str_lst,clean_operations)
print(result)

['1.23', '1123.454', '899.12312']


# 匿名函数

In [22]:
# A lambda is an anonymous, single-expression function; this one squares its argument.
f=lambda x:x**2
f(2)

4

In [26]:
str_lst = ['welcome', 'to', 'python', 'data', 'analysis', 'course']
str_lst.sort(key=lambda word: len(word))  # sort by length
print(str_lst)

str_lst.sort(key=lambda word: word[-1])  # sort by the last letter
print(str_lst)

['to', 'data', 'python', 'course', 'welcome', 'analysis']
['data', 'course', 'welcome', 'python', 'to', 'analysis']


# 生成器 generator

In [36]:
def gen_test():
    """Generator that yields 0, 1 and 2, one value per iteration step."""
    current = 0
    while current < 3:
        yield current
        current += 1
        
gen=gen_test() # calling the function only creates the generator; its body has not run yet
print(type(gen))
print(gen)

<class 'generator'>
<generator object gen_test at 0x0000022126D79888>


In [37]:
for i in gen:# the generator body runs only now, one step per iteration
    print(i)

0
1
2


# python 高阶函数

* 函数式编程
    * 函数本身也可以赋值给变量

In [39]:
import math
# Ordinary call: the function is invoked and its result is displayed.
math.sqrt(25)

5.0

In [40]:
# Without parentheses the name evaluates to the function object itself; nothing is called.
math.sqrt

<function math.sqrt>

In [41]:
# A function can be assigned to a variable; ``fun`` now refers to the same object as math.sqrt.
fun=math.sqrt
fun

<function math.sqrt>

In [42]:
# Calling through the new name invokes math.sqrt.
fun(10)

3.1622776601683795

* 将函数作为参数

In [44]:
def func_add(x, y, f):
    """
        Functional addition: apply ``f`` to ``x`` and to ``y`` and return
        the sum of the two results.
    """
    first = f(x)
    second = f(y)
    return first + second

# Any one-argument callable can be passed as ``f``:
# math.sqrt gives 2.0 + 5.0, abs gives 4 + 25.
print(func_add(4,25,math.sqrt))
print(func_add(-4,25,abs))

7.0
29


# map/reduce
* map

In [51]:
# Squares of 0..9 as an ordinary list.
x_2=[x**2 for x in range(10)]
print(x_2)
print(type(x_2))

# map() returns an iterator rather than a list, so each print below shows the
# map object itself; wrap it in list() to see the mapped values.
x_sqrt_lst=map(math.sqrt,x_2)
print(x_sqrt_lst)

x_2_float_lst=map(float,x_2)
print(x_2_float_lst)

x_2_str_lst=map(str,x_2)
print(x_2_str_lst)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
<class 'list'>
<map object at 0x0000022126DA9438>
<map object at 0x0000022126DA9470>
<map object at 0x0000022126DA9240>


* reduce

In [55]:
from functools import reduce
str_lst=map(str,range(5)) # iterator yielding '0', '1', ..., '4' (a map object, not a list)
print(str_lst)

def make_num(str1, str2):
    """Return int(str1) * 10 + int(str2), i.e. append ``str2`` as the ones digit."""
    high, low = int(str1), int(str2)
    return high * 10 + low

# reduce folds the items left to right: make_num('0','1') -> 1, then
# make_num(1,'2') -> 12, and so on, ending at 1234.
result=reduce(make_num,str_lst)
print(result)

<map object at 0x0000022126DA9668>
1234


# 规范字符串

In [59]:
name_lst=['poNNY MA','rObIN li','steve JOBS','BILL gates']
# str.title capitalizes the first letter of each word and lower-cases the rest
standard_name_lst=map(str.title,name_lst)
print(standard_name_lst)
# the map object has to be converted with list() before its contents can be printed
print(list(standard_name_lst))

<map object at 0x0000022126DA9A90>
['Ponny Ma', 'Robin Li', 'Steve Jobs', 'Bill Gates']


# filter

In [61]:
# Integers from -10 up to and including 9.
number_lst=range(-10,10)

def is_negative(x):
    """Return the result of ``x < 0`` (True for negative numbers)."""
    return x<0

# filter keeps only the items for which the predicate returns a true value
filtered_lst=filter(is_negative,number_lst)
print(number_lst)
# the filter object has to be converted to a list before its contents can be printed
print(list(filtered_lst))
# printing the filter object directly only shows its repr
print(filtered_lst)

range(-10, 10)
[-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]
<filter object at 0x0000022126DA9358>


# map reduce filter 与匿名函数
* map与匿名函数

In [63]:
x_lst=range(10)
# map with a lambda: compute item**2 + item**3 for every item
result_lst=map(lambda item:item**2+item**3,x_lst)
print(x_lst)
print(result_lst)
print(list(result_lst))

range(0, 10)
<map object at 0x0000022126DA9128>
[0, 2, 12, 36, 80, 150, 252, 392, 576, 810]


In [65]:
x_lst=range(1,5)
# reduce with a lambda: multiply the items together (1*2*3*4)
product=reduce(lambda x,y:x*y,x_lst)
print(x_lst)
print(list(x_lst))
print(product)

range(1, 5)
[1, 2, 3, 4]
24


# filter与匿名函数

In [68]:
number_lst=range(-10,10)
# filter with a lambda: keep only the negative numbers
filtered_lst=filter(lambda x:x<0,number_lst)
print(number_lst)
print(list(number_lst))
print(filtered_lst)
print(list(filtered_lst))

range(-10, 10)
[-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
<filter object at 0x0000022126D84DA0>
[-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]


# numpy
* ndarray

In [70]:
import numpy as np
# Generate random data with the requested dimensions (2 rows x 3 columns).
# NOTE(review): no seed is set in this cell, so the printed values change on every run.
data=np.random.rand(2,3)
print(data)
print(type(data))

[[0.32986272 0.26370346 0.25862918]
 [0.33943247 0.60381036 0.77918519]]
<class 'numpy.ndarray'>


* ndim,shape,dtype属性

In [72]:
# The printed labels are: number of dimensions, size of each dimension, element data type.
print('维度个数',data.ndim)
print('各维度大小：',data.shape)
print('数据类型：',data.dtype)

维度个数 2
各维度大小： (2, 3)
数据类型： float64


# 创建ndarray

In [73]:
# Convert a sequence to an ndarray (``lst`` is a range object here, not a list)
lst=range(10)
data=np.array(lst)
print(data)
print(data.shape)
print(data.ndim)

[0 1 2 3 4 5 6 7 8 9]
(10,)
1


In [74]:
# Convert a nested sequence (a list holding two ranges) to a 2-D ndarray
lst2=[range(10),range(10)]
data=np.array(lst2)
print(data)
print(data.shape)

[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
(2, 10)


In [75]:
# np.zeros, np.ones, np.empty

# np.zeros / np.ones fill the new array with 0.0 / 1.0.
# np.empty does not initialize the memory, so the values it prints are arbitrary.
zeros_arr=np.zeros((3,4))
ones_arr=np.ones((2,3))
empty_arr=np.empty((3,3))
empty_int_arr=np.empty((3,3),int)
print(zeros_arr)
print(ones_arr)
print(empty_arr)
print(empty_int_arr)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 6.12641401e-321]
 [1.91077724e+214 6.01386193e-154 6.01347002e-154]]
[[ 972    0  411]
 [ 545  732    0]
 [ 411  545 1056]]


In [76]:
# np.arange(10): the integers 0..9 as a 1-D array
print(np.arange(10))

[0 1 2 3 4 5 6 7 8 9]


# ndarray数据类型

In [77]:
# zeros_
# 跳过

# 矢量化

In [80]:
arr=np.array([[1,2,3],[4,5,6]])
# '*' multiplies element by element (it is not a matrix product)
print('元素相乘')
print(arr*arr)

# '+' adds element by element
print('矩阵相加')
print(arr+arr)

元素相乘
[[ 1  4  9]
 [16 25 36]]
矩阵相加
[[ 2  4  6]
 [ 8 10 12]]


In [81]:
# Arithmetic between an array and a scalar applies the scalar to every element
print(1./arr)
print(2.*arr)

[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[ 2.  4.  6.]
 [ 8. 10. 12.]]


* 索引与切片

In [82]:
# One-dimensional array
arr1=np.arange(10)
print(arr1)

# slice: the elements at indices 2, 3 and 4
print(arr1[2:5])

[0 1 2 3 4 5 6 7 8 9]
[2 3 4]


In [83]:
# Multi-dimensional array: 0..11 arranged as 3 rows x 4 columns
arr2=np.arange(12).reshape(3,4)
print(arr2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [84]:
print(arr2[1])  # row 1
print(arr2[0:2,2:])  # rows 0-1, columns 2 onward
print(arr2[:,1:3])  # all rows, columns 1-2

[4 5 6 7]
[[2 3]
 [6 7]]
[[ 1  2]
 [ 5  6]
 [ 9 10]]


In [85]:
# Boolean (conditional) indexing
# Select the entries of data_arr whose matching year is 2005 or later
data_arr=np.random.rand(3,3)
print(data_arr)

# year_arr has the same 3x3 shape as data_arr, so the boolean mask lines up element by element
year_arr=np.array([[2000,2001,2000],[2005,2002,2009],[2001,2003,2010]])
filtered_arr=data_arr[year_arr>=2005]
print(filtered_arr)

[[0.46974505 0.93971973 0.76820669]
 [0.03611819 0.31547965 0.8621926 ]
 [0.30823207 0.75932821 0.58275245]]
[0.03611819 0.8621926  0.58275245]


In [87]:
# Multiple conditions: the two boolean masks are combined element-wise with '&'
# (year at most 2005 AND year even)
filtered_arr=data_arr[(year_arr<=2005)&(year_arr%2==0)]
print(filtered_arr)

[0.46974505 0.76820669 0.31547965]


* 转置

In [88]:
# transpose() of a 2x3 array swaps its two axes, giving a 3x2 array
arr=np.random.rand(2,3)
print(arr)
print(arr.transpose())

[[0.33012806 0.47486422 0.05028377]
 [0.29766439 0.84176894 0.69633062]]
[[0.33012806 0.29766439]
 [0.47486422 0.84176894]
 [0.05028377 0.69633062]]


In [89]:
arr3d=np.random.rand(2,3,4)
print(arr3d)
print('-'*25)
print(arr3d.transpose((1,0,2))) # swap the first two axes: shape (2, 3, 4) becomes (3, 2, 4)

[[[0.59677978 0.49702209 0.4196907  0.03574513]
  [0.64045272 0.98864343 0.55709733 0.90197004]
  [0.56767785 0.8229236  0.52100975 0.12401919]]

 [[0.9018208  0.41461503 0.96423991 0.89667501]
  [0.51624226 0.59301734 0.91022057 0.89589989]
  [0.88300795 0.34162557 0.53422216 0.51631513]]]
-------------------------
[[[0.59677978 0.49702209 0.4196907  0.03574513]
  [0.9018208  0.41461503 0.96423991 0.89667501]]

 [[0.64045272 0.98864343 0.55709733 0.90197004]
  [0.51624226 0.59301734 0.91022057 0.89589989]]

 [[0.56767785 0.8229236  0.52100975 0.12401919]
  [0.88300795 0.34162557 0.53422216 0.51631513]]]


# 通用函数

In [91]:
# Element-wise universal functions applied to a 2x3 array of random values
arr=np.random.randn(2,3)
print(arr)
print(np.ceil(arr))  # smallest integer >= each element
print(np.floor(arr))  # largest integer <= each element
print(np.rint(arr))  # nearest integer to each element
print(np.isnan(arr))  # True where the element is NaN

[[ 0.16894092  0.42867206 -0.78486066]
 [ 0.91543548 -2.33880745 -0.53548174]]
[[ 1.  1. -0.]
 [ 1. -2. -0.]]
[[ 0.  0. -1.]
 [ 0. -3. -1.]]
[[ 0.  0. -1.]
 [ 1. -2. -1.]]
[[False False False]
 [False False False]]


* ceil,向上最接近的整数
* floor，向下最接近的整数
* rint，舍入到最接近的整数（小数部分恰为 .5 时取最近的偶数，与严格的四舍五入略有不同）
* isnan，判断元素是否为NaN（not a number）
* multiply，元素相乘
* divide，元素相除

* np.where

In [94]:
arr=np.random.randn(3,4)
print(arr)
# element-wise choice: 1 where arr > 0, otherwise -1
np.where(arr>0,1,-1)

[[ 0.01964135  1.15782183 -0.57807656 -0.29382371]
 [ 0.33385994  0.23738802 -0.19049067 -1.37837787]
 [ 0.30265272  0.73458507  0.91958623 -1.37446009]]


array([[ 1,  1, -1, -1],
       [ 1,  1, -1, -1],
       [ 1,  1,  1, -1]])

* 常用的统计方法

In [97]:
arr=np.arange(10).reshape(5,2)
print(arr)
print(np.sum(arr))  # sum of all elements
print(np.sum(arr,axis=0))  # sum down each column (one total per column)
print(np.sum(arr,axis=1))  # sum across each row (one total per row)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
45
[20 25]
[ 1  5  9 13 17]


* np.all和np.any

In [99]:
arr=np.random.randn(2,3)
print(arr)
print(np.any(arr>0))  # True if at least one element is positive
print(np.all(arr>0))  # True only if every element is positive

[[-0.2472489  -0.11884454  1.36611325]
 [-0.51972639 -1.15237055 -1.12232475]]
True
False


* np.unique
* 找到唯一值并返回结果