# Task1 数据类型及数据创建

## 1. 常量
### 1）numpy.nan

In [2]:
import numpy as np
# NaN never compares equal to anything, itself included, so this evaluates to False.
np.nan == np.nan

False

In [3]:
# Sample array containing exactly one NaN entry.
x = np.array([1, 2, np.nan, 0, 10])
# Element-wise test: boolean mask that is True where the entry is NaN.
y = np.isnan(x)
print(y)

# Counting the True entries of the mask gives the number of NaN values.
z = np.count_nonzero(y)
print(z)

[False False  True False False]
1


### 2)  numpy.inf 表示正无穷大
###      numpy.pi  表示圆周率
###      numpy.e   表示自然常数

In [5]:
# The constant pi (ratio of a circle's circumference to its diameter).
np.pi

3.141592653589793

In [4]:
# Euler's number e, the base of the natural logarithm.
np.e

2.718281828459045

## 2. 数据类型
numpy的数值类型实际上是dtype对象的实例，每个内建类型都有一个唯一定义它的字符代码。

In [6]:
import numpy as np
# 'b1' is the character code of the 1-byte boolean dtype.
a = np.dtype('b1')
print(a.type)      # scalar type class behind the dtype
print(a.itemsize)  # size of one element in bytes

<class 'numpy.bool_'>
1


In [7]:
# Signed integer dtypes built from their character codes ('i' + size in bytes).
b, c, d, e = (np.dtype(code) for code in ('i1', 'i2', 'i4', 'i8'))

# For each dtype, show its scalar type class and its per-element size in bytes.
for signed_dtype in (b, c, d, e):
    print(signed_dtype.type)
    print(signed_dtype.itemsize)

<class 'numpy.int8'>
1
<class 'numpy.int16'>
2
<class 'numpy.int32'>
4
<class 'numpy.int64'>
8


In [8]:
# Unsigned integer dtypes built from their character codes ('u' + size in bytes).
b, c, d, e = (np.dtype(code) for code in ('u1', 'u2', 'u4', 'u8'))

# For each dtype, show its scalar type class and its per-element size in bytes.
for unsigned_dtype in (b, c, d, e):
    print(unsigned_dtype.type)
    print(unsigned_dtype.itemsize)

<class 'numpy.uint8'>
1
<class 'numpy.uint16'>
2
<class 'numpy.uint32'>
4
<class 'numpy.uint64'>
8


In [9]:
# Floating-point dtypes ('f' + size in bytes).
c, d, e = (np.dtype(code) for code in ('f2', 'f4', 'f8'))
# Byte-string ('S') and unicode-string ('U') dtypes, with and without a length.
a1, a2, a3 = (np.dtype(code) for code in ('S', 'S3', 'U3'))

# For each dtype, show its scalar type class and its per-element size in bytes.
for sample_dtype in (c, d, e, a1, a2, a3):
    print(sample_dtype.type)
    print(sample_dtype.itemsize)

<class 'numpy.float16'>
2
<class 'numpy.float32'>
4
<class 'numpy.float64'>
8
<class 'numpy.bytes_'>
0
<class 'numpy.bytes_'>
3
<class 'numpy.str_'>
12


Python的浮点数通常是64位浮点数， 几乎等同于np.float64
Python的int是灵活的，可以扩展以容纳任何整数并且不会溢出。

In [10]:
import numpy as np
# Machine limits of two fixed-width integer types.
i16 = np.iinfo(np.int16)
i32 = np.iinfo(np.int32)

# Print the smallest and largest representable value of each type.
for int_limits in (i16, i32):
    print(int_limits.min)
    print(int_limits.max)

-32768
32767
-2147483648
2147483647


In [11]:
# Machine limits of two floating-point types.
f16 = np.finfo(np.float16)
f32 = np.finfo(np.float32)

# Print bit width, most negative value, largest value and machine epsilon.
for float_limits in (f16, f32):
    print(float_limits.bits)
    print(float_limits.min)
    print(float_limits.max)
    print(float_limits.eps)

16
-65500.0
65500.0
0.000977
32
-3.4028235e+38
3.4028235e+38
1.1920929e-07


## 3. 时间相关

从字符串创建datetime64类型时，默认情况下，numpy会根据字符串自动选择对应的单位。也可以强制指定使用的单位。

In [14]:
import numpy as np
# Without an explicit unit, NumPy picks the resolution from the string's precision.
a1, a2, a3, a4, a5 = (
    np.datetime64(text)
    for text in (
        '2020-10-19',
        '2020-10',
        '2020-10-19 20:01:35',
        '2020-10-19 20:01',
        '2020-10-19 20',
    )
)

# Show each value next to the dtype (and therefore the unit) NumPy chose for it.
for stamp in (a1, a2, a3, a4, a5):
    print(stamp, stamp.dtype)

2020-10-19 datetime64[D]
2020-10 datetime64[M]
2020-10-19T20:01:35 datetime64[s]
2020-10-19T20:01 datetime64[m]
2020-10-19T20 datetime64[h]


In [16]:
# Forcing day resolution places a month-only string on the first day of that month.
b1 = np.datetime64('2020-10', 'D')
print(b1, b1.dtype)

# Forcing year resolution drops the month part.
b2 = np.datetime64('2020-10', 'Y')
print(b2, b2.dtype)

# A month-resolution value compares equal only to the first day of that month.
print(np.datetime64('2020-10') == np.datetime64('2020-10-01'))
print(np.datetime64('2020-10') == np.datetime64('2020-10-02'))

2020-10-01 datetime64[D]
2020 datetime64[Y]
True
False


从字符串创建datetime64数组时，如果单位不统一，则一律转换成其中最小的单位

In [17]:
# Strings of mixed precision in one array: every element is stored in the
# finest unit present (minutes here).
c1 = np.array(['2020-10', '2020-10-05', '2020-10-10 20:00'], dtype='datetime64')
print(c1, c1.dtype)

['2020-10-01T00:00' '2020-10-05T00:00' '2020-10-10T20:00'] datetime64[m]


使用arange()创建datetime64数组，用于生成日期范围

In [22]:
# Date range with arange(): start is included, stop is excluded, and the
# result here advances one day at a time.
a1 = np.arange('2020-08', '2020-08-06', dtype=np.datetime64)
print(a1)
print(a1.dtype)

['2020-08-01' '2020-08-02' '2020-08-03' '2020-08-04' '2020-08-05']
datetime64[D]


### datetime64和timedelta64运算

timedelta64表示两个datetime64之间的差。timedelta64也是带单位的，并且和相减运算中的两个datetime64中较小（更精细）的单位保持一致。

In [23]:
# Both operands have day resolution, so the difference is expressed in days.
a = np.datetime64('2020-10-08') - np.datetime64('2020-10-07')
# Day resolution minus minute resolution: the result takes the finer unit (minutes).
b = np.datetime64('2020-10-08') - np.datetime64('2020-10-07 23:00')
# The second operand is forced to day resolution first, which drops its time of day.
c = np.datetime64('2020-10-08') - np.datetime64('2020-10-07 23:00', 'D')
print(a, a.dtype)
print(b, b.dtype)
print(c, c.dtype)

1 days timedelta64[D]
60 minutes timedelta64[m]
1 days timedelta64[D]


In [24]:
# Month-resolution date plus a day-resolution delta gives a day-resolution date.
d = np.datetime64('2020-10') + np.timedelta64(20,'D')
# Minute-resolution datetime plus an hour delta stays at minute resolution.
e = np.datetime64('2020-10-15 00:00') + np.timedelta64(12, 'h')
print(d, d.dtype)
print(e, e.dtype)

2020-10-21 datetime64[D]
2020-10-15T12:00 datetime64[m]


生成timedelta64时，年（'Y'）和月('M')这两个单位是无法和其他单位进行运算的。因为一年有几天，一个月有几个小时，这些是不确定的

In [25]:
# Years convert to months exactly (1 year -> 12 months).
a = np.timedelta64(1, 'Y')
b = np.timedelta64(a, 'M')
print(a)
print(b)

# Hours convert to minutes exactly (1 hour -> 60 minutes).
c = np.timedelta64(1, 'h')
d = np.timedelta64(c, 'm')
print(c)
print(d)

1 years
12 months
1 hours
60 minutes


In [26]:
# Year and month deltas have no fixed length in days, so NumPy refuses to
# convert them to 'D'. The error is the point of this cell: catch it and
# display it, so that Restart & Run All does not stop here.
# `a` (1 year) and `b` (12 months) come from the previous cell.
for delta in (a, b):
    try:
        print(np.timedelta64(delta, 'D'))
    except TypeError as err:
        print(f"{type(err).__name__}: {err}")

TypeError: Cannot cast NumPy timedelta64 scalar from metadata [Y] to [D] according to the rule 'same_kind'

In [27]:
# Deltas in several units for the arithmetic examples below.
a = np.timedelta64(1,'Y')
b = np.timedelta64(6,'M')
c = np.timedelta64(1,'W')
d = np.timedelta64(1,'D')
e = np.timedelta64(10,'D')

print(a)
print(b)
print(a + b)  # years + months -> result expressed in months
print(a - b)
print(2 * a)  # scaling by an integer keeps the unit
print(a / b)  # delta / delta -> plain float ratio
print(c / d)  # one week divided by one day
print(c % e)  # remainder is again a timedelta

1 years
6 months
18 months
6 months
2 years
2.0
7.0
7 days


numpy.datetime64和datetime.datetime相互转换

In [28]:
import datetime
# Start from a standard-library datetime object.
dt = datetime.datetime(year=2020, month=10, day=1, hour=20, minute=5,second=30)
# Wrap it as a NumPy datetime64 with second resolution.
dt64 = np.datetime64(dt, 's')
print(dt64, dt64.dtype)

# astype(datetime.datetime) converts back to a standard-library datetime.
dt2 = dt64.astype(datetime.datetime)
print(dt2, type(dt2))

2020-10-01T20:05:30 datetime64[s]
2020-10-01 20:05:30 <class 'datetime.datetime'>


### datetime64的应用

In [31]:
import numpy as np
# Move one business day forward from 2020-10-16; the result skips the weekend.
a = np.busday_offset('2020-10-16', offsets=1)
print(a)

2020-10-19


In [32]:
# 2020-10-17 is not a business day, and without a `roll` rule busday_offset
# rejects such a start date. The error is the point of this cell: catch it
# and display it, so that Restart & Run All does not stop here.
try:
    b = np.busday_offset('2020-10-17', offsets=1)
    print(b)
except ValueError as err:
    busday_error = f"{type(err).__name__}: {err}"
    print(busday_error)

ValueError: Non-business day date in busday_offset

In [34]:
# With offsets=0 the roll rule alone decides the result for a non-business day:
# 'forward' snaps to the next business day, 'backward' to the previous one.
a = np.busday_offset('2020-10-17', offsets=0, roll='forward')
b = np.busday_offset('2020-10-17', offsets=0, roll='backward')
print(a)
print(b)

2020-10-19
2020-10-16


In [36]:
# is_busday reports whether a given date is a business day.
a = np.is_busday('2020-07-10')
b = np.is_busday('2020-07-11')
print(a)
print(b)

True
False


In [39]:
start = np.datetime64('2020-07-10')
end = np.datetime64('2020-07-20')
# Every calendar day from start (included) up to end (excluded).
a = np.arange(start, end, dtype='datetime64')
# Two ways to count the business days in that range; they give the same answer:
b1 = np.count_nonzero(np.is_busday(a))  # mask each day, then count the True entries
b2 = np.busday_count(start, end)        # let NumPy count directly
print(a)
print(b1)
print(b2)

['2020-07-10' '2020-07-11' '2020-07-12' '2020-07-13' '2020-07-14'
 '2020-07-15' '2020-07-16' '2020-07-17' '2020-07-18' '2020-07-19']
6
6


## 4. 数组

### 1）根据现有数据来创建ndarray

### 通过array() 函数进行创建

In [2]:
import numpy as np
# A flat list becomes a 1-D array; a nested list becomes a 2-D array.
a = np.array([0, 1, 2, 3, 4])
b = np.array([[0, 2, 4]])
print(a, type(a))
print(b, type(b))

[0 1 2 3 4] <class 'numpy.ndarray'>
[[0 2 4]] <class 'numpy.ndarray'>


### 通过asarray()函数进行创建

array()和asarray()都可以将结构数据转化为 ndarray。二者的主要区别是：当数据源本身是 ndarray 时，array()默认仍然会 copy 出一个副本，占用新的内存；而 asarray()在不需要改变 dtype 的情况下不会复制，直接返回原数组。

In [3]:
import numpy as np

x = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
# The source is a plain list, so array() and asarray() both build a new ndarray.
y = np.array(x)
z = np.asarray(x)
# Changing the list afterwards therefore affects neither y nor z.
x[1][2] = 2
print(x,type(x))
print(y,type(y))
print(z,type(z))

[[1, 1, 1], [1, 1, 2], [1, 1, 1]] <class 'list'>
[[1 1 1]
 [1 1 1]
 [1 1 1]] <class 'numpy.ndarray'>
[[1 1 1]
 [1 1 1]
 [1 1 1]] <class 'numpy.ndarray'>


In [5]:
import numpy as np

x = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
# array() copies an ndarray source by default, so y is independent of x.
y = np.array(x)
# asarray() hands back the source array itself when no dtype conversion is needed.
z = np.asarray(x)
# The builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24. It names the default integer dtype that x
# already has, so no conversion (and no copy) happens here either.
w = np.asarray(x, dtype=int)
# This write is visible through z and w (same data as x) but not through y.
x[1][2] = 2
print(x,type(x),x.dtype)

print(y,type(y),y.dtype)

print(z,type(z),z.dtype)

print(w,type(w),w.dtype)


[[1 1 1]
 [1 1 2]
 [1 1 1]] <class 'numpy.ndarray'> int64
[[1 1 1]
 [1 1 1]
 [1 1 1]] <class 'numpy.ndarray'> int64
[[1 1 1]
 [1 1 2]
 [1 1 1]] <class 'numpy.ndarray'> int64


### 通过fromfunction()函数进行创建

In [6]:
import numpy as np

def f(x, y):
    """Return ``10 * x + y``; passed to ``np.fromfunction`` below as the element rule."""
    return 10 * x + y
# Each element's value is computed from its (row, column) indices via f.
x1 = np.fromfunction(f, (5, 4), dtype=int)
print(x1)

# Same idea with a lambda: True exactly where the row index equals the column index.
x2 = np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int)
print(x2)

[[ 0  1  2  3]
 [10 11 12 13]
 [20 21 22 23]
 [30 31 32 33]
 [40 41 42 43]]
[[ True False False]
 [False  True False]
 [False False  True]]


### 2）根据 ones 和 zeros填充方式来创建

zeros()函数：返回给定形状和类型的零数组

zeros_like()函数： 返回与给定数组形状和类型相同的零数组

ones()函数： 返回给定形状和类型的1数组

ones_like()函数： 返回与给定数组形状和类型相同的1数组

In [9]:
# 1-D array of five zeros; the printed values are floats.
x1 = np.zeros(5)
print(x1)

# 2-D array of zeros with shape (2, 3).
x2 = np.zeros([2, 3])
print(x2)

# zeros_like takes shape and dtype from x3, so the result is an integer array.
x3 = np.array([[1, 2, 3], [4, 5, 6]])
y = np.zeros_like(x3)
print(y)

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[0 0 0]
 [0 0 0]]


empty()函数： 返回一个未初始化的数组，数组元素是内存中残留的任意值（并非真正意义上的随机数）

empty_like()函数： 返回与给定数组形状和类型相同的新数组

In [15]:
# empty() allocates without initialising: the printed values are whatever
# happened to be in memory and will differ between runs.
x1 = np.empty(10)
print(x1)

x2 = np.empty((3, 2))
print(x2)

# empty_like takes shape and dtype from x3; the contents are still uninitialised.
x3 = np.array([[1, 2, 3], [4, 5, 6]])
y = np.empty_like(x3)
print(y)

[1.28822975e-231 1.28822975e-231 6.08419698e-310 2.24371118e-314
 1.00126704e-089 8.40835455e-315 0.00000000e+000 5.49839763e-315
 0.00000000e+000 6.95335581e-309]
[[1. 0.]
 [0. 0.]
 [1. 0.]]
[[0 0 0]
 [0 0 0]]


eye()函数:  返回一个对角线上为1， 其他地方为0的单位数组

identity()函数:  返回一个方形的单位数组（单位矩阵）

In [13]:
import numpy as np 
# 4x4 array with ones on the main diagonal.
x1 = np.eye(4)
print(x1)

# eye() also accepts a non-square shape: ones on the main diagonal of a 2x3 array.
x2 = np.eye(2, 3)
print(x2)

# identity() takes a single size and always produces a square array.
x3 = np.identity(4)
print(x3)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[1. 0. 0.]
 [0. 1. 0.]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [16]:
# diag(): extract a diagonal from a 2-D array, or build a diagonal array.
x = np.arange(9).reshape((3, 3))
print(x)

print(np.diag(x))  # main diagonal
print(np.diag(x, k=1))   # first diagonal above the main one (upper right)
print(np.diag(x, k=-1)) # first diagonal below the main one (lower left)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[0 4 8]
[1 5]
[3 7]


In [17]:
# Given a 1-D sequence, diag() builds a square array with it on the main diagonal.
v = [1, 2, 3, 4]
x = np.diag(v)
print(x)

[[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]


full()函数：返回一个常数数组

full_like()函数：返回与给定数组具有相同形状和类型的常数数组

In [19]:
import numpy as np
# The shape may be a tuple or a bare integer; both give a length-2 array of 7s.
x1 = np.full((2, ), 7)
print(x1)

x2 = np.full(2, 7)
print(x2)

# full_like takes its shape from x3 and fills it with the given constant.
x3 = np.array([[1, 2, 3], [6, 7, 8]])
y = np.full_like(x3, 7)
print(y)

[7 7]
[7 7]
[[7 7 7]
 [7 7 7]]


### 3) 用数值范围来创建ndarray

arange()函数：返回给定间隔内的均匀间隔的值

linspace()函数：返回指定间隔内的等间隔数字

logspace()函数：返回在对数刻度上均匀分布的数字

numpy.random.random()：返回一个由半开区间 [0, 1) 内的随机浮点数组成的数组


In [22]:
# arange(start, stop, step): stop is excluded, so this yields 3 and 5.
x1 = np.arange(3, 7, 2)
print(x1)

# linspace: 6 evenly spaced points from 0 to 3, both endpoints included.
x2 = np.linspace(start=0, stop=3, num=6)
print(x2)

# logspace: 4 points whose base-10 exponents run evenly from -1 to 2.
x3 = np.logspace(-1, 2, 4)
print(x3)

[3 5]
[0.  0.6 1.2 1.8 2.4 3. ]
[  0.1   1.   10.  100. ]


In [23]:
# 2x3 array of random floats; no seed is set, so the values differ on every run.
x4 = np.random.random([2, 3])
print(x4)

[[0.49357435 0.84817721 0.81659682]
 [0.17686791 0.32069319 0.34159244]]


In [26]:
# Without an explicit dtype the mixed tuples are coerced to one common type:
# every field, including the numbers, ends up as a string.
a = np.array([('LiMing', 24, 63.9), ('Mike', 15, 67.), ('Duo', 34, 45.8)])
print(a, type(a))

[['LiMing' '24' '63.9']
 ['Mike' '15' '67.0']
 ['Duo' '34' '45.8']] <class 'numpy.ndarray'>


### 4) 结构数组的创建
利用字典来定义结构

In [31]:
# Structured dtype described by a dict: one name and one format per field
# ('U30' unicode string, 'i8' 8-byte integer, 'f8' 8-byte float).
personType = np.dtype({
    'names': ['name', 'age', 'weight'],
    'formats': ['U30', 'i8', 'f8']})

# Each tuple becomes one record, and every field keeps its own type.
a = np.array([('LiMing', 24, 63.9), ('Mike', 15, 67.), ('Duo', 34, 45.8)], dtype=personType)
print(a, type(a))

[('LiMing', 24, 63.9) ('Mike', 15, 67. ) ('Duo', 34, 45.8)] <class 'numpy.ndarray'>


In [34]:
# An integer index returns a single record; a slice returns an array of records.
print(a[0])
print(a[-2:])

('LiMing', 24, 63.9)
[('Mike', 15, 67. ) ('Duo', 34, 45.8)]


In [35]:
# Indexing with a field name returns that field's values across all records.
print(a['name'])
print(a['age'])

['LiMing' 'Mike' 'Duo']
[24 15 34]


### 5) 数组的属性

numpy.ndarray.ndim用于返回数组的维数（轴的个数）也称为秩，一维数组的秩为 1，二维数组的秩为 2，以此类推。

numpy.ndarray.shape表示数组的维度，返回一个元组，这个元组的长度就是维度的数目，即 ndim 属性(秩)。

numpy.ndarray.size数组中所有元素的总量，相当于数组的shape中所有元素的乘积，例如矩阵的元素总量为行与列的乘积。

numpy.ndarray.dtype ndarray 对象的元素类型。

numpy.ndarray.itemsize以字节的形式返回数组中每一个元素的大小。


In [36]:
## All elements of an ndarray must share one type; mixed inputs are automatically
## upcast (not downcast) along int -> float -> str.

b = np.array([[1, 2, 3], [4, 5, 6.0]])  # the single 6.0 makes the whole array float64
print(b.shape) 
print(b.dtype) 
print(b.size)
print(b.ndim)
print(b.itemsize)

(2, 3)
float64
6
2
8
