Some basic NumPy operations, including select, insert, delete, update, etc.

In [4]:
import numpy as np

## Basic

### Types

In [116]:
# Mixing ints and strings makes NumPy store every element as a string in one
# fixed-width unicode dtype (the recorded output shows '<U21').
# Fixed-width string dtypes truncate longer values on assignment, so a
# too-narrow dtype such as '<U1' can silently cut the digits of an integer.
# https://stackoverflow.com/questions/9108837/numpy-recarray-strings-of-variable-length
a = np.array([[1,2,'3']])
a

array([['1', '2', '3']], dtype='<U21')

In [135]:
# Inspect the string array `a`. Only the last expression of a cell is echoed,
# so `a.shape` is evaluated but not shown; the output is the nested Python list.
a.shape
a.tolist()

[['1', '2', '3']]

In [134]:
# Parse the digit strings back into 32-bit integers; `a` itself is not reassigned.
a.astype(np.int32)

array([[1, 2, 3]], dtype=int32)

In [132]:
# Plain integer array; a basic slice takes the first two elements.
b = np.array([1,2,3])
b[:2]

array([1, 2])

## Select & Cut

In [5]:
# Test data for the selection examples: the integers 0..11 laid out as
# 3 rows x 4 columns.
arr = np.arange(12).reshape((3,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

### Select line

In [6]:
# Select the first row; a single integer index returns a 1-D array.
arr[0]

array([0, 1, 2, 3])

In [7]:
# Select the first and last rows; indexing with a list keeps the result 2-D.
arr[[0,-1]]

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11]])

In [8]:
# Keep only the rows whose second column is > 4 AND whose third column is > 6.
# Each comparison yields one boolean per row; the two are combined element-wise
# into a single row mask.
# https://stackoverflow.com/questions/29501823/numpy-filtering-rows-by-multiple-conditions
arr_new = arr[np.logical_and(arr[:, 1] > 4, arr[:, 2] > 6)]
arr_new

array([[ 8,  9, 10, 11]])

### Select column

In [9]:
# Select the first column; the result is a 1-D array with one value per row.
arr[:, 0]

array([0, 4, 8])

In [10]:
# Select the first and last columns; indexing with a list keeps the result 2-D
# (3 rows x 2 columns).
arr[:, [0,-1]]

array([[ 0,  3],
       [ 4,  7],
       [ 8, 11]])

In [11]:
# Get the columns whose first-row value is < 2.
# https://stackoverflow.com/questions/48523663/filter-the-columns-of-a-2d-numpy-array-by-only-one-line
# NOTE(review): single-argument np.where returns a *tuple* of index arrays.
# Indexing with that tuple inserts an extra axis, so `col` has shape (3, 1, 2)
# (see the nested brackets in the recorded output), not the 2-D (3, 2) slice.
# For a 2-D result use `arr[:, ind[0]]` or the boolean mask `arr[:, arr[0] < 2]`.
ind = np.where(arr[0,:] <2)
col = arr[:, ind]
col

array([[[0, 1]],

       [[4, 5]],

       [[8, 9]]])

### Filter

In [12]:
# Boolean-mask indexing: returns a flat 1-D array of every element that is < 2.
arr[arr < 2]

array([0, 1])

### Cut

In [122]:
# Split column-wise into 2 equal-width pieces; the result is a list of arrays.
np.hsplit(arr, 2)

[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

## Insert

In [13]:
# Test data for the insert/append examples:
#   data  - 2x2 integer array
#   empty - a single row of float zeros, shape (1, 2)
#   e_    - zero rows and two columns, shape (0, 2)
data = np.array([[1,2],[3,4]])
empty = np.zeros((1, 2))
e_ = np.zeros((0,2))
data[0], empty

(array([1, 2]), array([[0., 0.]]))

### Insert item

In [14]:
# Insert data[0] as a new row before row 0 of `empty`. A new array is returned;
# its values are floats (see output) because `empty` is a float array.
np.insert(empty, 0, values=data[0], axis=0)

array([[1., 2.],
       [0., 0.]])

### Append array

In [15]:
# Stack arrays row-wise. Only the last expression is echoed: the first call's
# result (which would also contain the zero row of `empty`) is discarded, while
# concatenating onto the zero-row `e_` yields just the rows of `data`, as floats.
np.concatenate((empty, data))
np.concatenate((e_, data))

array([[1., 2.],
       [3., 4.]])

In [16]:
# Without an `axis` argument np.append flattens both inputs, so the result is
# 1-D (see output) rather than a 2-row array.
np.append(empty,data[0])

array([0., 0., 1., 2.])

## Delete

Delete operations are similar to insert operations.

In [17]:
# Test data for the delete examples: integers 0..11 as 3 rows x 4 columns.
# This rebinds `data`, replacing the 2x2 array used in the Insert section.
data = np.arange(12).reshape(3, 4)

### Delete row

In [18]:
# Drop row 0. np.delete returns a new array; `data` itself is left unchanged.
np.delete(data, 0, axis=0)

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

## Operations between arrays

### Comp

In [19]:
# True if at least one element of `arr` is equal to 1
# (the scalar is broadcast against every element).
(arr == 1).any()

True

In [20]:
# True only if every element-wise comparison is equal
# (trivially so here, since `arr` is compared with itself).
(arr==arr).all()

True

## Other

In [53]:
# Random 5x5 integers in [0, 20) used by the sort examples below.
# NOTE(review): no seed is set in this cell, so re-running it will generally not
# reproduce the values recorded in this section's outputs. It also rebinds `data`.
data = np.random.randint(0,20,size=(5,5))
data

array([[16, 15, 11,  1,  5],
       [13,  5,  1,  1, 12],
       [14,  1, 19,  8, 14],
       [18,  9, 10,  3,  4],
       [19, 17, 12, 12, 10]])

### Sort

> https://stackoverflow.com/questions/46289740/how-to-sort-a-2d-numpy-array-lexicographically-by-one-column

In [60]:
# Row indices that would sort the first column in ascending order.
data[:,0].argsort()

array([1, 2, 0, 3, 4])

In [63]:
# The same indices reversed, i.e. the order that sorts the first column descending.
data[:,0].argsort()[::-1]

array([4, 3, 0, 2, 1])

In [57]:
# Reorder whole rows by ascending first column. The function form and the
# method form of argsort are two spellings of the same thing; only the last
# expression is echoed.
data[np.argsort(data[:, 0])]
data[data[:,0].argsort()]

array([[13,  5,  1,  1, 12],
       [14,  1, 19,  8, 14],
       [16, 15, 11,  1,  5],
       [18,  9, 10,  3,  4],
       [19, 17, 12, 12, 10]])

In [61]:
# Reorder whole rows by descending first column (ascending indices reversed).
# Both spellings are equivalent; only the last expression is echoed.
data[np.argsort(data[:, 0])[::-1]]
data[data[:,0].argsort()[::-1]]

array([[19, 17, 12, 12, 10],
       [18,  9, 10,  3,  4],
       [16, 15, 11,  1,  5],
       [14,  1, 19,  8, 14],
       [13,  5,  1,  1, 12]])

### Uniq

In [74]:
# Test data for np.unique: rows 0 and 2 are identical, and so are columns 0 and 1.
d = np.array([[1,1,1],[2,2,1],[1,1,1]])
d

array([[1, 1, 1],
       [2, 2, 1],
       [1, 1, 1]])

In [75]:
# Unique rows: the duplicated row [1, 1, 1] appears once in the result.
np.unique(d,axis=0)

array([[1, 1, 1],
       [2, 2, 1]])

In [76]:
# Unique columns: the two identical columns collapse into one, leaving 2 columns.
np.unique(d,axis=1)

array([[1, 1],
       [1, 2],
       [1, 1]])

In [77]:
# Without `axis`, the array is treated as flat and the distinct scalar values are returned.
np.unique(d)

array([1, 2])

### Matrix

In [133]:
# Transpose a 2x2 matrix: rows become columns.
t = np.array([[1, 2], [3, 4]])
np.transpose(t)

array([[1, 3],
       [2, 4]])

## Create & Load data

Load data from a text file:

In [64]:
# Parse a numeric text file into a 2-D float array (see output). The path is
# relative, so it resolves against the notebook's working directory.
np.loadtxt('data/page-blocks.data')

array([[  5.,   7.,  35., ...,  23.,   6.,   1.],
       [  6.,   7.,  42., ...,  37.,   5.,   1.],
       [  6.,  18., 108., ...,  80.,   7.,   1.],
       ...,
       [  6.,  95., 570., ..., 519., 104.,   1.],
       [  7.,  41., 287., ..., 230.,  45.,   1.],
       [  8.,   1.,   8., ...,   8.,   1.,   4.]])

In [24]:
# 10x5 array of uniform random floats in [0, 1). Unseeded, so values differ on every run.
np.random.rand(10, 5)

array([[0.47464161, 0.79499601, 0.09549415, 0.0492763 , 0.67984528],
       [0.91375557, 0.49841697, 0.63072952, 0.44571339, 0.43655292],
       [0.41119016, 0.96115107, 0.00176761, 0.59910887, 0.71133907],
       [0.0459591 , 0.28300987, 0.61572131, 0.03688486, 0.70255894],
       [0.83295279, 0.04879594, 0.45958794, 0.4979532 , 0.4313398 ],
       [0.23223011, 0.15045083, 0.08571578, 0.17718285, 0.41485307],
       [0.59508781, 0.52188516, 0.01452611, 0.89491179, 0.83759987],
       [0.29650063, 0.71362639, 0.57681011, 0.66371328, 0.415598  ],
       [0.57263208, 0.85182313, 0.15789275, 0.46896263, 0.50182396],
       [0.39722968, 0.94566108, 0.27440981, 0.66897548, 0.82478889]])

In [25]:
# 3x3 array of random integers in [0, 5) (the upper bound is exclusive).
np.random.randint(0,5,size=(3,3))

array([[0, 3, 3],
       [4, 4, 3],
       [1, 2, 0]])

In [29]:
# 1-D array of 10 random integers in [0, 10): a single positional bound is
# the exclusive upper limit, with the lower limit defaulting to 0.
np.random.randint(10, size=10)

array([6, 9, 9, 1, 9, 6, 9, 2, 4, 2])

In [99]:
# Append a row of strings to a row of ints, then recover the numbers.
# `l.T[:, 0]` is the first row of `l` (column 0 of the transpose); it has to be
# converted back explicitly with astype to obtain int32 values again.
# NOTE(review): presumably the concatenation promotes everything to a string
# dtype, as in the Types section - confirm on the NumPy version in use.
l = np.array([[1,2,3]])
l = np.concatenate((l, [['a','b','c']]))
l.T[:,0].astype(np.int32)

array([1, 2, 3], dtype=int32)

In [101]:
# String arrays get a fixed-width unicode dtype sized to the longest element.
np.array(['2'])

array(['2'], dtype='<U1')

In [103]:
# Unpack a 2-element array into two scalars (one per element).
# Note: this rebinds `a` and `b`, which earlier cells used for other arrays.
# Only the last expression (`b`) is echoed; the bare `a` line displays nothing.
a,b = np.array([1,2])
a
b

2