Some basic operations about numpy, including select, insert, delete, updata etc

In [8]:
import numpy as np
from scipy import stats

## Experiment

In [27]:
np.zeros(1,2)

TypeError: data type not understood

In [24]:
a = np.array(range(9)).reshape(3,3)
a
b = np.array([6,7,8])
# stats.zscore(a)
a = a.astype(np.float)

In [25]:
a[:,2] = stats.zscore(a[:,2])
stats.zscore(b),a

(array([-1.22474487,  0.        ,  1.22474487]),
 array([[ 0.        ,  1.        , -1.22474487],
        [ 3.        ,  4.        ,  0.        ],
        [ 6.        ,  7.        ,  1.22474487]]))

## Basic

### Types

In [3]:
# string type has length limit ! too short <U1 will cut integer
# https://stackoverflow.com/questions/9108837/numpy-recarray-strings-of-variable-length
a = np.array([[1,2,'3']])
a

array([['1', '2', '3']], dtype='<U21')

In [4]:
# string type has length limit ! too short <U1 will cut integer
a.shape
a.tolist()

[['1', '2', '3']]

In [5]:
# string type has length limit ! too short <U1 will cut integer
a = a.astype(np.int32)
a

array([[1, 2, 3]], dtype=int32)

In [6]:
a[:,0].dtype=('<U1')
a

array([[1, 2, 3]], dtype=int32)

In [7]:
# string type has length limit ! too short <U1 will cut integer
b = np.array([1,2,3])
b[:2]

array([1, 2])

### Shape

### Shape

In [8]:
col = np.array([1,2,3]).T
col.shape
col = np.array([[1],[2],[3]])

In [9]:
np.isscalar(data[0][0]), np.isscalar(np.array(1)), data[0] @ data[0].T, np.array([[1],[2],[3]])[0].shape,\
np.array([1,2,3]).shape, col[0]*col[1]

NameError: name 'data' is not defined

## Select & Cut

In [10]:
# create data for test
arr = np.arange(12).reshape((3,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

### Basic Selection

In [11]:
# get first line and column
arr[0], arr[:,0]

(array([0, 1, 2, 3]), array([0, 4, 8]))

In [12]:
# get first and last line and column
arr[[0,-1]], arr[:,[0,-1]]

(array([[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]]), array([[ 0,  3],
        [ 4,  7],
        [ 8, 11]]))

### Advanced Selection

In [13]:
# select exclude some line or column
np.delete(arr, ([0, 1]), axis=0), np.delete(arr, ([1,2,3]), axis=1)

(array([[ 8,  9, 10, 11]]), array([[0],
        [4],
        [8]]))

In [14]:
# select lines by column value
# select lines where the second column value > 3 and third column < 7
# https://stackoverflow.com/questions/29501823/numpy-filtering-rows-by-multiple-conditions
r1 = arr[(arr[:, 1] > 3) & (arr[:, 2] < 7)]
r2 = (arr[:, 1] > 3) & (arr[:,2] < 7)
r3 = np.where(r2)
r1, r2, r3, arr[r3]

(array([[4, 5, 6, 7]]),
 array([False,  True, False]),
 (array([1]),),
 array([[4, 5, 6, 7]]))

In [15]:
# select column by line value
# get columns where the first row < 2
# https://stackoverflow.com/questions/48523663/filter-the-columns-of-a-2d-numpy-array-by-only-one-line
ind = np.where(arr[0,:] <2)
col = arr[:, ind]
col,arr[:, arr[0] > 0]

(array([[[0, 1]],
 
        [[4, 5]],
 
        [[8, 9]]]), array([[ 1,  2,  3],
        [ 5,  6,  7],
        [ 9, 10, 11]]))

In [16]:
arr[np.where(arr < 6)]

array([0, 1, 2, 3, 4, 5])

In [17]:
multi_val = [['a', 'b'],['b','c'],['c', 'd']]
np.unique(multi_val)

array(['a', 'b', 'c', 'd'], dtype='<U1')

### Split

In [18]:
np.hsplit(arr, 2)

[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

## Update

In [19]:
# data for test
data = np.array([1,2,3,4]).reshape((2,2))
empty = np.zeros((0, 2))
data, empty

(array([[1, 2],
        [3, 4]]), array([], shape=(0, 2), dtype=float64))

### Insert and Delete

In [20]:
insert1 = np.insert(empty, 0, values=data, axis=0),
insert2 = np.insert(empty, 0, values=data[0], axis=0),
insert1, insert2, empty

((array([[1., 2.],
         [3., 4.]]),),
 (array([[1., 2.]]),),
 array([], shape=(0, 2), dtype=float64))

In [21]:
np.delete(data, (0), axis=0)

array([[3, 4]])

### Append array

In [22]:
c1 = np.concatenate((empty, data))
# sort
c2 = np.concatenate((data, data[[1, 0]]))
# this is wrong! for data[0] shape is (2, ), dimension is different
# c3 = np.contatenate((empty, data[0]))
c1, c2, data[0].shape

(array([[1., 2.],
        [3., 4.]]), array([[1, 2],
        [3, 4],
        [3, 4],
        [1, 2]]), (2,))

In [23]:
a1 = np.append(empty,data[0])
a2 = np.append(empty,data)
a1, a2

(array([1., 2.]), array([1., 2., 3., 4.]))

## Operations between arrays

### Comp

In [24]:
# check if any item is equl
(np.array([1])==arr).any()

True

In [25]:
# check if every item is equl
(arr==arr).all()

True

### Numeric Calculation

In [26]:
arr = np.array(range(4)).reshape(2,2)
arr

array([[0, 1],
       [2, 3]])

In [27]:
arr + arr, arr + 1, arr * 2, arr + arr[0]

(array([[0, 2],
        [4, 6]]), array([[1, 2],
        [3, 4]]), array([[0, 2],
        [4, 6]]), array([[0, 2],
        [2, 4]]))

### Matrix Calculation

In [28]:
mx = np.array(range(4)).reshape(2,2)
mx @ mx

array([[ 2,  3],
       [ 6, 11]])

## Tools

### Generate

In [29]:
np.array(range(6)).reshape(2,3)

array([[0, 1, 2],
       [3, 4, 5]])

### Random

In [30]:
data = np.random.randint(0,20,size=(5,5))
data

array([[15,  8, 13, 14, 17],
       [14, 10,  5,  7,  6],
       [ 1,  8,  1, 13, 10],
       [ 5,  3,  5, 16, 10],
       [17, 18,  7, 15, 14]])

In [31]:
# create random array
np.random.rand(10, 5)

array([[0.21447121, 0.84092307, 0.24052402, 0.39869401, 0.31895834],
       [0.72048181, 0.63438915, 0.43680057, 0.19320846, 0.44584168],
       [0.85473538, 0.09337511, 0.57345203, 0.320588  , 0.06976489],
       [0.54294948, 0.92959569, 0.34658293, 0.43060896, 0.83568287],
       [0.94340471, 0.33637083, 0.73811504, 0.20249624, 0.07569047],
       [0.60251886, 0.93653618, 0.54759946, 0.54394652, 0.74409516],
       [0.92629177, 0.37953485, 0.22742165, 0.97682983, 0.47622205],
       [0.29390018, 0.0884032 , 0.77060091, 0.96901223, 0.15491035],
       [0.89234521, 0.9362115 , 0.44049641, 0.33324787, 0.70959939],
       [0.65788027, 0.0208121 , 0.03833889, 0.75990373, 0.47961012]])

In [32]:
# create random int array
np.random.randint(10, size=10)

array([4, 1, 7, 6, 8, 0, 7, 9, 0, 2])

In [33]:
# create random int array
np.random.randint(0,5,size=(3,3))

array([[4, 3, 2],
       [0, 4, 4],
       [2, 0, 2]])

In [34]:
# sum to 1
# https://stackoverflow.com/questions/18659858/generating-a-list-of-random-numbers-summing-to-1
np.random.dirichlet(np.ones(3), 1)[0]

array([0.2932914 , 0.61019887, 0.09650973])

### Sort

> https://stackoverflow.com/questions/46289740/how-to-sort-a-2d-numpy-array-lexicographically-by-one-column

In [35]:
# sorted indices of the first column, asc
data[:,0].argsort()

array([2, 3, 1, 0, 4])

In [36]:
# sorted indices of the first column, desc
data[:,0].argsort()[::-1]

array([4, 0, 1, 3, 2])

In [37]:
# same
data[np.argsort(data[:, 0])]
data[data[:,0].argsort()]

array([[ 1,  8,  1, 13, 10],
       [ 5,  3,  5, 16, 10],
       [14, 10,  5,  7,  6],
       [15,  8, 13, 14, 17],
       [17, 18,  7, 15, 14]])

In [38]:
# reverse sort
data[np.argsort(data[:, 0])[::-1]]
data[data[:,0].argsort()[::-1]]

array([[17, 18,  7, 15, 14],
       [15,  8, 13, 14, 17],
       [14, 10,  5,  7,  6],
       [ 5,  3,  5, 16, 10],
       [ 1,  8,  1, 13, 10]])

### Uniq

In [39]:
d = np.array([[1,1,1],[2,2,1],[1,1,1]])
d

array([[1, 1, 1],
       [2, 2, 1],
       [1, 1, 1]])

In [40]:
np.unique(d,axis=0), np.unique(d, axis=1), np.unique(d)

(array([[1, 1, 1],
        [2, 2, 1]]), array([[1, 1],
        [1, 2],
        [1, 1]]), array([1, 2]))

## Create & Load datas

load data from txt

In [43]:
np.loadtxt('data/page-blocks.data')

array([[  5.,   7.,  35., ...,  23.,   6.,   1.],
       [  6.,   7.,  42., ...,  37.,   5.,   1.],
       [  6.,  18., 108., ...,  80.,   7.,   1.],
       ...,
       [  6.,  95., 570., ..., 519., 104.,   1.],
       [  7.,  41., 287., ..., 230.,  45.,   1.],
       [  8.,   1.,   8., ...,   8.,   1.,   4.]])

In [44]:
a,b = np.array([1,2])
c = np.array([1,2])
a, b, c

(1, 2, array([1, 2]))

In [46]:
a = np.array([[1, 10000000, 4000],
             [2, 300020493, 2343]])
(a - a.mean())/a.std()


array([[-0.4649813 , -0.37499272, -0.46494532],
       [-0.46498129,  2.23486086, -0.46496023]])

**Some References:**
[Difference between numpy.array shape (R, 1) and (R,)](https://stackoverflow.com/questions/22053050/difference-between-numpy-array-shape-r-1-and-r)

[What are the differences between numpy arrays and matrices? Which one should I use?](What are the differences between numpy arrays and matrices? Which one should I use?)



In [49]:
from tensorflow.examples.tutorials.mnist import input_data

In [50]:
input_data.read_data_sets('./data/', one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.


URLError: <urlopen error [Errno 104] Connection reset by peer>

In [54]:
arr = np.array(range(12)).reshape((3,4))

In [62]:
np.shape(arr[:,0])

(3,)

In [63]:
np.full((3,), 7)

array([7, 7, 7])

In [64]:
arr[range(2)]

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [65]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [72]:
arr = np.array(range(100)).reshape((10,10))

In [75]:
np.vsplit(arr, [8])

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
        [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
        [70, 71, 72, 73, 74, 75, 76, 77, 78, 79]]),
 array([[80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
        [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])]

In [76]:
arr = arr/10

In [77]:
arr

array([[0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
       [1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9],
       [2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9],
       [3. , 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9],
       [4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9],
       [5. , 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9],
       [6. , 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9],
       [7. , 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9],
       [8. , 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9],
       [9. , 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9]])