## NumPy Basics
### Concatenation of arrays
**np.concatenate, np.vstack, and np.hstack**

### Splitting of arrays
**np.split, np.hsplit, and np.vsplit**

In [None]:
import numpy as np

In [None]:
# Cut a flat sequence at positions 3 and 5, which yields three pieces.
x = [1, 2, 3, 99, 99, 3, 2, 1]
split_points = [3, 5]
x1, x2, x3 = np.split(x, split_points)
print(x1)

# Cut a 4x4 grid between its second and third rows.
y = np.arange(16).reshape(4, 4)
upper, lower = np.vsplit(y, [2])
for piece in (y, upper, lower):
    print(piece)

In [None]:
# NOTE(review): the %timeit line magic used below is provided by IPython;
# this module import is not referenced anywhere else in this cell.
import timeit 
# One million random integers drawn from [1, 100); the lower bound of 1 means
# no element is zero, so the reciprocal below never divides by zero.
big_array = np.random.randint(1, 100, size=1000000)
# Time the vectorized reciprocal over the whole array.
%timeit (1.0 / big_array)

### Aggregates

#### reduce, accumulate, outer

In [None]:
# Apply the reducing and accumulating ufunc methods to the integers 1..5.
x = np.arange(1, 6)
for ufunc_method in (
    np.add.reduce,           # sum of all elements
    np.multiply.reduce,      # product of all elements
    np.add.accumulate,       # running sums
    np.multiply.accumulate,  # running products
):
    print(ufunc_method(x))

# outer applies the ufunc to every pair of elements: a multiplication table.
print(np.multiply.outer(x, x))

#### sum

In [None]:
# Stack 1..5 on top of 6..10 to get a 2x5 array.
y = np.arange(6, 11)
x = np.vstack((np.arange(1, 6), y))
print(x)

# The builtin sum iterates over the first axis, so it adds the two rows
# element-wise and returns one total per column.
column_totals = sum(x)
print(column_totals)

# np.sum and the .sum() method both total every element of the array.
grand_total = np.sum(x)
print(grand_total)
print(x.sum())

#### min, max

In [None]:
# Build a 2x5 array holding 1..10 (1..5 stacked on top of 6..10).
x = np.arange(1, 6)
y = np.arange(6, 11)
x = np.vstack([x, y])
print(x)

# Function form and method form of min/max. Both lines reduce the same
# stacked array so their results are directly comparable.
print(np.min(x), np.max(x))
print(x.min(), x.max())
# Time the array's own min() method using the IPython %timeit line magic.
%timeit x.min()

#### Multidimensional aggregates

In [None]:
# Use a seeded generator so the printed matrix and every aggregate below are
# identical on each Restart & Run All.
rand = np.random.RandomState(42)
M = rand.randint(20, size=(3, 5))  # 3x5 integers drawn from [0, 20)
print(M)

print(M.sum())                # total over every element
print(M.sum(axis=0))          # collapse the rows: one sum per column
print(M.sum(axis=1))          # collapse the columns: one sum per row
print(np.median(M))           # median of the flattened array
print(np.median(M, axis=0))   # median of each column
print(np.percentile(M, 10))   # 10th percentile of the flattened array



### Fancy Indexing
#### Indexing with broadcasting indexing array

In [None]:
X = np.arange(12).reshape(3, 4)
print(X)

row, col = np.array([0, 1, 2]), np.array([2, 1, 3])
print(np.array([row, col]))

# Turning `row` into a column vector makes it broadcast against `col`.
row_as_column = row[:, np.newaxis]
print([row_as_column, col])

# Same-shaped index arrays are paired up: one element per (row, col) pair.
print(X[row, col])
# A (3, 1) index broadcast against a (3,) index selects a full 3x3 grid.
print(X[row_as_column, col])

#### Combined Indexing

In [None]:
X = np.arange(12).reshape(3, 4)

row = np.array([0, 1, 2])
row_as_column = row[:, np.newaxis]
print("row shape", row.shape)
print("row newaxis shape:", row_as_column.shape)
print("X: \n", X)

# Insert the new axis at the front, in the middle, and at the end of X.
for Xnewaxis in (X[np.newaxis], X[:, np.newaxis], X[:, :, np.newaxis]):
    print("X newaxis:\n", Xnewaxis)
    print("X newaxis shape:", Xnewaxis.shape)

# Indexing with only the column-shaped row index keeps the extra axis.
rows_picked = X[row_as_column]
print(rows_picked)
print(rows_picked.shape)

# First pass: an integer array of 0s and 1s is used as column *indices*.
# Second pass: the same values with dtype=bool act as a column *mask*.
print("X: \n", X)
for mask in (np.array([1, 0, 1, 0]), np.array([1, 0, 1, 0], dtype=bool)):
    print("mask:", [row_as_column, mask])
    picked = X[row_as_column, mask]
    print("X with mask: \n", picked)
    print("rowbymask shape:", picked.shape)

# Same boolean column mask applied to a smaller 2x4 array.
X = np.arange(8).reshape(2, 4)
mask = np.array([1, 0, 1, 0], dtype=bool)
print(X[np.arange(2)[:, np.newaxis], mask])


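> Take care: a mask with `dtype=bool` is not broadcast like an integer index array — it selects the positions where it is `True`. An integer array of 0s and 1s is treated as indices rather than as a mask, so the boolean array has to be created explicitly (e.g. with `dtype=bool`).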

#### Modifying Values with Fancy Indexing

In [None]:
# Interleaved (start, stop) positions that are handed to reduceat below.
boundaries = [0, 4, 1, 5, 2, 6, 3, 7]
indice = np.array(boundaries)
values = np.arange(8)

# Every second entry, taken from the array and from the plain list.
print(indice[::2])
print(boundaries[::2])
print(values)

# reduceat returns one result per boundary; the even-positioned results are
# the sums over values[0:4], values[1:5], values[2:6] and values[3:7].
segment_sums = np.add.reduceat(values, boundaries)
print(segment_sums)
print(segment_sums[::2])

In [None]:
# Reference on universal functions:
# http://lijin-thu.github.io/03.%20numpy/03.16%20universal%20functions.html

In [None]:
x = np.arange(10)
i = np.array([0, 0])  # index 0 is deliberately listed twice

# Assignment through a repeated index: both values are written to x[0],
# and the later one (6) is what remains.
x[i] = [4, 6]
print(x)

# Spelled-out form of `x[i] += 1`: x[i] is read once, incremented, and
# written back, so x[0] grows by 1 only, not once per repetition.
x[i] = x[i] + 1
print(x)

# np.add.at applies the addition once for every occurrence of the index,
# so x[0] grows by 2 here.
np.add.at(x, i, 1)
print(x)

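> Take care of the difference between augmented assignment through fancy indexing (`x[i] += 1`) and `np.add.at(x, i, 1)`: with repeated indices, the former applies the operation only once per position, while the latter applies it once per occurrence.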

### Sort

In [None]:
rand = np.random.RandomState(42)
X = rand.randint(0, 10, (4, 6))
print("X:\n", X)

# axis=None flattens the array before sorting.
x = np.sort(X, axis=None)
print("X sorted:\n", x)

# With no axis given, np.sort works along the last axis: each row is sorted.
x = np.sort(X)
print("X sorted:\n", x)

# argsort returns the positions that would sort each row.
i = X.argsort()
print("indice of sorted X: \n", i)

# Explicit axis=1 gives the same result as the default for a 2-D array.
x = np.sort(X, axis=1)
print("X sorted along rows:\n", x)

# axis=0 sorts every column independently.
x = np.sort(X, axis=0)
print("X sorted along columns:\n", x)

In [None]:
rand = np.random.RandomState(42)
X = rand.randint(0, 10, (10, 2))
print(X)

# Two views of the same 10 points: (10, 1, 2) and (1, 10, 2).
points_as_rows = X[:, np.newaxis, :]
points_as_cols = X[np.newaxis, :, :]
for view in (points_as_rows, points_as_cols):
    print(view)
    print(view.shape)

# Broadcasting the two views yields every pairwise coordinate difference.
differences = points_as_rows - points_as_cols
print("difference shape: ", differences.shape)
sq_differences = differences ** 2
print("sq_differences shape: ", sq_differences.shape)

# Summing over the last (coordinate) axis gives the squared distance
# between every pair of points.
dist_sq = sq_differences.sum(axis=-1)
print(dist_sq.shape)


### NumPy’s Structured Arrays

In [2]:
name = ['Alice', 'Bob', 'Cathy', 'Doug']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

# One record per person: a 10-character unicode name, a 4-byte integer age
# and an 8-byte float weight.
person_dtype = np.dtype([('name', 'U10'), ('age', 'i4'), ('weight', 'f8')])
data = np.zeros(len(name), dtype=person_dtype)
print(data.dtype)

# Fill the record array one named field at a time.
for field, column in zip(data.dtype.names, (name, age, weight)):
    data[field] = column
print(data)

# Indexing by field name returns that field for every record.
data['name']

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]
[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

> Pandas is a better choice for structured data