# Numpy Module
> Stands for *Numerical Python*.  
> The core **NumPy** object is the n-dimensional array, or `ndarray`.  
> Provides an efficient way of storing *homogeneous* data.  
> An ndarray can have a single dimension (one row).  
> We can have multi-dimensional arrays with more than one row $[[row1a,row1b],[row2a,row2b]]$.

## From List to Rank 1 array

In [1]:
import numpy as np

# A plain Python list is the starting point
a1 = [1, 2, 3, 4, 5]
print(type(a1))

# np.array() converts the list into a rank-1 ndarray
n1 = np.array(a1)
type(n1)

<class 'list'>


numpy.ndarray

## Slicing (Same as List)

In [2]:
a  = [1,2,11,6,8,18,2]
na = np.array(a)

In [3]:
na[2]

11

In [4]:
na[1:5]

array([ 2, 11,  6,  8])

In [5]:
na[1:5:2]

array([2, 6])

## Multidimensional array
> - Passing a list of lists to `np.array` creates a multidimensional array.  

In [6]:
# Two rows with three entries each
a2 = [
    [1, 2, 3],
    [4, 5, 6],
]
n2 = np.array(a2)
print(n2)
print(n2.shape)  # (number of rows, number of columns)

[[1 2 3]
 [4 5 6]]
(2, 3)


In [7]:
# Array of zeros with shape (2, 2); without a dtype the entries are floats
z1 = np.zeros(shape=(2, 2))
print(z1)

[[0. 0.]
 [0. 0.]]


In [8]:
# dtype controls the data type of the elements (here: integers)
z2 = np.zeros(shape=(2, 2), dtype=int)
print(z2)

[[0 0]
 [0 0]]


In [9]:
# np.full builds an array of the given shape (d1, d2)
# with every entry set to fill_value
f1 = np.full(shape=(2, 2), fill_value=5)
print(f1)

[[5 5]
 [5 5]]


In [10]:
# Identity array: N rows, M columns, ones on the main diagonal
i1 = np.eye(N=2, M=2)
print(i1)

[[1. 0.]
 [0. 1.]]


In [11]:
# np.ones returns an array filled with 1; the argument sets the shape
i2 = np.ones(shape=5)  # rank-1 array with 5 entries
print(i2)
i3 = np.ones(shape=(4, 1))  # 4 rows, 1 column
print(i3)

[1. 1. 1. 1. 1.]
[[1.]
 [1.]
 [1.]
 [1.]]


In [12]:
# Five rows of four integers each -> an array of shape (5, 4)
arr = [
    [1, 2, 3, 4],
    [3, 4, 5, 6],
    [7, 8, 9, 6],
    [12, 7, 10, 9],
    [2, 11, 8, 10],
]
narr = np.array(arr)
print(narr)

[[ 1  2  3  4]
 [ 3  4  5  6]
 [ 7  8  9  6]
 [12  7 10  9]
 [ 2 11  8 10]]


In [13]:
print(narr[0])   # 1st row
print(narr[2])   # 3rd row
print(narr[2,3]) # 3rd row, 4th column

[1 2 3 4]
[7 8 9 6]
6


## Concatenate

In [47]:
# Joining two rank-1 arrays appends the second one after the first
x = np.array([2, 6, 8, 4])
y = np.array([11, 8, 2])
z = np.concatenate([x, y])
print(z)

[ 2  6  8  4 11  8  2]


In [15]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

# axis=0 (the default) stacks b underneath a
c1 = np.concatenate([a, b], axis=0)
print(c1)

# axis=1 places b to the right of a
c2 = np.concatenate([a, b], axis=1)
print(c2)

[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


## Arithmetic Operation on Numpy Objects

In [16]:
#create numpy arrays
x = np.array([1,2,3])
y = np.array([2,3,4])

In [17]:
# Element-wise addition/subtraction/multiplication/division
print(x+y)
print(x-y)
print(x*y) # Hadamard (element-wise) product
print(x*3) # Multiplying by a scalar scales every element
print(x/y)

[3 5 7]
[-1 -1 -1]
[ 2  6 12]
[3 6 9]
[0.5        0.66666667 0.75      ]


In [18]:
# Dot product (Multiply then Sum)
# Like matrix operation
print(np.dot(y,x))
print(x.dot(y))

20
20


In [19]:
# Build a 2x2 np.matrix from a nested list
# NOTE(review): NumPy's documentation no longer recommends np.matrix;
# a regular 2-D ndarray with the @ operator may be preferable -- confirm
A=np.matrix([[1, 2], [3, 4]])
print(type(A))

<class 'numpy.matrixlib.defmatrix.matrix'>


In [20]:
# For np.matrix objects, * performs matrix multiplication,
# not element-wise multiplication
print (A*A) # Matrix product of A with itself (NOT the Hadamard product)

[[ 7 10]
 [15 22]]


In [21]:
# Element-wise multiplication
print(np.multiply(A,A))

[[ 1  4]
 [ 9 16]]


In [22]:
AA1 = np.concatenate((A,A),axis=1) # axis=1: second copy of A placed to the right of the first
AA2 = np.concatenate((A,A),axis=0) # axis=0: second copy of A placed below the first

In [23]:
# Inverse of Matrix
from numpy.linalg import inv
print(inv(A))

[[-2.   1. ]
 [ 1.5 -0.5]]


In [24]:
# Transpose of Matrix
print(A.T)

[[1 3]
 [2 4]]


## Solving x for equation Ax=b

In [25]:
# Coefficient matrix of the system: 2*x1 + x2 = 4 and x1 - x2 = -1
A = np.array([
    [2, 1],
    [1, -1],
])
print(A)

# Right-hand side vector
b = np.array([4, -1])
print(b)

# Solve A x = b for x
np.linalg.solve(A, b)

[[ 2  1]
 [ 1 -1]]
[ 4 -1]


array([1., 2.])

## Basic Statistics with numpy

In [26]:
# Four rows and three columns of integers for the statistics examples
arr = [
    [1, 2, 0],
    [9, 8, 3],
    [5, 2, 2],
    [2, 14, 7],
]
narr = np.array(arr)
print(narr)

[[ 1  2  0]
 [ 9  8  3]
 [ 5  2  2]
 [ 2 14  7]]


In [27]:
narr.sum()

55

In [28]:
# Be careful about the axis
print(narr.sum(axis=0)) # column wise sum
print(narr.sum(axis=1)) # row wise sum

[17 26 12]
[ 3 20  9 23]


In [29]:
# Column wise mean
print(narr.mean(axis=0))     # variable.mean()
print(np.mean(narr,axis=0))  # np.mean(variable)

[4.25 6.5  3.  ]
[4.25 6.5  3.  ]


In [30]:
print(narr.std(axis=0))
print(np.std(narr,axis=0))

[3.1124749  4.97493719 2.54950976]
[3.1124749  4.97493719 2.54950976]


In [31]:
# 2 ways to get the median
print(np.median(narr,axis=1))
print(np.percentile(narr,50,axis=1))

[1. 8. 2. 7.]
[1. 8. 2. 7.]


In [32]:
# Other useful functions
a = np.sqrt(5)   # Square root
b = np.exp(2)    # Exponential e^2
c = np.exp(1)    # Exponential e^1
d = np.log(c)    # Natural log with base e
e = np.log10(10) # Log with base 10
f = np.abs(-10)  # Absolute function
print(a,b,c,d,e,f)

2.23606797749979 7.38905609893065 2.718281828459045 1.0 1.0 10


## Random Number Generators

In [33]:
# Normal Distribution
a = np.random.normal(10,5,20)  # (mean,std,size)
b = np.random.randn(2,4)       # Standard Normal (dimension1, dimension2)

In [34]:
# Uniform Distribution
np.random.rand(2,4)            # (dimension1, dimension2)

array([[0.65761048, 0.64254464, 0.60562669, 0.16510118],
       [0.47027194, 0.71384638, 0.54286465, 0.38863332]])

In [35]:
# Fix the seed so that the same random numbers are generated on every run
np.random.seed(1234)  
np.random.normal(10,5,20)

array([12.35717582,  4.04512153, 17.16353484,  8.43674052,  6.39705633,
       14.4358147 , 14.29794207,  6.81738248, 10.07848186, -1.21342477,
       15.75017862, 14.95973011, 14.76662064, -0.1062741 ,  8.32961317,
       10.01059182, 12.02726706, 11.4454597 , 16.60579096,  2.26547223])

In [36]:
np.random.seed(1234) # Same as above
np.random.normal(10,5,20)

array([12.35717582,  4.04512153, 17.16353484,  8.43674052,  6.39705633,
       14.4358147 , 14.29794207,  6.81738248, 10.07848186, -1.21342477,
       15.75017862, 14.95973011, 14.76662064, -0.1062741 ,  8.32961317,
       10.01059182, 12.02726706, 11.4454597 , 16.60579096,  2.26547223])

## Slicing Again

In [37]:
# One name per row of data
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

# Seed before drawing so that the random data is reproducible
np.random.seed(123)

# 7 rows x 4 columns of standard-normal draws
data = np.random.randn(7, 4)
print(data)

[[-1.0856306   0.99734545  0.2829785  -1.50629471]
 [-0.57860025  1.65143654 -2.42667924 -0.42891263]
 [ 1.26593626 -0.8667404  -0.67888615 -0.09470897]
 [ 1.49138963 -0.638902   -0.44398196 -0.43435128]
 [ 2.20593008  2.18678609  1.0040539   0.3861864 ]
 [ 0.73736858  1.49073203 -0.93583387  1.17582904]
 [-1.25388067 -0.6377515   0.9071052  -1.4286807 ]]


In [38]:
# Boolean mask: keep the rows of data whose matching entry in names is "Bob"
data[names=='Bob']

array([[-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471],
       [ 1.49138963, -0.638902  , -0.44398196, -0.43435128]])

In [39]:
data[names=='Bob',2:]

array([[ 0.2829785 , -1.50629471],
       [-0.44398196, -0.43435128]])

In [40]:
# Select the rows whose matching entry in names is NOT "Bob"
# 2 equivalent ways to do so (the notebook displays only the last expression)
data[names !='Bob']
data[~(names=='Bob')]

array([[-0.57860025,  1.65143654, -2.42667924, -0.42891263],
       [ 1.26593626, -0.8667404 , -0.67888615, -0.09470897],
       [ 2.20593008,  2.18678609,  1.0040539 ,  0.3861864 ],
       [ 0.73736858,  1.49073203, -0.93583387,  1.17582904],
       [-1.25388067, -0.6377515 ,  0.9071052 , -1.4286807 ]])

In [41]:
# Choose either "Bob" or "Will"
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
ppl = (names=='Bob')|(names=='Will')
ppl

array([ True, False,  True,  True,  True, False, False])

In [42]:
data[ppl]

array([[-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471],
       [ 1.26593626, -0.8667404 , -0.67888615, -0.09470897],
       [ 1.49138963, -0.638902  , -0.44398196, -0.43435128],
       [ 2.20593008,  2.18678609,  1.0040539 ,  0.3861864 ]])

In [43]:
# Replace all negative values by 0
data[data<0]=0
data

array([[0.        , 0.99734545, 0.2829785 , 0.        ],
       [0.        , 1.65143654, 0.        , 0.        ],
       [1.26593626, 0.        , 0.        , 0.        ],
       [1.49138963, 0.        , 0.        , 0.        ],
       [2.20593008, 2.18678609, 1.0040539 , 0.3861864 ],
       [0.73736858, 1.49073203, 0.        , 1.17582904],
       [0.        , 0.        , 0.9071052 , 0.        ]])

## Reshape the Array

In [44]:
# NumPy: arange builds the sequence 0..9 directly as an ndarray
arr = np.arange(10)
print(arr)
print(type(arr))

# Plain Python: the same sequence from range, materialised as a list
list1 = list(range(10))
print(list1)
print(type(list1))

[0 1 2 3 4 5 6 7 8 9]
<class 'numpy.ndarray'>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
<class 'list'>


In [45]:
a = arr.reshape(5,2)
print(a)
print(a.shape)

print(a.T)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
(5, 2)
[[0 2 4 6 8]
 [1 3 5 7 9]]


In [46]:
b = arr.reshape(2,5)
print(b)
print(b.T)

[[0 1 2 3 4]
 [5 6 7 8 9]]
[[0 5]
 [1 6]
 [2 7]
 [3 8]
 [4 9]]


## Activity 1
> - Generate a 50x1 array $X$ containing random normals ~ N(0,1).  
> - Calculate the variance of $X$.  
> - Generate another 50x1 array $u$ containing random normals ~ N(0,1).  
> - Generate $y$ such that $y=1+2X+u$.  
> - Calculate the mean and variance of $y$.  

## Activity 2
> - Create a 50x1 vector $e$ that contains only ones.  
> - Combine $e$ and $X$ side by side (as columns) to form the matrix $Z$.  
> - Calculate the inverse of $(Z'Z)$.  
> - Calculate the product $Z'y$ (in NumPy: `Z.T` times `y`).  
> - What are the values of the product of the inverse of $(Z'Z)$ and $Z'y$?  
> - Are they close to the intercept and the slope (1,2)?