In [1]:
import numpy as np

# Elementwise Operations

**1. Basic Operations**

**with scalars**

In [2]:
a = np.array([1, 2, 3, 4])  # create a 1-D integer array

a + 1  # the scalar 1 is added to every element

array([2, 3, 4, 5])

In [3]:
a ** 2

array([ 1,  4,  9, 16])

**All arithmetic operates elementwise**

In [4]:
b = np.ones(4) + 1  # four ones plus 1 -> an array of four 2.0 floats

a - b  # elementwise difference; mixing the integer array a with the float array b gives a float result

array([-1.,  0.,  1.,  2.])

In [5]:
a * b

array([2., 4., 6., 8.])

In [6]:
# Matrix multiplication

c = np.diag([1, 2, 3, 4])  # 4x4 matrix with 1..4 on the diagonal and zeros elsewhere

print(c * c)  # `*` multiplies element by element -- it is NOT the matrix product
print("*****************")
print(c.dot(c))  # dot() computes the true matrix product
# NOTE: the two printed results coincide here only because c is diagonal;
# for a general matrix, c * c and c.dot(c) give different results.

[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]
*****************
[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]


**Comparisons**

In [7]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
a == b

array([False,  True, False,  True])

In [8]:
a > b

array([False, False,  True, False])

In [9]:
# array-wise comparisons: array_equal() returns a single bool for the whole array
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
c = np.array([1, 2, 3, 4])

np.array_equal(a, b)  # False: a and b differ in at least one position

False

In [10]:
np.array_equal(a, c)

True

**Logical Operations**

In [11]:
a = np.array([1, 1, 0, 0], dtype=bool)
b = np.array([1, 0, 1, 0], dtype=bool)

np.logical_or(a, b)

array([ True,  True,  True, False])

In [12]:
np.logical_and(a, b)

array([ True, False, False, False])

**Transcendental functions:**

In [13]:
a = np.arange(5)

np.sin(a)   

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [14]:
np.log(a)  # a[0] == 0, so the first entry is -inf and NumPy emits a divide-by-zero RuntimeWarning

  """Entry point for launching an IPython kernel.


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436])

In [15]:
np.exp(a)   #evaluates e^x for each element in a given input

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692, 54.59815003])

**Shape Mismatch**

In [17]:
a = np.arange(4)

# shapes (4,) and (2,) cannot be broadcast together, so this line raises ValueError
a + np.array([1, 2])

ValueError: ignored

# Basic Reductions

**computing sums**

In [18]:
x = np.array([1, 2, 3, 4])
np.sum(x)

10

In [19]:
#sum by rows and by columns

x = np.array([[1, 1], [2, 2]])
x

array([[1, 1],
       [2, 2]])

In [20]:
x.sum(axis=0)   # sum along the first dimension (down the rows) -> one total per column

array([3, 3])

In [21]:
x.sum(axis=1)  # sum along the second dimension (across the columns) -> one total per row

array([2, 4])

**Other reductions**

In [22]:
x = np.array([1, 3, 2])
x.min()

1

In [23]:
x.max()

3

In [24]:
x.argmin()# index of minimum element

0

In [25]:
x.argmax()# index of maximum element

1

**Logical Operations**

In [26]:
np.all([True, True, False])

False

In [27]:
np.any([True, False, False])

True

In [28]:
# Note: any() / all() can be used for whole-array comparisons
a = np.zeros((50, 50))
np.any(a != 0)  # False: no element of the all-zero array is non-zero

False

In [29]:
np.all(a == a)

True

In [30]:
a = np.array([1, 2, 3, 2])
b = np.array([2, 2, 3, 2])
c = np.array([6, 4, 4, 5])
# elementwise test of a <= b <= c; the parentheses are required because & binds tighter than <=
((a <= b) & (b <= c)).all()

True

**Statistics**

In [31]:
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])
x.mean()

1.75

In [32]:
np.median(x)

1.5

In [33]:
np.median(y, axis=-1) # last axis

array([2., 5.])

In [34]:
x.std()          # full population standard dev.

0.82915619758885

**Example:**

The data in populations.txt describes the populations of hares and lynxes (and carrots) in northern Canada over a period of 20 years.


In [35]:
# load data into a NumPy array object
# NOTE: populations.txt must be present in the working directory; otherwise loadtxt
# raises OSError and every later cell that uses `data` fails with NameError.
data = np.loadtxt('populations.txt')

OSError: ignored

In [36]:
data

NameError: ignored

In [37]:
year, hares, lynxes, carrots = data.T #columns to variables
print(year)

NameError: ignored

In [None]:
#The mean population over time
populations = data[:, 1:]  # drop the first (year) column, keeping one column per species
populations.mean(axis=0)   # average each column over all rows, i.e. the mean of each species over the years

In [38]:
# standard deviation of each column (one value per species); std() defaults to ddof=0,
# i.e. the population standard deviation -- pass ddof=1 for the sample estimate
populations.std(axis=0)

NameError: ignored

In [None]:
#which species has the highest population each year?
# argmax returns, for every row (year), the column index within `populations`
# (0 = hares, 1 = lynxes, 2 = carrots, following the column order unpacked from data.T)

np.argmax(populations, axis=1)

# Broadcasting

Basic operations on NumPy arrays (addition, etc.) are elementwise.

This works on arrays of the same size.
Nevertheless, it’s also possible to do operations on arrays of different sizes if NumPy can transform these arrays so that they all have the same size: this conversion is called broadcasting.

The image below gives an example of broadcasting:

![title](https://github.com/shubhamanand43/Applied_AI_Course_Notes/blob/master/5%20-%20Python%20For%20Data%20Science%20Numpy/5.1%20-%20Numpy%20Introduction/broadcasting.png?raw=1)

In [39]:
a = np.tile(np.arange(0, 40, 10), (3,1))  # stack the row [0, 10, 20, 30] three times -> shape (3, 4)
print(a)

print("*************")
a=a.T  # transpose to shape (4, 3): each row now holds one repeated value
print(a)

[[ 0 10 20 30]
 [ 0 10 20 30]
 [ 0 10 20 30]]
*************
[[ 0  0  0]
 [10 10 10]
 [20 20 20]
 [30 30 30]]


In [40]:

b = np.array([0, 1, 2])
b

array([0, 1, 2])

In [41]:

a + b  # b has shape (3,) and is broadcast across each of the 4 rows of a

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [42]:
a = np.arange(0, 40, 10)
a.shape


(4,)

In [43]:
a = a[:, np.newaxis]  # adds a new axis -> 2D array
a.shape

(4, 1)

In [44]:
a

array([[ 0],
       [10],
       [20],
       [30]])

In [45]:
a + b  # shapes (4, 1) and (3,) broadcast to a (4, 3) result

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

# Array Shape Manipulation

**Flattening**

In [96]:
a = np.array([[1, 2, 3], [4, 5, 6]])
a.ravel() #Return a contiguous flattened array. A 1-D array, containing the elements of the input, is returned. A copy is made only if needed.

array([1, 2, 3, 4, 5, 6])

In [97]:
a.T #Transpose

array([[1, 4],
       [2, 5],
       [3, 6]])

In [98]:
a.T.ravel()

array([1, 4, 2, 5, 3, 6])

**Reshaping**

The inverse operation to flattening:

In [99]:
print(a.shape)
print(a)

(2, 3)
[[1 2 3]
 [4 5 6]]


In [100]:
b = a.ravel()
print(b)

[1 2 3 4 5 6]


In [101]:
b = b.reshape((2, 3))
b

array([[1, 2, 3],
       [4, 5, 6]])

In [102]:
b[0, 0] = 100  # b is a view onto a's data (ravel and reshape did not copy here) ...
a  # ... so the write through b is visible in a

array([[100,   2,   3],
       [  4,   5,   6]])

**Note and beware: reshape may also return a copy!**

In [103]:
a = np.zeros((3, 2))
b = a.T.reshape(3*2)  # flattening the transposed array cannot be done as a view, so reshape returns a copy
b[0] = 50  # modifies only the copy
a  # a is unchanged (still all zeros)

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

**Adding a Dimension**

Indexing with the np.newaxis object allows us to add an axis to an array

Each use of np.newaxis increases the number of dimensions of the existing array by one. Thus:

1D array will become 2D array

2D array will become 3D array

3D array will become 4D array and so on

In [104]:
z = np.array([1, 2, 3])
z

array([1, 2, 3])

In [105]:
z[:, np.newaxis]

array([[1],
       [2],
       [3]])

**Dimension Shuffling**

In [106]:
a = np.arange(4*3*2).reshape(4, 3, 2)
a.shape

(4, 3, 2)

In [107]:
a

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15],
        [16, 17]],

       [[18, 19],
        [20, 21],
        [22, 23]]])

In [108]:
a[0, 2, 1]

5

**Resizing**

In [109]:
a = np.arange(4)
a.resize((8,))  # resizes a in place; the new trailing entries are filled with zeros
a

array([0, 1, 2, 3, 0, 0, 0, 0])

However, the array must not be referenced anywhere else when it is resized in place:

In [110]:
b = a  # b is a second reference to the same array object
a.resize((4,))  # raises ValueError: an array that is referenced elsewhere cannot be resized in place

ValueError: ignored

**Sorting Data**

In [None]:
# Sorting along an axis:
a = np.array([[5, 4, 6], [2, 3, 2]])
b = np.sort(a, axis=1)  # returns a sorted copy; each row is sorted independently and a is left unchanged
b

In [None]:
# in-place sort
a.sort(axis=1)  # sorts each row of a itself; the method returns None
a

In [None]:
# sorting with fancy indexing
a = np.array([4, 3, 1, 2])
j = np.argsort(a)  # indices that would put a in ascending order
j

In [None]:
a[j]

<h3>The NumPy array object</h3>
**NumPy Arrays**
**python objects:**

high-level number objects: integers, floating point
containers: lists (costless insertion and append), dictionaries (fast lookup)
NumPy provides:

extension package to Python for multi-dimensional arrays
closer to hardware (efficiency)
designed for scientific computation (convenience)
Also known as array oriented computing

In [None]:
import numpy as np
a = np.array([0, 1, 2, 3])
print(a)

print(np.arange(10))

Why it is useful: Memory-efficient container that provides fast numerical operations.

In [None]:
#python lists
L = list(range(1000))  # build a real list: in Python 3, range() is a lazy sequence, not a list
%timeit [i**2 for i in L]

In [None]:
a = np.arange(1000)
%timeit a**2

**1. Creating arrays**
1.1. Manual Construction of arrays

In [None]:
#1-D

a = np.array([0, 1, 2, 3])

a

In [None]:
#print dimensions

a.ndim

In [None]:
#shape

a.shape

In [None]:
len(a)

In [None]:
# 2-D, 3-D....

b = np.array([[0, 1, 2], [3, 4, 5]])

b


In [None]:
b.ndim

In [None]:
b.shape

In [None]:
len(b)  # returns the size of the first dimension

In [None]:
c = np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]])

c

In [None]:
c.ndim

In [None]:
c.shape

1.2 Functions for creating arrays

In [None]:
# using the arange function

# arange is an array-valued version of the built-in Python range function

a = np.arange(10)  # 0 ... n-1
a

In [None]:
b = np.arange(1, 10, 2) #start, end (exclusive), step

b

In [None]:
#using linspace

a = np.linspace(0, 1, 6) #start, end, number of points

a

In [None]:
#common arrays

a = np.ones((3, 3))

a

In [None]:
b = np.zeros((3, 3))

b

In [None]:
c = np.eye(3)  #Return a 2-D array with ones on the diagonal and zeros elsewhere.

c

In [None]:
d = np.eye(3, 2) #3 is number of rows, 2 is number of columns, index of diagonal start with 0

d

In [None]:
#create array using diag function

a = np.diag([1, 2, 3, 4]) #construct a diagonal array.

a

In [None]:
np.diag(a)   #Extract diagonal

In [None]:
#create array using random

#Create an array of the given shape and populate it with random samples from a uniform distribution over [0, 1).
a = np.random.rand(4) 

a

In [None]:
a = np.random.randn(4)  # Return a sample (or samples) from the "standard normal" (Gaussian) distribution.

a

Note:

**For random samples from $N(\mu, \sigma^2)$, use:**

sigma * np.random.randn(...) + mu

2. Basic DataTypes
You may have noticed that, in some instances, array elements are displayed with a trailing dot (e.g. 2. vs 2). This is due to a difference in the data-type used:

In [None]:
a = np.arange(10)

a.dtype

In [None]:
#You can explicitly specify which data-type you want:

a = np.arange(10, dtype='float64')
a

In [None]:
#The default data type is float for zeros and ones function

a = np.zeros((3, 3))

print(a)

a.dtype

other datatypes


In [None]:
d = np.array([1+2j, 2+4j])   #Complex datatype

print(d.dtype)

In [None]:
b = np.array([True, False, True, False])  #Boolean datatype

print(b.dtype)

In [None]:
s = np.array(['Ram', 'Robert', 'Rahim'])

s.dtype

Each built-in data type has a character code that uniquely identifies it.
'b' − boolean

'i' − (signed) integer

'u' − unsigned integer

'f' − floating-point

'c' − complex-floating point

'm' − timedelta

'M' − datetime

'O' − (Python) objects

'S', 'a' − (byte-)string

'U' − Unicode

'V' − raw data (void)

For more details

https://docs.scipy.org/doc/numpy-1.10.1/user/basics.types.html

**3 Indexing and Slicing**
**Indexing**

The items of an array can be accessed and assigned to the same way as other Python sequences (e.g. lists):

In [111]:
a = np.arange(10)

print(a[5])  #indices begin at 0, like other Python sequences (and C/C++)

5


In [112]:
# For multidimensional arrays, indexes are tuples of integers:

a = np.diag([1, 2, 3])

print(a[2, 2])

3


In [113]:
a[2, 1] = 5 #assigning value

a

array([[1, 0, 0],
       [0, 2, 0],
       [0, 5, 3]])

3.2 Slicing

In [114]:
a = np.arange(10)

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [115]:
a[1:8:2] # [startindex: endindex(exclusive) : step]

array([1, 3, 5, 7])

In [116]:
#we can also combine assignment and slicing:

a = np.arange(10)
a[5:] = 10
a

array([ 0,  1,  2,  3,  4, 10, 10, 10, 10, 10])

In [117]:
b = np.arange(5)
a[5:] = b[::-1]  # assign b in reversed order to the last five elements of a

a

array([0, 1, 2, 3, 4, 4, 3, 2, 1, 0])

<h3>Copies and Views</h3>
A slicing operation creates a view on the original array, which is just a way of accessing array data. Thus the original array is not copied in memory. You can use np.may_share_memory() to check if two arrays share the same memory block.

When modifying the view, the original array is modified as well: 

In [118]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [119]:
b = a[::2]
b

array([0, 2, 4, 6, 8])

In [120]:
np.shares_memory(a, b)

True

In [121]:
b[0] = 10
b

array([10,  2,  4,  6,  8])

In [122]:
a  # even though we modified b, a was updated as well because both share the same memory

array([10,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [123]:
a = np.arange(10)

c = a[::2].copy()     #force a copy
c

array([0, 2, 4, 6, 8])

In [124]:
np.shares_memory(a, c)

False

In [125]:
c[0] = 10

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

<h3>5. Fancy Indexing</h3>
NumPy arrays can be indexed with slices, but also with boolean or integer arrays (masks). This method is called fancy indexing. It creates copies, not views.

Using Boolean Mask

In [126]:
a = np.random.randint(0, 20, 15)  # 15 random integers in [0, 20); NOTE(review): no seed is set in this cell, so the values presumably differ from run to run
a

array([16, 19, 18, 19, 13, 15, 17, 13,  7, 18, 15,  5, 14,  9,  3])

In [127]:
mask = (a % 2 == 0)  # boolean array: True where the element of a is even

In [128]:
extract_from_a = a[mask]

extract_from_a

array([16, 18, 18, 14])

Indexing with a mask can be very useful to assign a new value to a sub-array:

In [129]:
a[mask] = -1
a

array([-1, 19, -1, 19, 13, 15, 17, 13,  7, -1, 15,  5, -1,  9,  3])

Indexing with an array of integers

In [130]:
a = np.arange(0, 100, 10)

a

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [131]:
# Indexing can be done with an array of integers, where the same index may be repeated several times:

a[[2, 3, 2, 4, 2]]

array([20, 30, 20, 40, 20])

In [132]:
# New values can be assigned through an integer index array

a[[9, 7]] = -200  # sets the elements at positions 9 and 7 to -200

a

array([   0,   10,   20,   30,   40,   50,   60, -200,   80, -200])