# How to create a rank 1 np.array()

In [2]:
import numpy as np
# Passing a flat Python list to np.array() produces a rank 1 (one-dimensional) ndarray
an_array = np.array([3, 33, 333]) # Create a rank 1 array
print(type(an_array))             # The type of an ndarray is: "<class 'numpy.ndarray'>"

<class 'numpy.ndarray'>


In [3]:
# Test the shape of the array we just created, it should have just one dimension
print(an_array.shape)

(3,)


In [4]:
# Because this is a rank 1 array, a single index is enough to access each element
print(an_array[0], an_array[1], an_array[2])

3 33 333


In [5]:
an_array[0] = 888                 # ndarrays are mutable: here we overwrite the first element in place
print(an_array)

[888  33 333]


In [6]:
# Create a rank 2 array from a nested list: each inner list becomes one row
another = np.array([[11, 12, 13], [21, 22, 23]])

print(another) # Print the array

# .shape is a tuple of (number of rows, number of columns)
print("The shape is 2 rows, 3 columns: ", another.shape)

# A rank 2 array needs two indices, [row, column], to address a single element
print("Accessing elements [0,0], [0,1], and [1,0] of the ndarray: ", another[0,0], ", ", another[0,1], ", ", another[1,0])

[[11 12 13]
 [21 22 23]]
The shape is 2 rows, 3 columns:  (2, 3)
Accessing elements [0,0][0,1], and [1,0] of the ndarray:  11 ,  12 ,  21


In [7]:
import numpy as np

# np.zeros takes the target shape as a tuple; here 2 rows by 2 columns of 0.0
ex1 = np.zeros(shape=(2, 2))
print(ex1)

[[ 0.  0.]
 [ 0.  0.]]


In [8]:
# Create a 2x2 array in which every element is 9.1
ex2 = np.full((2, 2), 9.1)
print(ex2)

[[ 9.1  9.1]
 [ 9.1  9.1]]


In [9]:
# Create a 10x10 matrix with 1s on the diagonal and 0s everywhere else
ex3 = np.eye(10,10)
print(ex3)

[[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]]


In [10]:
# Create an array of ones
ex4 = np.ones((3,2))
print(ex4)

[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]


In [11]:
# Create a 2x2 array of random floats drawn from [0, 1).
# No seed is set in this cell, so the values differ from run to run.
ex5 = np.random.random((2,2))
print(ex5)

[[ 0.90399552  0.88436508]
 [ 0.96473456  0.02345255]]


# Array Indexing

In [12]:
import numpy as np

# Rank 2 array of shape (3, 4)
an_array = np.array([[11, 12, 13, 14], [21, 22, 23, 24], [31, 32, 33, 34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [13]:
# Take rows 0-1 and columns 1-2 of an_array.
# Wrapping the slice in np.array() copies the data, so a_slice does not share memory with an_array.
a_slice = np.array(an_array[:2, 1:3])
print(a_slice)

[[12 13]
 [22 23]]


In [14]:
print("Before: ", an_array[0,1]) # Inspect the element at 0, 1
a_slice[0,0] = 1000 # a_slice was built with np.array(), i.e. it is a copy, so an_array[0,1] is NOT affected
print("After: ", an_array[0,1])  # Same value as before; a plain slice (a view) would show 1000 instead

Before:  12
After:  12


# Using both integer indexing & slice indexing

In [15]:
# Create a Rank 2 array of shape(3,4)
an_array = np.array([[11, 12, 13, 14], [21, 22, 23, 24], [31, 32, 33, 34]])

print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [16]:
# Using both integer indexing & slicing generates an array of lower rank
row_rank1 = an_array[1, :] # Rank 1 view

print(row_rank1, row_rank1.shape) # Notice only a single pair of brackets [ ] and the shape (4,)

[21 22 23 24] (4,)


In [17]:
# Slicing alone generates an array of the same rank as an_array
row_rank2 = an_array[1:2, :] # Rank 2 view

print(row_rank2, row_rank2.shape) # Notice the [[ ]] and the shape (1, 4)

[[21 22 23 24]] (1, 4)


In [18]:
# We can do the same thing for columns of an array:

print()
col_rank1 = an_array[:, 1]    # Integer index on the column axis: rank 1 result
col_rank2 = an_array[:, 1:2]  # Slice on the column axis: rank 2 result with a single column

print(col_rank1, col_rank1.shape) # Rank 1
print()
print(col_rank2, col_rank2.shape) # Rank 2


[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


# Array Indexing for changing elements:

In [19]:
# Create a new array
an_array = np.array([[11, 12, 13], [21, 22, 23], [31, 32, 33], [41, 42, 43]])

print("Original Array: ")
print(an_array)

Original Array: 
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [20]:
# Create an array of column indices, one entry per row of an_array
col_indices = np.array([0, 1, 2, 0])
print("\nCol indices picked : ", col_indices)

# np.arange(4) gives [0 1 2 3], i.e. one row index for each of the four rows
row_indices = np.arange(4)
print("\nRows indices picked : ", row_indices)


Col indices picked :  [0 1 2 0]

Rows indices picke :  [0 1 2 3]


In [21]:
# Examine the pairing of row_indices and col_indices. These are the elements we'll change next.
for row, col in zip(row_indices,col_indices):
    print(row, ", ", col)

0 ,  0
1 ,  1
2 ,  2
3 ,  0


In [22]:
# Select one element from each row
print("Values in the array at those indices: ", an_array[row_indices, col_indices])

Values in the array at those indices:  [11 22 33 41]


In [23]:
# Show the selected elements once more before modifying them (nothing is changed in this cell)
print('Values in the array at those indices: ', an_array[row_indices, col_indices])

Values in the array at those indices:  [11 22 33 41]


In [24]:
# Change one element from each row using the indices selected.
# The in-place += touches only the (row, col) pairs given by row_indices and col_indices.
an_array[row_indices, col_indices] += 100000

print("\nChanged Array: ")
print(an_array)


Changed Array: 
[[100011     12     13]
 [    21 100022     23]
 [    31     32 100033]
 [100041     42     43]]


In [25]:
# Question: what does arr[:2,] select?
# Answer: the trailing comma leaves the column axis unrestricted, so this is the first two rows with all columns.
arr = np.zeros((3,3))
print(arr[:2,])

[[ 0.  0.  0.]
 [ 0.  0.  0.]]


# Boolean Indexing

In [26]:
# Create a 3x2 array
an_array = np.array([[11, 12], [21, 22], [31, 32]])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [27]:
# Create a filter which holds a boolean for each element: True where the element meets the condition
# NOTE(review): the name `filter` shadows Python's built-in filter() for the rest of the session
filter = (an_array > 15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]], dtype=bool)

In [28]:
# We can now select just those elements which meet that criterion; the result is a rank 1 array
print(an_array[filter])

[21 22 31 32]


In [29]:
# For short, we could have just used the approach below without the need for the separate filter array.
print(an_array[an_array > 15])

# Combine conditions with & (element-wise AND); each condition needs its own parentheses: > 20 and < 30
print(an_array[(an_array > 20) & (an_array < 30)])

# Keep only the even values: % 2 == 0
print(an_array[(an_array % 2 == 0)])

[21 22 31 32]
[21 22]
[12 22 32]


In [30]:
# Boolean indexing also works on the left-hand side: add 100 to every even element, in place
an_array[an_array % 2 == 0] += 100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


# Datatypes and Array Operations

In [31]:
ex1 = np.array([11, 12]) # NumPy infers the data type from the values (integers here)
print(ex1.dtype)

int64


In [32]:
ex2 = np.array([11.0, 12.0]) # NumPy infers the data type from the values (floats here)
print(ex2.dtype)

float64


In [33]:
ex3 = np.array([11, 21], dtype=np.int64) # You can also tell NumPy the data type explicitly
print(ex3.dtype)

int64


In [34]:
# You can use this to force floats into integers.
# The fractional part is dropped (12.7 becomes 12); the value is not rounded to the nearest integer.
ex4 = np.array([11.1, 12.7], dtype=np.int64)
print(ex4.dtype)
print()
print(ex4)

int64

[11 12]


In [35]:
# You can use this to force integers into floats if you anticipate
# that the values may change to floats later
ex5 = np.array([11, 21], dtype=np.float64)
print(ex5.dtype)
print()
print(ex5)

float64

[ 11.  21.]


# Arithmetic Array Operations:

In [36]:
# Two 2x2 operands for the arithmetic examples: one integer array and one float array.
# np.int64 is used because the bare `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24,
# where it raises AttributeError.
x = np.array([[111, 112], [121, 122]], dtype=np.int64)
y = np.array([[211.1, 212.1], [221.1,222.1]], dtype=np.float64)

print(x)
print()
print(y)

[[111 112]
 [121 122]]

[[ 211.1  212.1]
 [ 221.1  222.1]]


In [37]:
# Add
print(x + y)
print()
print(np.add(x, y))

[[ 322.1  324.1]
 [ 342.1  344.1]]

[[ 322.1  324.1]
 [ 342.1  344.1]]


In [38]:
# Subtract
print( x - y )
print()
print(np.subtract(x, y))

[[-100.1 -100.1]
 [-100.1 -100.1]]

[[-100.1 -100.1]
 [-100.1 -100.1]]


In [39]:
# Multiply
print( x * y )
print()
print(np.multiply(x, y))

[[ 23432.1  23755.2]
 [ 26753.1  27096.2]]

[[ 23432.1  23755.2]
 [ 26753.1  27096.2]]


In [40]:
# Divide
print( x / y )
print()
print(np.divide(x, y))

[[ 0.52581715  0.52805281]
 [ 0.54726368  0.54930212]]

[[ 0.52581715  0.52805281]
 [ 0.54726368  0.54930212]]


In [41]:
# Square Root
print(np.sqrt(x))

[[ 10.53565375  10.58300524]
 [ 11.          11.04536102]]


In [42]:
# Exponent ( e ** x)
print(np.exp(x))

[[  1.60948707e+48   4.37503945e+48]
 [  3.54513118e+52   9.63666567e+52]]


# Statistical Methods, Sorting, and Set Operations:

<p style="font-family: Arial; font-size:1.75em; color:#2462C0; font-style:bold">
<br>
Basic Statistical Operations:
</p>

In [43]:
# Set up a random 2 x 5 matrix: samples from np.random.randn scaled by 10
arr = 10 * np.random.randn(2,5)
print(arr)

[[ -4.59967984  -9.62936703 -11.11289596   4.56448464  -6.14997178]
 [-10.89910238  -3.85965777   5.55710395  -3.37257375   0.58896384]]


In [44]:
# Compute the mean for all elements
print(arr.mean())

-3.89126960743


In [45]:
# Compute the means by row
print(arr.mean(axis = 1 ))

[-5.38548599 -2.39705322]


In [46]:
# Compute the means by column
print(arr.mean(axis = 0))

[-7.74939111 -6.7445124  -2.777896    0.59595544 -2.78050397]


In [47]:
# Sum all the elements
print(arr.sum())

-38.9126960743


In [48]:
# Compute the median of each row (axis = 1)
print(np.median(arr, axis = 1))

[-6.14997178 -3.37257375]


# Sorting:

In [49]:
# Create a 10 element array of randoms
unsorted = np.random.randn(10)

print(unsorted)

[-1.26159552  0.89962214 -0.85553891  0.25879021  1.06870038  0.18171983
 -1.6506083   1.15313977 -0.4900308  -0.9649613 ]


In [50]:
# Create a sorted copy, leaving the original untouched.
# np.sort returns a new sorted array; the result is named sorted_copy so that
# Python's built-in sorted() is not shadowed for the rest of the session.
sorted_copy = np.sort(unsorted)

print(sorted_copy)
print()
print(unsorted)

[-1.6506083  -1.26159552 -0.9649613  -0.85553891 -0.4900308   0.18171983
  0.25879021  0.89962214  1.06870038  1.15313977]

[-1.26159552  0.89962214 -0.85553891  0.25879021  1.06870038  0.18171983
 -1.6506083   1.15313977 -0.4900308  -0.9649613 ]


In [51]:
# In-place sorting: ndarray.sort() reorders the array itself instead of producing a copy
unsorted.sort()

print(unsorted)

[-1.6506083  -1.26159552 -0.9649613  -0.85553891 -0.4900308   0.18171983
  0.25879021  0.89962214  1.06870038  1.15313977]


# Finding Unique elements:

In [52]:
array = np.array([1, 2, 1, 4, 2, 1, 4, 2])
print(np.unique(array))

[1 2 4]


# Set Operations with np.array data type:

In [53]:
s1 = np.array(['desk', 'chair', 'bulb'])
s2 = np.array(['lamp', 'bulb', 'chair'])

print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [54]:
print(np.intersect1d(s1, s2))

['bulb' 'chair']


In [55]:
print(np.union1d(s1, s2))

['bulb' 'chair' 'desk' 'lamp']


In [56]:
print(np.setdiff1d(s1, s2)) # Element in s1 that are not in s2

['desk']


In [57]:
print(np.isin(s1, s2)) # For each element of s1: is it also present in s2? (np.isin supersedes the deprecated np.in1d)

[False  True  True]


# Broadcasting:

In [58]:
import numpy as np

start = np.zeros((4,3))
print(start)

[[ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


In [59]:
# Create a rank 1 ndarray with 3 values
add_rows = np.array([1, 0, 2])
print(add_rows)

[1 0 2]


In [60]:
y = start + add_rows # Add to each row of 'start' using broadcasting
print(y)

[[ 1.  0.  2.]
 [ 1.  0.  2.]
 [ 1.  0.  2.]
 [ 1.  0.  2.]]


In [61]:
# Create an ndarray which is 4 x 1 to broadcast across columns
add_cols = np.array([[0, 1, 2, 3]])  # starts out as a 1 x 4 row ...
add_cols = add_cols.T                # ... and the transpose turns it into a 4 x 1 column

print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [62]:
# Add to each column of 'start' using broadcasting
y = start + add_cols
print(y)

[[ 0.  0.  0.]
 [ 1.  1.  1.]
 [ 2.  2.  2.]
 [ 3.  3.  3.]]


In [63]:
# A one-element array is broadcast in both dimensions, so 1 is added to every element of 'start'
add_scalar = np.array([1])
print(start+add_scalar)

[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]


In [64]:
# Create our 3x4 matrix
arrA = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(arrA)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [65]:
# Broadcasting onto a 2x2 array of zeros: first a rank 1 array of length 2, then a plain scalar
a = np.array([[0,0],[0,0]])
b1 = np.array([1,1])
b2 = 1
print(a+b1)  # b1 is added to each row of a
print(a+b2)  # b2 is added to every element of a

[[1 1]
 [1 1]]
[[1 1]
 [1 1]]


# Speedtest: ndarrays vs lists:

In [66]:
from numpy import arange
from timeit import Timer

size = 1000000   # number of elements in both the ndarray and the list being summed
timeits = 1000   # how many times each timed statement is executed

In [67]:
# Create the ndarray with values 0, 1, 2 ..., size-1
nd_array = arange(size)
print(type(nd_array))

<class 'numpy.ndarray'>


In [68]:
# Timer takes the statement to time and a setup statement, both as strings;
# the setup imports nd_array from the notebook's __main__ namespace.
timer_numpy = Timer("nd_array.sum()", "from __main__ import nd_array")

# timeit() returns the total time for `timeits` runs, so divide to get the average per run
print("Time taken by numpy ndarray: %f seconds" % (timer_numpy.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.000783 seconds


In [69]:
# Create the list with values 0, 1, 2 ..., size-1
a_list = list(range(size))
print(type(a_list))

<class 'list'>


In [70]:
# Timer takes the statement to time and a setup statement, both as strings;
# here we time the built-in sum() over the plain Python list.
timer_list = Timer("sum(a_list)", "from __main__ import a_list")
# Average time per run; the label says "list" because this measures the Python list, not the ndarray
print("Time taken by list: %f seconds" % (timer_list.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.009974 seconds


# Read or Write to Disk:

<p style="font-family: Arial; font-size:1.3em;color:#2462C0; font-style:bold"><br>

Binary Format:</p>

In [73]:
x = np.array([23.23, 24.24])

In [74]:
np.save('an_array', x)

In [75]:
np.load('an_array.npy')

array([ 23.23,  24.24])

<p style="font-family: Arial; font-size:1.3em;color:#2462C0; font-style:bold"><br>

Text Format:</p>

In [76]:
# Write x to a plain-text file; for this rank 1 array each element ends up on its own line
np.savetxt('array.txt', X=x, delimiter=',')

In [77]:
!cat array.txt

2.323000000000000043e+01
2.423999999999999844e+01


In [78]:
np.loadtxt('array.txt', delimiter=',')

array([ 23.23,  24.24])

# Additional Common ndarray Operations

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Dot Product on Matrices and Inner Product on Vectors:

</p>

In [79]:
# Matrix product of two 2x2 matrices, computed two equivalent ways:
# the ndarray method form and the module-level function form
x2d = np.array([[1, 1], [1, 1]])
y2d = np.array([[2, 2], [2, 2]])
# A blank line between the two results is produced by the separator
print(x2d.dot(y2d), np.dot(x2d, y2d), sep="\n\n")

[[4 4]
 [4 4]]

[[4 4]
 [4 4]]


In [80]:
# Determine the inner product of two vectors
a1d = np.array([9,9])
b1d = np.array([10,10])

print(a1d.dot(b1d))
print()
print(np.dot(a1d, b1d))

180

180


In [81]:
# Dot product of a matrix (rank 2 array) and a vector (rank 1 array)
print(x2d.dot(a1d))
print()
print(np.dot(x2d, a1d))

[18 18]

[18 18]


<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Sum:
</p>

In [82]:
# Total of every element in a small 2x2 example array
ex1 = np.array([[11, 12],
                [21, 22]])
print(ex1.sum()) # No axis given, so all four members are added together

66


In [83]:
print(np.sum(ex1, axis=0)) # Columnwise sum

[32 34]


In [85]:
print(np.sum(ex1, axis=1)) # Rowwise sum

[23 43]


<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Element-wise Functions: </p>

For example, let's compare two arrays values to get the maximum of each.

In [87]:
# Random array
x = np.random.randn(8)
x

array([-0.06712318,  2.23825587, -1.46733933,  0.21262319,  1.23169886,
       -1.30353549, -0.99630518, -0.34941964])

In [88]:
# Another random array
y = np.random.randn(8)
y

array([-0.04667501,  0.76904214, -0.1892482 , -0.80727388, -0.31570318,
       -0.46729143,  0.48976172,  0.87865633])

In [89]:
# Returns element wise maximum between two arrays

np.maximum(x, y)

array([-0.04667501,  2.23825587, -0.1892482 ,  0.21262319,  1.23169886,
       -0.46729143,  0.48976172,  0.87865633])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Reshaping array:
</p>

In [91]:
# Grab values from 0 through 19 in an array
arr = np.arange(20)
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [92]:
# Reshape to be a 4 x 5 matrix; the result is only displayed here, arr itself is not reassigned
arr.reshape(4, 5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Transpose:

</p>

In [99]:
# Transpose: .T swaps the rows and columns of the array
ex1 = np.array([[11, 12],
                [21, 22]])

# Show the original and its transpose with one blank line in between
print(ex1, ex1.T, sep="\n\n")

[[11 12]
 [21 22]]

[[11 21]
 [12 22]]


<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Indexing using where():</p>

In [100]:
# Two value arrays of equal length, plus a boolean mask with one entry per position
x_1 = np.array([1, 2, 3, 4, 5])

y_1 = np.array([11, 22, 33, 44, 55])

# NOTE(review): the name `filter` shadows Python's built-in filter()
filter = np.array([True, False, True, False, True])

In [101]:
# For each position: take the value from x_1 where filter is True, otherwise from y_1
out = np.where(filter, x_1, y_1)
print(out)

[ 1 22  3 44  5]


In [102]:
mat = np.random.rand(5,5)
mat

array([[ 0.86290003,  0.22039686,  0.59332803,  0.87541801,  0.05679421],
       [ 0.39291364,  0.518734  ,  0.31712481,  0.06448038,  0.44263506],
       [ 0.65431256,  0.54440701,  0.66386332,  0.23049517,  0.46823728],
       [ 0.26001235,  0.19103309,  0.70246167,  0.90331936,  0.0839569 ],
       [ 0.81440953,  0.50680996,  0.47696805,  0.06786918,  0.66636917]])

In [103]:
# Produce an array holding 1000 wherever mat is greater than 0.5 and -1 everywhere else
np.where( mat > 0.5, 1000, -1)

array([[1000,   -1, 1000, 1000,   -1],
       [  -1, 1000,   -1,   -1,   -1],
       [1000, 1000, 1000,   -1,   -1],
       [  -1,   -1, 1000, 1000,   -1],
       [1000, 1000,   -1,   -1, 1000]])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

"any" or "all" conditionals:</p>

In [104]:
arr_bools = np.array([True, False, True, True, False])

In [105]:
arr_bools.any()

True

In [106]:
arr_bools.all()

False

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Random Number Generation:
</p>

In [107]:
# Draw a 1 x 5 array of normally distributed samples and keep its first (only) row as a rank 1 array
Y = np.random.normal(size = (1,5))[0]
print(Y)

[ 0.95335636 -1.44195719 -1.07284075  0.5323386   1.34344186]


In [108]:
# Draw four random integers with 2 <= value < 50 (the `high` bound is exclusive)
Z = np.random.randint(low=2, high=50, size=4)
print(Z)

[18 45 47  2]


In [109]:
np.random.permutation(Z) # Return a new ordering of elements in Z

array([47,  2, 45, 18])

In [110]:
np.random.uniform(size=4) # Uniform distribution

array([ 0.39548505,  0.29898945,  0.56719601,  0.28415048])

In [111]:
np.random.normal(size=4) # Normal distribution

array([-0.77703612,  0.24920552,  0.70377393, -0.34747853])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-style:bold"><br>

Merging data sets:
</p>

In [112]:
K = np.random.randint(low=2, high=50, size=(2,2))
print(K)

print()

M = np.random.randint(low=2, high=50, size=(2,2))
print(M)

[[33 20]
 [ 3 19]]

[[43  7]
 [20 39]]


In [113]:
np.vstack((K,M))

array([[33, 20],
       [ 3, 19],
       [43,  7],
       [20, 39]])

In [114]:
np.hstack((K,M))

array([[33, 20, 43,  7],
       [ 3, 19, 20, 39]])

In [115]:
np.concatenate([K,M], axis = 0)

array([[33, 20],
       [ 3, 19],
       [43,  7],
       [20, 39]])

In [116]:
# Join K and the transpose of M side by side (axis = 1 appends columns)
np.concatenate([K, M.T], axis = 1)

array([[33, 20, 43, 20],
       [ 3, 19,  7, 39]])