#### Creating arrays 


In [1]:
import numpy as np 

np.array([1,2,3,4])

array([1, 2, 3, 4])

In [2]:
# Import the numpy package
import numpy as np

# Create a 1d integer array from a list
arr1 = np.array([1, 2, 3, 4])

# Print the array and its type
print(arr1)
print(type(arr1))

[1 2 3 4]
<class 'numpy.ndarray'>


In [3]:
# Create a 1d float array
arr2 = np.array([1, 2, 3, 4], dtype='float32')

# Print the array and its type
print(type(arr2))
print(arr2)

<class 'numpy.ndarray'>
[1. 2. 3. 4.]


In [4]:
# Create a 2d array from a list of lists
lists = [[0,1,2], [3,4,5], [6,7,8]]
arr2d = np.array(lists)

print(arr2d)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [6]:
arr1 = np.array([1, 2, 3, 4])
print(arr1)

# Vector (element-wise) operations
print(arr1 * 2)
print(arr1 + 2)
print(arr1 * arr1)

[1 2 3 4]
[2 4 6 8]
[3 4 5 6]
[ 1  4  9 16]


- Some other key differences between Python built-in lists and NumPy arrays are:

- Array size cannot be changed after creation, you will have to create a new array or overwrite the existing one to change size.
- Unlike lists, all items in the array must be of the same dtype.
- An equivalent NumPy array occupies much less space than a Python list of lists.

## Arrays from scratch 
---=

In [9]:

# Create a 3x3 floating-point array filled with 1s
np.ones((3, 3), dtype=float)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [8]:
# Create an integer array of length 100 filled with zeros
np.zeros(100, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:

# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 3
# (this is similar to the built-in range() function)
np.arange(0, 20, 3)

array([ 0,  3,  6,  9, 12, 15, 18])

In [11]:

# Create an array of hundred values evenly spaced between 0 and 1
np.linspace(0, 1, 100)


array([0.        , 0.01010101, 0.02020202, 0.03030303, 0.04040404,
       0.05050505, 0.06060606, 0.07070707, 0.08080808, 0.09090909,
       0.1010101 , 0.11111111, 0.12121212, 0.13131313, 0.14141414,
       0.15151515, 0.16161616, 0.17171717, 0.18181818, 0.19191919,
       0.2020202 , 0.21212121, 0.22222222, 0.23232323, 0.24242424,
       0.25252525, 0.26262626, 0.27272727, 0.28282828, 0.29292929,
       0.3030303 , 0.31313131, 0.32323232, 0.33333333, 0.34343434,
       0.35353535, 0.36363636, 0.37373737, 0.38383838, 0.39393939,
       0.4040404 , 0.41414141, 0.42424242, 0.43434343, 0.44444444,
       0.45454545, 0.46464646, 0.47474747, 0.48484848, 0.49494949,
       0.50505051, 0.51515152, 0.52525253, 0.53535354, 0.54545455,
       0.55555556, 0.56565657, 0.57575758, 0.58585859, 0.5959596 ,
       0.60606061, 0.61616162, 0.62626263, 0.63636364, 0.64646465,
       0.65656566, 0.66666667, 0.67676768, 0.68686869, 0.6969697 ,
       0.70707071, 0.71717172, 0.72727273, 0.73737374, 0.74747

In [12]:
# Create a 3x3 array of uniformly distributed random values between 0 and 1
np.random.random((3, 3))


array([[0.11646628, 0.92520379, 0.4334946 ],
       [0.16100953, 0.94533796, 0.56795995],
       [0.39998611, 0.33135857, 0.88590981]])

In [13]:

# Create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3, 3))


array([[0, 9, 9],
       [6, 9, 7],
       [1, 7, 2]])

In [14]:

# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))

array([[-0.04076817,  0.99160306, -1.50297989],
       [ 0.73233918, -0.69904097,  0.27490603],
       [ 0.26098443, -0.0080079 ,  1.42479905]])

In [15]:

np.random.randint(10, size=6)  # One-dimensional array of random integers

array([1, 9, 9, 3, 6, 6])

In [16]:
np.random.randint(10, size=(3, 3))  # Two-dimensional array of random integers

array([[2, 9, 3],
       [0, 1, 2],
       [8, 2, 4]])

In [17]:
np.random.randint(10, size=(3, 3, 3))  # Three-dimensional array of random integers

array([[[9, 3, 6],
        [8, 5, 1],
        [6, 8, 0]],

       [[4, 6, 4],
        [5, 9, 5],
        [6, 7, 5]],

       [[0, 5, 9],
        [8, 6, 7],
        [8, 0, 8]]])

### Array Attributes 
- Each array has the following attributes:
 
- ndim: the number of dimensions
- shape: the size of each dimension
- size: the total size of the array
- dtype: the data type of the array
- itemsize: the size (in bytes) of each array element
- nbytes: the total size (in bytes) of the array

In [20]:
import numpy as np

# Create a 3x3 array of random integers in the interval [0, 10)
x = np.random.randint(0, 10, (3, 3))
print(x)

print("ndim: ", x.ndim)
print("shape:", x.shape)
print("x size: ", x.size)
print("dtype:", x.dtype)
print("itemsize:", x.itemsize, "bytes")
print("nbytes:", x.nbytes, "bytes")

[[2 6 7]
 [7 3 1]
 [0 1 7]]
ndim:  2
shape: (3, 3)
x size:  9
dtype: int64
itemsize: 8 bytes
nbytes: 72 bytes


## Numpy Basics- Array indexing and slicing 
---
#####  Array Indexing: Accessing Single Elements
- ython’s standard list indexing. In a 1D array, we can access the ith value by specifying the index of the element we need in square brackets.

In [21]:
# Input array
x1 = np.array([1, 3, 4, 4, 6, 4])

# Assess the first value of x1
print("The first value of x1 positions:", end=" ")
print(x1[0])

# Assess the third value of x1
print("The third value of x1 positions:", end=" ")
print(x1[2])

The first value of x1 positions: 1
The third value of x1 positions: 4


In [22]:
# Get the last value of x1
print("The last value of x1 positions:", end=" ")
print(x1[-1])

# Get the second last value of x1
print("The second last value of x1 positions:", end=" ")
print(x1[-2])

The last value of x1 positions: 4
The second last value of x1 positions: 6


- we have a multidimensional array, and want to access items based on both column and row, we can pass the row and column indices at the same time using a comma-separated tuple

In [23]:
# In a multidimensional array, we need to specify row and column index. Given input array x2:
x2 = np.array([[3, 2, 5, 5],[0, 1, 5, 8], [3, 0, 5, 0]])
print("x2 = ", end="")
print(x2)


print("Value in 3rd row and 4th column of x2: ", end=" ")
print(x2[2,3])


print("3rd row and last value from the 3rd column of x2:", end=" ")
print(x2[2,-1])
#>0

# Replace value in 1st row and 1st column of x2 with 1
x2[0,0] = 1
print("updated x2 = ", end="")
print(x2)


x2 = [[3 2 5 5]
 [0 1 5 8]
 [3 0 5 0]]
Value in 3rd row and 4th column of x2:  0
3rd row and last value from the 3rd column of x2: 0
updated x2 = [[1 2 5 5]
 [0 1 5 8]
 [3 0 5 0]]


### array Slicing 
---
- Slicing array is a way to access subarrays, i.e., accessing multiple or a range of elements from an array instead of individual items

In [24]:
x1 = np.arange(10) # Input array
print("x1 = ", end="")
print(x1)

# Get the first 5 elements of x
print("the first 5 elements of x are: ", end="")
print(x1[:5])

# Elements after index 4
print("Elements after index 4: ", end="")
print(x1[4:])


# From 4th to 6th position
print("Elements from 4th to 6th position: ", end="")
print(x1[4:7])

# Return elements at even place (every other element)
print("Elements at even places are: ", end="")
print(x1[ : : 2])

#return elements from 1st position step by 2 (every other element starting at index 1)
print("Elements from 1st position step by 2 are: ", end="")
print(x1[1::2])


x1 = [0 1 2 3 4 5 6 7 8 9]
the first 5 elements of x are: [0 1 2 3 4]
Elements after index 4: [4 5 6 7 8 9]
Elements from 4th to 6th position: [4 5 6]
Elements at even places are: [0 2 4 6 8]
Elements from 1st position step by 2 are: [1 3 5 7 9]


In [25]:
#reverse the array
print("Reversed array: ", end="")
print(x1[::-1])

# reverse every other element starting from index 5
print("Array after reversing every other element starting from index 5: ", end="")
print(x1[5::-2])

Reversed array: [9 8 7 6 5 4 3 2 1 0]
Array after reversing every other element starting from index 5: [5 3 1]


In [26]:
#Two dimentional array
x2 = np.array([[0,1,2], [3,4,5], [6,7,8]])

# Extract the first two rows and two columns
print("The first two rows and columns are: ")
print(x2[:2, :2])

# all rows, every other column
print("All rows, every other column: ")
print(x2[:3, ::2])


The first two rows and columns are: 
[[0 1]
 [3 4]]
All rows, every other column: 
[[0 2]
 [3 5]
 [6 8]]


In [27]:
# Reverse only the row positions
print("The array after reversing the row positions: ")
print(x2[::-1, ])

# Reverse the row and column positions
print("The array after reversing the row and column positions: ")
print(x2[::-1, ::-1])


The array after reversing the row positions: 
[[6 7 8]
 [3 4 5]
 [0 1 2]]
The array after reversing the row and column positions: 
[[8 7 6]
 [5 4 3]
 [2 1 0]]


- Array slices are not copies of the arrays. This means that if we want to do a modification on the array obtained from the slicing operation without changing the original array, we have to use the copy() method:
---
- Numpy slicing syntax follows that of a python list: __arr[start : stop : step]__. When any of these are unspecified, they default to the values start=0, stop=size of dimension, step=1.


### NumPy Basics - Reshaping and Concatenation 
---
#####  Reshaping of Arrays

----
- Reshaping is a very useful operation and it can easily be done using the reshape() method
- 

In [28]:
import numpy as np

reshaped = np.arange(1, 10).reshape((3, 3))
print(reshaped)


[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [29]:
x = np.array([1, 2, 3])
print(x)

# row vector via reshape
x_rv= x.reshape((1, 3))
print(x_rv)

# column vector via reshape
x_cv = x.reshape((3, 1))
print(x_cv)

[1 2 3]
[[1 2 3]]
[[1]
 [2]
 [3]]


In [30]:
# concatenation 
# We can concatenate two or more arrays at once.
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = [11,11,11]

print("The concatenated single dimension array is:", end=" ")
print(np.concatenate([x, y, z]))
#> array([ 1,  2,  3,  3,  2,  1, 11, 11, 11])

# We can also concatenate 2-dimensional arrays.
grid = np.array([[1,2,3] , [4,5,6]])
print("The concatenated two-dimension array is:")
print(np.concatenate([grid, grid]))


The concatenated single dimension array is: [ 1  2  3  3  2  1 11 11 11]
The concatenated two-dimension array is:
[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]


- In such cases, np.concatenate might not be the best option to use. Instead, you can use np.vstack (vertical stack) or np.hstack(horizontal stack) to finish the task.

In [31]:
x = np.array([3,4,5])
grid = np.array([[1,2,3],[9,10,11]])

# vertically stack the arrays
print("Veritcally stacked array: ")
print(np.vstack([x,grid]))

 # horizontally stack the arrays
z = np.array([[19],[19]])
print("Horizontally stacked array:")
print(np.hstack([grid,z]))
 

Veritcally stacked array: 
[[ 3  4  5]
 [ 1  2  3]
 [ 9 10 11]]
Horizontally stacked array:
[[ 1  2  3 19]
 [ 9 10 11 19]]


In [32]:
# Splitting 
x = np.array([3,4,5])
grid = np.array([[1,2,3],[9,10,11]])

# vertically stack the arrays
print("Veritcally stacked array: ")
print(np.vstack([x,grid]))

 # horizontally stack the arrays
z = np.array([[19],[19]])
print("Horizontally stacked array:")
print(np.hstack([grid,z]))
 

Veritcally stacked array: 
[[ 3  4  5]
 [ 1  2  3]
 [ 9 10 11]]
Horizontally stacked array:
[[ 1  2  3 19]
 [ 9 10 11 19]]


In [33]:
import numpy as np

grid = np.arange(16).reshape((4, 4))
print("grid: ")
print(grid, "\n")

# Split vertically and print upper and lower arrays
upper, lower = np.vsplit(grid, [2])
print("Upper part: ")
print(upper)
print("Lower part: ")
print(lower, "\n")

# Split horizontally and print left and right arrays
left, right = np.hsplit(grid, [2])
print("Left part: ")
print(left)
print("Right part: ")
print(right)

grid: 
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]] 

Upper part: 
[[0 1 2 3]
 [4 5 6 7]]
Lower part: 
[[ 8  9 10 11]
 [12 13 14 15]] 

Left part: 
[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
Right part: 
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


# Numpy Arithmetic and Statistics - Compuations and Aggregations 
---
#### Mathematical Functions#
- The arithmetic operators, as shown in the code widget below, are conveniently wrapped around specific functions built into NumPy; for example, the + operator is a wrapper for the add ufunc.



In [34]:
import numpy as np

x = np.arange(10)

# Native arithmentic operators
print("x =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 5 =", x * 5)
print("x / 5 =", x / 5)
print("x ** 2 = ", x ** 2)
print("x % 2  = ", x % 2)

# OR we can use explicit functions, ufuncs, e.g. "add" instead of "+"
print(np.add(x, 5))
print(np.subtract(x, 5))
print(np.multiply(x, 5))
print(np.divide(x, 5))
print(np.power(x, 2))
print(np.mod(x, 2))

x = [0 1 2 3 4 5 6 7 8 9]
x + 5 = [ 5  6  7  8  9 10 11 12 13 14]
x - 5 = [-5 -4 -3 -2 -1  0  1  2  3  4]
x * 5 = [ 0  5 10 15 20 25 30 35 40 45]
x / 5 = [0.  0.2 0.4 0.6 0.8 1.  1.2 1.4 1.6 1.8]
x ** 2 =  [ 0  1  4  9 16 25 36 49 64 81]
x % 2  =  [0 1 0 1 0 1 0 1 0 1]
[ 5  6  7  8  9 10 11 12 13 14]
[-5 -4 -3 -2 -1  0  1  2  3  4]
[ 0  5 10 15 20 25 30 35 40 45]
[0.  0.2 0.4 0.6 0.8 1.  1.2 1.4 1.6 1.8]
[ 0  1  4  9 16 25 36 49 64 81]
[0 1 0 1 0 1 0 1 0 1]


- ome of the most useful functions for data scientists are the trigonometric functions. 

In [35]:
theta = np.linspace(0, np.pi, 4)
print("theta      = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta      =  [0.         1.04719755 2.0943951  3.14159265]
sin(theta) =  [0.00000000e+00 8.66025404e-01 8.66025404e-01 1.22464680e-16]
cos(theta) =  [ 1.   0.5 -0.5 -1. ]
tan(theta) =  [ 0.00000000e+00  1.73205081e+00 -1.73205081e+00 -1.22464680e-16]


- Similarly, we can also obtain logarithms and exponentials.

In [36]:
x = [1, 2, 3]
print("x     =", x)
print("e^x   =", np.exp(x))
print("2^x   =", np.exp2(x))
print("3^x   =", np.power(3, x))

print("ln(x)    =", np.log(x))
print("log2(x)  =", np.log2(x))
print("log10(x) =", np.log10(x))

x     = [1, 2, 3]
e^x   = [ 2.71828183  7.3890561  20.08553692]
2^x   = [2. 4. 8.]
3^x   = [ 3  9 27]
ln(x)    = [0.         0.69314718 1.09861229]
log2(x)  = [0.        1.        1.5849625]
log10(x) = [0.         0.30103    0.47712125]


## Universal Function Methods#
---
- ufuncs provide some methods that take two input parameters and return one output parameter. reduce and accumulate are two of the most important ones, so let’s look into those.
---
#### Calling the reduce method

- Say we want to apply some operation to reduce an array to a single value. We can use the reduce() method for this. This method repeatedly applies the given operation to the elements of an array until only a single result remains. For example, calling reduce on the add functions returns the sum of all elements in the array:


In [37]:
x = np.arange(1, 6)
sum_acc = np.add.accumulate(x)

print(x)
print(sum_acc)

[1 2 3 4 5]
[ 1  3  6 10 15]


#### b. Calling the accumulate method

- If we need to store all the intermediate results of the computation, we can use accumulate() instead:

In [38]:
x = np.arange(1, 6)
sum_acc = np.add.accumulate(x)

print(x)
print(sum_acc)

[1 2 3 4 5]
[ 1  3  6 10 15]


### Aggregations
---
-  We will look into the theoretical aspects of these statistical concepts in the “Statistics for Data Science” section, so don’t worry if you don’t remember what standard deviation is, for instance!

---
- 

In [39]:
import numpy as np

x = np.random.random(100)

# Sum of all the values
print("Sum of values is:", np.sum(x))
# Mean value
print("Mean value is: ", np.mean(x))

#For min, max, sum, and several other NumPy aggregates, 
#a shorter syntax is to use methods of the array object itself,
# i.e. instead of np.sum(x), we can use x.sum()
print("Sum of values is:", x.sum())
print("Mean value is: ", x.mean())
print("Max value is: ", x.max())
print("Min value is: ", x.min())

Sum of values is: 48.554508369444356
Mean value is:  0.48554508369444355
Sum of values is: 48.554508369444356
Mean value is:  0.48554508369444355
Max value is:  0.9799377668895904
Min value is:  0.004354053532383384


- we can perform aggregate operations on multi-dimensional arrays as well. Also, if we want to compute the minimum row wise or column wise, we can use the np.amin version instead

### NumPy Arithmetic and Statistics - Comparison and Boolean Masks
---
######  Comparisons and Boolean Masks
----


In [40]:
import numpy as np

x = np.array([1, 2, 3, 4, 5])

print(x < 2) # less than
print(x >= 4) # greater than or equal

[ True False False False False]
[False False False  True  True]


In [41]:
x = np.array([1, 2, 3, 4, 5])


print("Elements for which multiplying by two is the same as the square of the value: ")
print((2 * x) == (x ** 2))


Elements for which multiplying by two is the same as the square of the value: 
[False  True False False False]


In [42]:
import numpy as np

x = np.arange(10)
print(x)

# How many values less than 6?
print(np.count_nonzero(x < 6))

# Are there any values greater than 8?
print(np.any(x > 8))

# Are all values less than 10?
print(np.all(x < 10))

[0 1 2 3 4 5 6 7 8 9]
6
True
True


In [43]:
# Boolean Masks

import numpy as np 

# Random integers between [0, 10) of shape 3x3
x = np.random.randint(0, 10, (3, 3))
print(x)

# Boolean array
print(x < 6)

# Boolean mask
print(x[x < 6])

[[8 9 4]
 [4 8 1]
 [7 0 2]]
[[False False  True]
 [ True False  True]
 [False  True  True]]
[4 4 1 0 2]


- Updating a multidimensional array with a new record is straightforward in numpy as long as their shapes match.

In [44]:
import numpy as np

heights = [189, 170, 189, 163, 183, 171, 185, 168, 173, 183, 173, 173, 175, 178, 183, 193, 178, 173, 174, 183, 183, 180, 168, 180, 170, 178, 182, 180, 183, 178, 182, 188, 175, 179, 183, 193, 182, 183, 177, 185, 188, 188, 182, 185, 191]
ages = [57, 61, 57, 57, 58, 57, 61, 54, 68, 51, 49, 64, 50, 48, 65, 52, 56, 46, 54, 49, 51, 47, 55, 55, 54, 42, 51, 56, 55, 51, 54, 51, 60, 62, 43, 55, 56, 61, 52, 69, 64, 46, 54, 47, 70]

heights_arr = np.array(heights)
ages_arr = np.array(ages)

heights_arr = heights_arr.reshape((45,1))
ages_arr = ages_arr.reshape((45,1))

height_age_arr = np.hstack((heights_arr, ages_arr))
print(height_age_arr.shape)
print(height_age_arr[:3,])

(45, 2)
[[189  57]
 [170  61]
 [189  57]]


In [45]:
import numpy as np

heights = [189, 170, 189, 163, 183, 171, 185, 168, 173, 183, 173, 173, 175, 178, 183, 193, 178, 173, 174, 183, 183, 180, 168, 180, 170, 178, 182, 180, 183, 178, 182, 188, 175, 179, 183, 193, 182, 183, 177, 185, 188, 188, 182, 185, 191]
ages = [57, 61, 57, 57, 58, 57, 61, 54, 68, 51, 49, 64, 50, 48, 65, 52, 56, 46, 54, 49, 51, 47, 55, 55, 54, 42, 51, 56, 55, 51, 54, 51, 60, 62, 43, 55, 56, 61, 52, 69, 64, 46, 54, 47, 70]

heights_arr = np.array(heights)
ages_arr = np.array(ages)

heights_arr = heights_arr.reshape((1,45))
ages_arr = ages_arr.reshape((1,45))

height_age_arr = np.vstack((heights_arr, ages_arr))
print(height_age_arr.shape)
print(height_age_arr[:,:3])

(2, 45)
[[189 170 189]
 [ 57  61  57]]


- To combine more than two arrays horizontally, simply add the additional arrays into the tuple.