In [52]:
import numpy as np
import pandas as pd
np.random.seed(0) # seed for reproducibility

In [3]:
# Three sample arrays of random integers in [0, 10), one per dimensionality.
# They are drawn from NumPy's global random state, so the values depend on
# the seed set in the import cell.
x1 = np.random.randint(0, 10, size=6)          # 1-D: 6 elements
x2 = np.random.randint(0, 10, size=(3, 4))     # 2-D: 3 rows x 4 columns
x3 = np.random.randint(0, 10, size=(3, 4, 5))  # 3-D: shape (3, 4, 5)

In [1]:
## Attributes of arrays - Determining the size, shape, memory consumption, and data types of arrays
## Indexing of arrays - Getting and setting the values of individual array elements
## Slicing of arrays - Getting and setting smaller subarrays within a larger array
## Reshaping of arrays - Changing the shape of a given array
## Joining and splitting of arrays - Combining multiple arrays into one, and splitting one array into many

In [17]:
# Display the full 3-D array x3 (3 blocks, each 4 rows x 5 columns)
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [18]:
# Inspect x3: number of dimensions, length of each dimension, and total
# element count (module-level functions instead of the array attributes)
print("x3 ndim: ", np.ndim(x3))
print("x3 shape:", np.shape(x3))
print("x3 size: ", np.size(x3))

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60


In [25]:
# Array slicing: accessing subarrays.
# A slice of an array x is written x[start:stop:step]; the cells below use
# this one-dimensional array as the example.
x = np.arange(0, 10)  # the integers 0 through 9
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
x[:5] # first five elements (indices 0 through 4; an omitted start means 0)

array([0, 1, 2, 3, 4])

In [24]:
x[5:] # elements from index 5 through the end (index 5 itself is included)

array([5, 6, 7, 8, 9])

In [27]:
x[4:7] # middle subarray: indices 4, 5 and 6 (the stop index 7 is excluded)

array([4, 5, 6])

In [28]:
x2[:2, :3] # first two rows, first three columns (one slice per axis)

array([[3, 5, 2],
       [7, 6, 8]])

In [32]:
print(x2[:, 0]) # first column of x2: every row (:), column index 0

[3 7 1]


In [33]:
print(x2[0, :]) # first row of x2: row index 0, every column (:)

[3 5 2 4]


In [37]:
# Extract the top-left 2x2 subarray of x2.
# NOTE: basic slicing returns a view rather than a copy, so assigning into
# x2_sub would also change the corresponding elements of x2.
x2_sub = x2[:2, :2]
print(x2_sub)

[[3 5]
 [7 6]]


In [38]:
# Creating copies of arrays: .copy() produces an independent array, so
# later changes to x2_sub_copy do not touch x2.
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[3 5]
 [7 6]]


In [39]:
# Reshaping of arrays: lay the numbers 1 through 9 out as a 3x3 grid.
# The target shape must hold exactly as many elements as the source (9 here).
grid = np.arange(1, 10).reshape(3, 3)
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [40]:
# Concatenation of arrays
grid = np.array([[1, 2, 3], [4, 5, 6]])
# Join along the first axis (axis 0, the default): the rows of the second
# copy are appended below the rows of the first.
np.concatenate((grid, grid), axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [41]:
# Join along the second axis (axis=1, zero-indexed): the two copies are
# placed side by side, doubling the number of columns.
np.concatenate((grid, grid), axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [43]:
# np.vstack (vertical stack) and np.hstack (horizontal stack)
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], [6, 5, 4]])
# Stack vertically: the 1-D array x becomes the first row, above the two
# rows of grid.
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [44]:
# Stack horizontally: y is a 2x1 column of 99s appended to the right of grid
y = np.full((2, 1), 99)
np.hstack((grid, y))

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [46]:
# Trigonometric functions
# Three evenly spaced angles from 0 to pi inclusive: 0, pi/2 and pi
theta = np.linspace(0.0, np.pi, num=3)
theta

array([0.        , 1.57079633, 3.14159265])

In [47]:
# Evaluate sin, cos and tan element-wise on theta.
# Values such as 1.22e-16 and 6.12e-17 in the output are floating-point
# round-off where the exact answer is 0; for the same reason tan(pi/2)
# prints as a very large finite number.
print("theta = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta =  [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [48]:
# Exponents: e^x, 2^x and 3^x computed element-wise.
# x is a plain Python list; the NumPy functions accept it and return arrays.
# np.exp and np.exp2 give floats, while np.power(3, x) stays integer
# (visible in the printed output).
x = [1, 2, 3]
print("x =", x)
print("e^x =", np.exp(x))
print("2^x =", np.exp2(x))
print("3^x =", np.power(3, x))

x = [1, 2, 3]
e^x = [ 2.71828183  7.3890561  20.08553692]
2^x = [2. 4. 8.]
3^x = [ 3  9 27]


In [49]:
# Logarithms: natural log (np.log), base-2 (np.log2) and base-10 (np.log10),
# each applied element-wise to the list x.
x = [1, 2, 4, 10]
print("x =", x)
print("ln(x) =", np.log(x))
print("log2(x) =", np.log2(x))
print("log10(x) =", np.log10(x))

x = [1, 2, 4, 10]
ln(x) = [0.         0.69314718 1.38629436 2.30258509]
log2(x) = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


In [50]:
# Time Python's built-in sum against NumPy's np.sum on an array of one
# million random floats (drawn from NumPy's global random state).
big_array = np.random.rand(1000000)
%timeit sum(big_array) #python sum fcn
%timeit np.sum(big_array) #numpy sum function

75.3 ms ± 1.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.04 ms ± 161 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [51]:
# Same comparison for the minimum: built-in min versus np.min on big_array
%timeit min(big_array)#python min function
%timeit np.min(big_array)#numpy min function

53.9 ms ± 4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
438 µs ± 25.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [59]:
# Example: average height of US presidents.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell only runs where this exact file exists. Consider a path relative to
# the notebook (or a configurable data directory) before sharing.
path="C:\\Users\\dkimanthi001\\Documents\\MPFDS\\Python4ds_cohort-1\\president_heights.csv"
data=pd.read_csv(path)

In [60]:
# Preview the first five rows.
# NOTE(review): the "Unnamed: 0" column in the output looks like a row index
# that was saved into the CSV - TODO confirm; if so, passing index_col=0 to
# pd.read_csv would absorb it.
data.head()

Unnamed: 0,order,name,height(cm)
0,1,George Washington,189
1,2,John Adams,170
2,3,Thomas Jefferson,189
3,4,James Madison,163
4,5,James Monroe,183


In [62]:
# Copy the 'height(cm)' column out of the DataFrame into a NumPy array
# (heights in centimetres, per the column name) and display it.
heights = np.array(data['height(cm)'])
heights

array([189, 170, 189, 163, 183, 171, 185, 168, 173, 183, 173, 173, 175,
       178, 183, 193, 178, 173, 174, 183, 183, 168, 170, 178, 182, 180,
       183, 178, 182, 188, 175, 179, 183, 193, 182, 183, 177, 185, 188,
       188, 182, 185], dtype=int64)

In [64]:
# Summary statistics of the heights array.
# heights.std() uses NumPy's default ddof=0, i.e. the population standard
# deviation rather than the sample standard deviation.
# np.percentile(heights, q) returns the value below which q percent of the
# data falls; the median is the 50th percentile.
print("Mean height: ", heights.mean())
print("Standard deviation:", heights.std())
print("Minimum height: ", heights.min())
print("Maximum height: ", heights.max())
print("25th percentile: ", np.percentile(heights, 25))
print("Median: ", np.median(heights))
print("75th percentile: ", np.percentile(heights, 75))

Mean height:  179.73809523809524
Standard deviation: 6.931843442745892
Minimum height:  163
Maximum height:  193
25th percentile:  174.25
Median:  182.0
75th percentile:  183.0


In [65]:
# Broadcasting: operating on arrays of different shapes
M = np.ones(shape=(2, 3))  # 2x3 array filled with 1.0
a = np.arange(0, 3)        # 1-D array [0, 1, 2]
M


array([[1., 1., 1.],
       [1., 1., 1.]])

In [66]:
# Display a: the 1-D array of shape (3,) that is combined with the 2x3 array M
a

array([0, 1, 2])

In [67]:
# a has shape (3,) and M has shape (2, 3): a is broadcast across both rows
# of M, so [0, 1, 2] is added to every row.
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

In [68]:
# A (3, 1) column and a (3,) one-dimensional array built from the same values
a = np.arange(3)[:, np.newaxis]  # column vector, shape (3, 1)
b = np.arange(3)                 # 1-D array, shape (3,)
a

array([[0],
       [1],
       [2]])

In [69]:
# Display b: the 1-D array of shape (3,), counterpart to the (3, 1) column a
b

array([0, 1, 2])

In [71]:
# Fancy indexing
# A dedicated RandomState seeded with 42 keeps the draws below reproducible
# and separate from NumPy's global random state.
rand = np.random.RandomState(42)
rand

<mtrand.RandomState at 0x1c56d34da20>

In [72]:
# Ten random integers in [0, 100) drawn from the seeded generator
x = rand.randint(0, 100, size=10)
x

array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])

In [73]:
# Pick three elements one at a time with ordinary integer indexing; the
# result is a plain Python list.
[x[3], x[7], x[2]]

[71, 86, 14]

In [75]:
# Fancy indexing: pass a list of indices to fetch several elements at once;
# the result is a NumPy array.
ind = [3, 7, 4]
x[ind] # equivalent to x[[3, 7, 4]]

array([71, 86, 60])

In [76]:
# With fancy indexing, the result takes the shape of the index array
# rather than the shape of the array being indexed:
ind = np.array([[3, 7], [4, 5]])  # 2x2 index array -> 2x2 result
x[ind]

array([[71, 86],
       [60, 20]])

In [None]:
# Combining a simple index with fancy indexing on a 2-D array.
# Uppercase X was never defined in this notebook (only the 1-D lowercase x),
# so this cell raised NameError on a fresh kernel. Build a small 3x4 grid
# here so the example is self-contained.
X = np.arange(12).reshape((3, 4))
X[2, [2, 0, 1]]  # third row (index 2), column indices 2, 0, 1 -> array([10, 8, 9])