#### 1. Import numpy as np and see the version

In [1]:
import numpy as np
np.version.full_version

'1.14.0'

In [2]:
np.__version__

'1.14.0'

#### 2. How to create a boolean array?

In [3]:
# Remainders of 0..9 modulo 2 alternate 0, 1, 0, 1, ...
arr = np.arange(10) % 2
# Casting to bool turns every 1 into True and every 0 into False.
arr.astype(bool)

array([False,  True, False,  True, False,  True, False,  True, False,
        True])

In [4]:
#Create a 3×3 numpy array of all True’s
np.ones(9, dtype = bool).reshape(3,3)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [5]:
np.ones((3,3),dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [6]:
np.full((3,3),True,dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

#### 3. How to extract items that satisfy a given condition from 1D array?

In [7]:
#Extract all odd numbers from arr
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
arr[arr%2==1]

array([1, 3, 5, 7, 9])

In [9]:
#Replace all odd numbers in arr with -1
arr[arr%2==1] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

#### 4. How to replace items that satisfy a condition without affecting the original array?

In [10]:
#Replace all odd numbers in arr with -1
arr_old = np.arange(10)

In [11]:
# Create an explicit copy; without it, changes made to arr_new would also change the original array
arr_new = arr_old.copy()

In [12]:
arr_new[arr_new%2==1] = -1
arr_new

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [13]:
arr_old

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
# Another way: np.where(condition, value_if_true, value_if_false) builds a new array
# and leaves arr_old untouched. Both the condition and the fallback values must come
# from arr_old; the earlier `arr` was already modified in place, so using it here
# would only give the right answer by coincidence.
np.where(arr_old%2==1, -1, arr_old)

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

#### 5. How to reshape an array?

#### 6. How to stack two arrays vertically?

In [15]:
a = np.arange(10).reshape(2,-1)
a

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [16]:
b = np.repeat(1, 10).reshape(2,-1)
b

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [17]:
#use vstack()
np.vstack((a,b))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [18]:
np.concatenate([a,b],axis=0)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [19]:
np.r_[a,b]
# np.r_ joins a and b along the first axis (rows) here; axis and dimensionality matter

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### 7. How to stack two arrays horizontally?

In [20]:
np.hstack((a,b))

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [21]:
np.concatenate([a,b],axis=1)

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [22]:
np.c_[a,b]

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

#### 8. How to generate custom sequences in numpy without hardcoding?

In [23]:
# Goal: build array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])
# from the input array a, using only numpy functions and no hardcoding.
a = np.array([1, 2, 3])
# First half of the pattern: each element repeated three times in a row.
np.repeat(a, 3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [24]:
np.tile(a,3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [25]:
np.r_[np.repeat(a,3),np.tile(a,3)]

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])

#### 9. How to get the common items between two python numpy arrays?

In [26]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [27]:
# The mask a == b compares element by element, so this keeps only the values that
# are equal at the same index in both arrays (not a general set intersection).
a[a == b]

array([2, 2, 4, 4])

In [28]:
set(a[a==b])

{2, 4}

In [29]:
# use set intersection
np.intersect1d(a,b)

array([2, 4])

#### 10. How to remove from one array those items that exist in another?

In [30]:
# use set difference
# and b remains unaffected
np.setdiff1d(a,b)

array([1, 3, 5, 6])

#### 11. How to get the positions where elements of two arrays match?

In [31]:
np.where(a==b)

(array([1, 3, 5, 7], dtype=int64),)

#### 12. How to extract all numbers between a given range from a numpy array?

In [32]:
# Get all items between 5 and 10 from a.
a = np.array([2, 6, 1, 9, 10, 3, 27])

In [33]:
# Combine the two masks with the element-wise operator & (each comparison wrapped in
# parentheses); Python's `and` is not an element-wise operation.
index = np.where((a>5) & (a<10))
a[index]

array([6, 9])

In [34]:
a[((a>5) & (a<10))]

array([6, 9])

In [35]:
index = np.where(np.logical_and(a>5, a<10))
a[index]

array([6, 9])

#### 13. How to make a python function that handles scalars to work on numpy arrays?

In [36]:
# Convert the function maxx that works on two scalars, to work on two arrays.
def maxx(x, y):
    """Return the larger of two scalars (x when x >= y holds, otherwise y)."""
    return x if x >= y else y

maxx(1, 5)
#> 5

5

In [37]:
# np.vectorize wraps the scalar function maxx so it can be called on arrays
# element by element, much like applying map over paired items.
pair_max = np.vectorize(maxx)

In [38]:
a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])
pair_max(a,b)

array([6, 7, 9, 8, 9, 7, 5])

#### 14. How to swap two columns in a 2d numpy array?

In [39]:
# Swap the first two columns (indices 0 and 1) of the array arr.
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [40]:
arr[:,[1,0,2]]

array([[1, 0, 2],
       [4, 3, 5],
       [7, 6, 8]])

In [41]:
# row swap
arr[[1,0,2], :]

array([[3, 4, 5],
       [0, 1, 2],
       [6, 7, 8]])

In [42]:
# How to reverse the rows of a 2D array?
arr[::-1,:]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [43]:
# How to reverse the columns of a 2D array?
arr[:,::-1]

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

#### 15. How to create a 2D array containing random floats between 5 and 10?

In [44]:
# Create a 2D array of shape 5x3 to contain random decimal numbers between 5 and 10.
a = np.random.uniform(5,10,(5,3))
a

array([[9.52918693, 7.81167543, 6.78484937],
       [5.93400374, 7.23949182, 8.36077547],
       [8.35054346, 6.69832045, 8.95379101],
       [5.92706098, 9.66809571, 8.72468445],
       [6.83822428, 5.67310874, 8.19183762]])

#### 16. How to print only 3 decimal places in python numpy array?

In [45]:
np.set_printoptions(precision=3)
a

array([[9.529, 7.812, 6.785],
       [5.934, 7.239, 8.361],
       [8.351, 6.698, 8.954],
       [5.927, 9.668, 8.725],
       [6.838, 5.673, 8.192]])

In [46]:
# Pretty print: very small values are displayed in scientific notation here
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
rand_arr

array([[5.434e-04, 2.784e-04, 4.245e-04],
       [8.448e-04, 4.719e-06, 1.216e-04],
       [6.707e-04, 8.259e-04, 1.367e-04]])

In [47]:
np.set_printoptions(suppress=True, precision=6)
rand_arr

array([[0.000543, 0.000278, 0.000425],
       [0.000845, 0.000005, 0.000122],
       [0.000671, 0.000826, 0.000137]])

#### 17. How to limit the number of items printed in output of numpy array?

In [48]:
# Limit the number of items printed in python numpy array a to a maximum of 6 elements.
a = np.array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
#> array([ 0,  1,  2, ..., 12, 13, 14])

In [49]:
np.set_printoptions(threshold=6)
a

array([ 0,  1,  2, ..., 12, 13, 14])

#### 18. How to import a dataset with numbers and texts keeping the text intact in python numpy?

In [50]:
# Import the iris dataset keeping the text intact.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [51]:
# already set dtype as object
iris[:3]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

#### 19. How to insert values at random positions in an array?

In [52]:
np.random.seed(100)
# Draw a 10x15 grid of random integers, then cast to float so the array can hold NaN.
rand_arr = np.random.randint(10, 10000, size=(10, 15)).astype(float)
rand_arr.dtype

dtype('float64')

In [53]:
i,j = np.where(rand_arr)
i

array([0, 0, 0, ..., 9, 9, 9], dtype=int64)

In [54]:
import sys

# Print the full array without summarization. The threshold must be a real number:
# np.nan is rejected by current NumPy, and sys.maxsize is the documented way to
# disable truncation.
np.set_printoptions(threshold=sys.maxsize)
# NOTE: the row and column indices are drawn independently and with replacement,
# so the same position can be picked twice and fewer than 5 NaNs may be inserted.
rand_arr[np.random.choice(i,5),np.random.choice(j,5)] = np.nan
rand_arr

array([[5650., 6946., 8049.,   89.,  360., 4159., 7916., 5656.,  812.,
        4386.,   nan, 1350., 7748., 9733., 5779.],
       [6240., 2444., 5797., 1910., 5003., 3223., 6911., 5853., 9230.,
        2893., 4241., 3777., 4869., 3418.,   12.],
       [ 899., 4723., 2536., 9629., 3263., 7416., 4008., 3002., 3959.,
         816., 8476., 2250., 7106., 7084., 4543.],
       [1748., 4379., 9751., 8820., 8219., 7231., 6222.,  498., 3429.,
        4119., 5315., 4237., 2640., 1677., 9438.],
       [8596., 4601., 8845., 2931., 1807.,  849., 6192., 3296., 5300.,
        6249., 7927., 3713., 7979., 7434., 9921.],
       [8428., 9117., 4924., 1486., 4871., 3578., 1823., 8611., 3520.,
        8464., 1711., 6000., 5758., 3701., 1768.],
       [5949., 4995., 2636., 4349., 4545., 6427., 4919., 9711., 2147.,
         439.,  813., 1050.,   nan, 8182., 8990.],
       [6292., 6866., 5544., 1942., 3071., 9884., 1383., 7041.,  566.,
        3327., 9283.,   nan, 8970., 9970.,  704.],
       [3046., 9615., 55

In [55]:
#How to find the position of missing values in numpy array?
np.isnan(rand_arr).sum()

5

In [56]:
np.where(np.isnan(rand_arr))

(array([0, 6, 7, 8, 9], dtype=int64), array([10, 12, 11,  7,  8], dtype=int64))

#### 20. How to find the percentile scores of a numpy array?

In [57]:
a = np.random.randint(10,50,11)
a

array([33, 27, 11, 39, 26, 29, 46, 47, 26, 45, 12])

In [58]:
np.percentile(a,32)

26.200000000000003

#### 21. How to find the correlation between two columns of a numpy array?

In [64]:
# Q. Find the correlation between SepalLength(1st column) and PetalLength(3rd column) in iris_2d
# # Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [65]:
#solution 1
np.corrcoef(iris_2d[:, 0], iris_2d[:, 2])[0, 1]

0.8717541573048718

In [66]:
# solution 2
# pearsonr returns the correlation coefficient together with a p-value.
# Import it from the public scipy.stats namespace; scipy.stats.stats is a
# deprecated private module.
from scipy.stats import pearsonr
corr, p_value = pearsonr(iris_2d[:, 0], iris_2d[:, 2])
print(corr)

0.8717541573048712


#### 22. How to find if a given array has any null values?

In [67]:
# Find out if iris_2d has any missing values.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [70]:
np.isnan(iris_2d).any()

False

#### 23. How to find the count of unique values in a numpy array?

In [72]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [75]:
# Take the fifth column (index 4) of every row and rebuild it as a plain array.
species = np.array(iris[:, 4].tolist())


In [74]:
np.unique(species, return_counts=True)

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
       dtype='|S15'), array([50, 50, 50], dtype=int64))

In [79]:
# pandas
import pandas as pd
df = pd.DataFrame(iris)
# unique is a method: it has to be called to get the distinct values of column 4;
# without the parentheses the cell would only display the bound method object.
df[4].unique()

array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
      dtype=object)

In [82]:
df[4].value_counts()

b'Iris-versicolor'    50
b'Iris-setosa'        50
b'Iris-virginica'     50
Name: 4, dtype: int64

#### 24. How to convert a numeric to a categorical (text) array?
##### Less than 3 --> 'small'
##### 3-5 --> 'medium'
##### >=5 --> 'large'