In [2]:
import numpy as np
import time

In [3]:
# let's declare the size
Size = 10000

# Creating two lists
list1 = range(Size)
list2 = range(Size)

# Creating two NumPy arrays
arr1 = np.arange(Size)
arr2 = np.arange(Size)

# Calculating time for Python list
start = time.time()
result1 = [(x + y) for x, y in zip(list1, list2)]

print("Time for Python List in msec: ", (time.time() - start) )

# Calculating time for NumPy array
start = time.time()
result2 = (arr1 + arr2)

print("Time for NumPy array in msec: ", (time.time()- start) )

Time for Python List in msec:  0.003030538558959961
Time for NumPy array in msec:  0.0002613067626953125


## How to extract all numbers between a given range from a numpy array?

In [9]:
a = np.arange(15)

# Method 1
index = np.where((a >= 5) & (a <= 10))
output_array = a[index]
output_array

array([ 5,  6,  7,  8,  9, 10])

In [10]:
# Method 2
output_array = a[(a >= 5) & (a <= 10)]
output_array

array([ 5,  6,  7,  8,  9, 10])

## How to print only 3 decimal places in python numpy array?

In [11]:
# Create the random array
rand_arr = np.random.random([5,3])

rand_arr

array([[0.6383374 , 0.78662208, 0.99271606],
       [0.02637304, 0.71718048, 0.79613436],
       [0.03066648, 0.4755357 , 0.92213485],
       [0.07704712, 0.97828143, 0.88762748],
       [0.12061094, 0.65873438, 0.15095335]])

In [13]:
# Limit to 3 decimal places
np.set_printoptions(precision=2)
rand_arr[:4]

array([[0.64, 0.79, 0.99],
       [0.03, 0.72, 0.8 ],
       [0.03, 0.48, 0.92],
       [0.08, 0.98, 0.89]])

## How to replace all values greater than a given value to a given cutoff?

In [16]:
np.random.seed(10)
a = np.random.uniform(1,50, 30)
a

array([38.79,  2.02, 32.05, 37.69, 25.43, 12.02, 10.71, 38.27,  9.29,
        5.33, 34.58, 47.72,  1.19, 26.1 , 40.82, 31.01, 36.37, 15.3 ,
       45.97, 36.01, 27.58,  7.97, 19.29, 34.03, 22.65, 22.27, 31.27,
       26.14, 32.87, 30.45])

In [17]:
# Solution 1: Using np.clip
output_array = np.clip(a, a_min=10, a_max=30)
output_array

array([30.  , 10.  , 30.  , 30.  , 25.43, 12.02, 10.71, 30.  , 10.  ,
       10.  , 30.  , 30.  , 10.  , 26.1 , 30.  , 30.  , 30.  , 15.3 ,
       30.  , 30.  , 27.58, 10.  , 19.29, 30.  , 22.65, 22.27, 30.  ,
       26.14, 30.  , 30.  ])

In [18]:
# Solution 2: Using np.where
print(np.where(a < 10, 10, np.where(a > 30, 30, a)))

[30.   10.   30.   30.   25.43 12.02 10.71 30.   10.   10.   30.   30.
 10.   26.1  30.   30.   30.   15.3  30.   30.   27.58 10.   19.29 30.
 22.65 22.27 30.   26.14 30.   30.  ]


## How to drop all missing values from a numpy array?

In [19]:
a = np.array([1,2,3,np.nan,5,6,7,np.nan])
a[~np.isnan(a)]

array([1., 2., 3., 5., 6., 7.])

## How to get the positions of top n values from a numpy array?

In [25]:
# Input
np.random.seed(100)
a = np.random.uniform(1,50, 10)

# Solution:
print(a)
a.argsort() 

[27.63 14.64 21.8  42.39  1.23  6.96 33.87 41.47  7.7  29.18]


array([4, 5, 8, 1, 2, 0, 9, 6, 7, 3], dtype=int64)

In [27]:
a.argsort()[-3:]

array([6, 7, 3], dtype=int64)

In [28]:
a[3]

42.39403048367528

In [29]:
a[7]

41.46678500014733

In [None]:
where can this be used?

In [4]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [5]:
iris

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa'],
       [b'5.0', b'3.6', b'1.4', b'0.2', b'Iris-setosa'],
       [b'5.4', b'3.9', b'1.7', b'0.4', b'Iris-setosa'],
       [b'4.6', b'3.4', b'1.4', b'0.3', b'Iris-setosa'],
       [b'5.0', b'3.4', b'1.5', b'0.2', b'Iris-setosa'],
       [b'4.4', b'2.9', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.1', b'1.5', b'0.1', b'Iris-setosa'],
       [b'5.4', b'3.7', b'1.5', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.4', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.0', b'1.4', b'0.1', b'Iris-setosa'],
       [b'4.3', b'3.0', b'1.1', b'0.1', b'Iris-setosa'],
       [b'5.8', b'4.0', b'1.2', b'0.2', b'Iris-setosa'],
       [b'5.7', b'4.4', b'1.5', b'0.4', b'Iris-setosa'],
       [b'5.4', b'3.9', b'1.3', b'0.4', b'Iris-setosa'],
       [b'5.1', b'3.5', b'1.4',

In [6]:
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')