# How to get index locations that satisfy a given condition using np.where?

np.where locates the positions in the array where a given condition holds true.

## Create an array

In [1]:
import numpy as np

In [2]:
# Small 1-D sample array used by the np.where / argmax / argmin examples below.
a = np.array([4, 6, 7, 7, 8, 8, 9, 10])

In [3]:
# Show the array's contents.
print("Array: ", a)

Array:  [ 4  6  7  7  8  8  9 10]


## Positions where value > 5

In [4]:
# Called with only a condition, np.where returns a tuple of index arrays
# (one per dimension) marking where the condition is True.
aa = np.where(a > 5)

In [5]:
# The printed value is a 1-tuple: the index array is wrapped as (array([...]),).
print("Positions where value > 5: ", aa)

Positions where value > 5:  (array([1, 2, 3, 4, 5, 6, 7], dtype=int64),)


## Take items at given index

In [6]:
# np.where returned a 1-tuple of index arrays; pass the index array itself
# (aa[0]) so take() returns a 1-D result rather than one wrapped in an extra
# leading dimension.
a.take(aa[0])

array([ 6,  7,  7,  8,  8,  9, 10])

## If value > 5, then yield "gt5" else "le5"

In [7]:
# Three-argument form: yields "gt5" where the condition is True and "le5"
# elsewhere, returning a new array with one label per element of `a`.
np.where(a > 5, "gt5", "le5")

array(['le5', 'gt5', 'gt5', 'gt5', 'gt5', 'gt5', 'gt5', 'gt5'],
      dtype='<U3')

In [8]:
# The original array is unchanged by the np.where calls above.
a

array([ 4,  6,  7,  7,  8,  8,  9, 10])

## Location of the max

In [9]:
# np.argmax gives the index of the largest value (the first one if there are ties).
print("Position of max value: ", np.argmax(a))

Position of max value:  7


## Location of the min

In [10]:
# np.argmin gives the index of the smallest value (the first one if there are ties).
print("Position of min value: ", np.argmin(a))

Position of min value:  0


# How to import and export data as a csv file?

A standard way to import datasets is to use the np.genfromtxt function. It can import datasets from web URLs and can handle missing values, multiple delimiters, an irregular number of columns, etc.

A less versatile alternative is np.loadtxt, which assumes the dataset has no missing values.

## Turn off scientific notation

In [52]:
# Print floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

## Import data from csv file url

In [53]:
# Remote CSV read by the np.genfromtxt calls below (requires network access).
path = "https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv"

In [14]:
# Parse every column as float, skipping the first line (the header).
# Missing entries are replaced by -999; the last column shows -999 in the
# rows displayed below.
data = np.genfromtxt(
    path,
    delimiter=",",
    skip_header=1,
    filling_values=-999,
    dtype="float",
)

In [15]:
data[:3]  # see first 3 rows

array([[  18. ,    8. ,  307. ,  130. , 3504. ,   12. ,   70. ,    1. ,
        -999. ],
       [  15. ,    8. ,  350. ,  165. , 3693. ,   11.5,   70. ,    1. ,
        -999. ],
       [  18. ,    8. ,  318. ,  150. , 3436. ,   11. ,   70. ,    1. ,
        -999. ]])

# How to handle datasets that have both numeric and text columns?

In [36]:
# dtype="object" leaves every field unconverted, so the text column is kept
# alongside the numeric ones (each field appears as a bytes object in the
# output below).
data2 = np.genfromtxt(path, delimiter=",", skip_header=1, dtype="object")

In [37]:
data2[:3]  # first 3 rows

array([[b'18', b'8', b'307', b'130', b'3504', b'12', b'70', b'1',
        b'"chevrolet chevelle malibu"'],
       [b'15', b'8', b'350', b'165', b'3693', b'11.5', b'70', b'1',
        b'"buick skylark 320"'],
       [b'18', b'8', b'318', b'150', b'3436', b'11', b'70', b'1',
        b'"plymouth satellite"']], dtype=object)

In [54]:
# dtype=None lets genfromtxt infer a type for each column individually, which
# produces a structured array (fields f0..f8) rather than a plain 2-D array.
# NOTE(review): the stderr fragment captured after this cell suggests a warning
# is emitted here; consider passing `encoding=` explicitly, and confirm the
# effect on the string column for your NumPy version.
data3 = np.genfromtxt(path, delimiter=",", skip_header=1, dtype=None)

  """Entry point for launching an IPython kernel.


In [55]:
data3[:3]  # first 3 records; each is a tuple with per-column types

array([(18., 8, 307., 130, 3504, 12. , 70, 1, b'"chevrolet chevelle malibu"'),
       (15., 8, 350., 165, 3693, 11.5, 70, 1, b'"buick skylark 320"'),
       (18., 8, 318., 150, 3436, 11. , 70, 1, b'"plymouth satellite"')],
      dtype=[('f0', '<f8'), ('f1', '<i4'), ('f2', '<f8'), ('f3', '<i4'), ('f4', '<i4'), ('f5', '<f8'), ('f6', '<i4'), ('f7', '<i4'), ('f8', 'S38')])

## Save the array as a csv file

In [50]:
# Write `data` (the all-float array loaded above) as comma-separated text to
# url_out.csv, a path relative to the current working directory.
np.savetxt("url_out.csv", data, delimiter=",")