In [1]:
import numpy as np

# Why Numpy?

* it is a primary package for scientific computing
* it supports multi-dimensional arrays 
* built in array operations
* Simplified but powerful array interactions

* Numpy arrays are fast 
* it can hold only one data type 


Pandas are built on top of numpy 

In [21]:
# working with ndarrays 
an_array = np.array([3,33,333]) # rank 1 array
print(an_array)
print(type(an_array))
print(an_array.shape) # printing shape of nd array

[  3  33 333]
<class 'numpy.ndarray'>
(3,)


## ndarrays are mutable

In [7]:
an_array[0]=20 # must be of the same type

In [9]:
r2 = np.array([[11,12,13],[14,15,16]])
r2

array([[11, 12, 13],
       [14, 15, 16]])

In [23]:
# creating prefilling arrays
ex1 = np.zeros((2,2))
ex2 = np.full((2,2),9) # creating a 2*2 array filled with 9
ex3 = np.ones((2,2))
ex4 = np.eye(2,2) # here only the diagonals are one 
ex5 = np.random.random((2,2))
ex5

array([[0.84035697, 0.68318669],
       [0.95497046, 0.89357235]])

In [27]:
# when slicing an ndarray we create a second reference to the same underlying data
a_slice  = r2[0:,1:2]
a_slice # when you modify the slice you are actually modifying the actual r2 array

array([[12],
       [15]])

## Since slice elements have the same memory address as the original array. we must create another copy to prevent this

In [28]:
a_slice  = np.array(r2[0:,1:2])
a_slice

array([[12],
       [15]])

In [31]:
#combinations of slices and indexing 
a_slice2 = r2[1,:] # this creates a rank1 array
a_slice3 = r2[1:2,:] # this creates a rank2 array
print(a_slice3)


[[14 15 16]]


In [32]:
r2.shape

(2, 3)

# Conditional Indexing

In [39]:
an_array = np.array([[11,12],[21,22],[31,32]])
filter = (an_array>15)
print(an_array[filter])
# this can be done in a single step 
print(an_array[an_array>15])
print(an_array[(an_array>15)&(an_array<30)]) # getting a range of values 
print(an_array[(an_array%2==0)])

[21 22 31 32]
[21 22 31 32]
[21 22]
[12 22 32]


## Each ndarray has its own datatype

## Basics operations of ndarray


In [46]:
# setting datattype of an ndarray
print(ex1.dtype)
# setting an explicit type 
ex2 = np.array([11.1,12.5,13.1],dtype=np.int64)
print(ex2.dtype)
print(ex2)
ex3 = np.array([11,12,13])
print(np.add(ex2,ex3))
print(np.subtract(ex2,ex3))
print(np.multiply(ex2,ex3))
print(np.divide(ex2,ex3))
print(np.exp(ex2))
print(np.sqrt(ex3))

float64
int64
[11 12 13]
[22 24 26]
[0 0 0]
[121 144 169]
[1. 1. 1.]
[ 59874.1417152  162754.791419   442413.39200892]
[3.31662479 3.46410162 3.60555128]


## Basic statistical operations:

In [65]:
arr = 10*np.random.randn(2,5)
print(arr.mean())
print(f"Mean for each row {arr.mean(axis=1)}")
print(f"Mean for each column {arr.mean(axis=0)}")
print(f"Sum of all numbers {arr.sum(axis=0)}")
print(f"median values for each row {np.median(arr,axis=1)}")
unsorted = np.random.randn(10)
print(unsorted)
sorted_arr = np.array(unsorted)
sorted_arr.sort() # this is an inplace operation
print(sorted_arr)
arr1 = np.array([1,1,1,2,3,4,1,2,4,5,6])
print(np.unique(arr1))

-4.349511090454893
Mean for each row [-5.43605457 -3.26296761]
Mean for each column [-13.3890901   -1.05491188   4.15122443 -11.34256326  -0.11221464]
Sum of all numbers [-26.77818019  -2.10982376   8.30244885 -22.68512652  -0.22442929]
median values for each row [-6.34573043 -4.4248556 ]
[ 0.43037747 -0.96351231  0.39938984  0.72410592  0.49999642 -1.82528024
  0.17203456 -0.35613975 -0.91035399  1.5378878 ]
[-1.82528024 -0.96351231 -0.91035399 -0.35613975  0.17203456  0.39938984
  0.43037747  0.49999642  0.72410592  1.5378878 ]
[1 2 3 4 5 6]


## Set operations on a ndarray

In [71]:
s1 = np.array(['desk','bulb','chair'])
s2 = np.array(['lamp','bulb','chair'])
print(s1,s2)
print(np.intersect1d(s1,s2)) # intersect expects 1-D arrays
print(np.union1d(s1,s2)) 
print(np.setdiff1d(s1,s2)) # elements that are in s1 but not in s2
print(np.in1d(s1,s2)) # which element of s1 is also in s2, output is an array of booleans. 

['desk' 'bulb' 'chair'] ['lamp' 'bulb' 'chair']
['bulb' 'chair']
['bulb' 'chair' 'desk' 'lamp']
['desk']
[False  True  True]


## Broadcasting

* It aims to solve mismatching in array sizes. 
* When operating on two-arrays, Numpy compares their shapes element wise. it starts with the trailing dimensions, and works its way forward. 
* The two arrays are compatible when they are equal or one of them is 1. 

In [81]:
arr2 = np.zeros((4,3))
add_rows = np.array([1,0,2])
y = arr2+add_rows
add_cols = np.array([[1,2,3,4]])
add_cols = add_cols.T
y1 = arr2+add_cols
print(f"The result by adding values to rows\n {y}")
print(f"The result by adding values to columns\n {y1}")

The result by adding values to rows
 [[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]
The result by adding values to columns
 [[1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]
 [4. 4. 4.]]
