### Lists in Python

In [1]:
# Lists in Python: create one, print it, sort it in place, then iterate.

marks = [20, 24, 22, 18, 19]
print(marks)

# list.sort() reorders the existing list (ascending) instead of returning a new one.
marks.sort()
print(marks)

# Iterating a list yields its elements directly; no index is needed.
for mark in marks:
    print(mark)

# range(11) produces the integers 0 through 10 (the stop value is excluded).
for number in range(11):
    print(number)

[20, 24, 22, 18, 19]
[18, 19, 20, 22, 24]
18
19
20
22
24
0
1
2
3
4
5
6
7
8
9
10


In [2]:
# Accumulate i * j over every (i, j) pair with i and j in 0..3 using nested loops.
# The accumulator is called `total` rather than `sum` so that the built-in
# sum() is not shadowed for the rest of the kernel session.
total = 0
for i in range(4):
    for j in range(4):
        total += i * j
print(total)

36


### Issues with Python Lists:
- Matrix operations need explicit loops, which are inefficient and cumbersome
- Python is interpreted, so element-by-element loops are very slow
- Memory-inefficient
- No vectorised operations, hence no parallelisation

## Numpy

- Although NumPy is used from Python, it is implemented in C and C++ for faster computation

Features of Numpy:

- can define multidimensional arrays
- vectorised operations
- Linear algebra, statistics, and broadcasting (apply an operation to every element of an array with a single expression)
- foundation for pandas, scikit-learn, pytorch
- works only on CPU, not GPU

- The array object in NumPy is called `ndarray`; it provides many supporting functions that make working with arrays very easy.
- NumPy arrays are stored at one continuous place in memory unlike lists, so processes can access and manipulate them very efficiently. This behavior is called locality of reference in computer science.
- This is the main reason why NumPy is faster than lists. Also it is optimized to work with latest CPU architectures.

In [3]:
import numpy as np

In [4]:
# Two 1-D vectors and one 2x2 matrix; later cells reuse `a` and `c`.
a, b = np.array([1, 2, 3]), np.array([4, 5, 6])
c = np.array([[1, 2], [3, 4]])

# Basic metadata of `a`: its shape tuple, number of dimensions and element type.
for attribute in ("shape", "ndim", "dtype"):
    print(getattr(a, attribute))


(3,)
1
int64


In [5]:
# Broadcasting: the scalar 2 is multiplied into every element of the ndarray, in place.
# Note: this mutates `a`, so re-running the cell doubles the values again.
a *= 2
print(a)

# Plain Python lists do not broadcast: `*= 2` repeats the contents,
# so the list ends up twice as long instead of holding doubled values.
plain_list = [1, 2, 3]
plain_list *= 2
print(plain_list)

[2 4 6]
[1, 2, 3, 1, 2, 3]


## Math functions

In [6]:
# Element-wise square root, natural logarithm and base-10 logarithm of `a`.
for func in (np.sqrt, np.log, np.log10):
    print(func(a))

# Euler's number is available as a constant.
print(np.e)


[1.41421356 2.         2.44948974]
[0.69314718 1.38629436 1.79175947]
[0.30103    0.60205999 0.77815125]
2.718281828459045


## Slicing and Indexing

In [7]:
# NOTE: `a` is rebound to a plain Python list here, so the first and last
# prints demonstrate list indexing and slicing. `c` is expected to still be
# the 2x2 ndarray defined in an earlier cell.
a = [1, 2, 3]
print(a[0])
print(c[0, 0])  # one multi-axis index; selects the same element as c[0][0]
print(a[:2])    # slice from the start up to (not including) index 2


1
1
[1, 2]


## Accessing multi-dim arrays

In [8]:
# Build a 3-D array from two 2x3 blocks.
first_block = [[1, 2, 3], [4, 5, 6]]
second_block = [[7, 8, 9], [10, 11, 12]]
arr = np.array([first_block, second_block])

# One index per axis: block 0 -> row 1 -> column 2.
print(arr[0, 1, 2])

6


In [9]:
arr = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
])

# Index 1 selects the second row; -1 counts back from the end of that row.
last_of_second_row = arr[1, -1]
print("Last element from 2nd dim: ", last_of_second_row)

Last element from 2nd dim:  10


In [10]:
# Reshape a flat 6-element vector into a 2x3 matrix (row by row).
m = np.array([1, 2, 3, 4, 5, 6])
rows, cols = 2, 3
mm = np.reshape(m, (rows, cols))
print(mm)

[[1 2 3]
 [4 5 6]]


In [11]:
arr = np.array([1, 2, 3, 4, 5, 6, 7])

# Slice syntax is start:stop:step. Take every other element starting at
# index 1 and stopping before index 5 (the stop index is excluded).
start, stop, step = 1, 5, 2
print(arr[start:stop:step])

[2 4]


In [12]:
# axis=1 collapses the columns, giving one value per row;
# axis=0 collapses the rows, giving one value per column.
sum_row = np.sum(mm, axis=1)
col_sum = np.sum(mm, axis=0)
mean_row = np.mean(mm, axis=1)

# Show row sums, column sums and row means, one per line.
print(sum_row, col_sum, mean_row, sep="\n")

[ 6 15]
[5 7 9]
[2. 5.]


## Speed Comparison

In [13]:
import time
N = 10_000_000

# Three plain lists of N ints. `c` is created up front so the timed loop
# below only performs element reads, a multiply and an element write.
a = list(range(N))
b = list(range(N))
c = list(range(N))

# time.perf_counter() is the clock intended for measuring elapsed time:
# it is monotonic and high-resolution, whereas time.time() is wall-clock
# time and can jump if the system clock is adjusted.
st = time.perf_counter()
for i in range(len(a)):
    c[i] = a[i] * b[i]
end = time.perf_counter()

print("time taken with lists: ", end - st)

time taken with lists:  1.8372061252593994


In [14]:
# Convert the lists from the previous cell to ndarrays before starting the
# timer, so only the vectorised multiplication itself is measured.
na = np.array(a)
nb = np.array(b)

# perf_counter() is monotonic and high-resolution, which matters here because
# the measured interval is short; time.time() is wall-clock time.
st = time.perf_counter()
nc = na * nb
end = time.perf_counter()

print("time taken with numpy: ", end - st)

time taken with numpy:  0.0674598217010498


## Matrix Multiplication
- np.matmul()
- a @ b

In [15]:
# Marks table: going by the names used below, each row is one student and
# each column is one subject.
marks = np.array([[70, 68, 40], [90, 98, 99], [81, 74, 88], [12, 31, 42], [33, 56, 32]])

# Mean across the columns -> one average per row.
mean_marks = np.mean(marks, axis=1)
print(mean_marks)

# Index of the row with the highest mean. argmax returns the position of the
# first maximum directly, which avoids comparing floats for equality with
# np.where(mean_marks == np.max(mean_marks)).
best_student = np.argmax(mean_marks)
print(best_student)

# Highest mark in each column.
subject_top_score = np.max(marks, axis=0)
print(subject_top_score)

# Row index holding the highest mark in each column.
print("Subject toppers: ", np.argmax(marks, axis=0))

[59.33333333 95.66666667 81.         28.33333333 40.33333333]
1
[90 98 99]
Subject toppers:  [1 1 1]


## NumPy Array Copy vs View

In [16]:
arr = np.array([1, 2, 3, 4, 5, 6])

cp = arr.copy()  # copy: owns its own data
vw = arr.view()  # view: a new array object looking at arr's data

# A write through the original shows up in the view but not in the copy.
arr[0] = 100
print(cp, vw, sep="\n")

# A write through the view shows up in the original; the copy is still untouched.
vw[1] = 100
print(arr, cp, vw, sep="\n")

# `.base` tells the two apart: None for the copy, the original array for the view.
print(cp.base, vw.base, sep="\n")

[1 2 3 4 5 6]
[100   2   3   4   5   6]
[100 100   3   4   5   6]
[1 2 3 4 5 6]
[100 100   3   4   5   6]
None
[100 100   3   4   5   6]


## Iterating an array

In [17]:
arr = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])

# np.nditer visits every scalar of the 3-D array with a single loop,
# so no nested loops over the individual axes are needed.
for element in np.nditer(arr):
    print(element)

1
2
3
4
5
6
7
8


In [18]:
arr = np.array([1, 2, 3])

# np.ndenumerate yields (index tuple, value) pairs, so the position is
# available alongside each element.
for position, value in np.ndenumerate(arr):
    print(position, value)

(0,) 1
(1,) 2
(2,) 3


In [20]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

# np.stack joins the arrays along a new axis:
#   axis=1 pairs up matching elements -> shape (3, 2)
#   axis=0 places each array on its own row -> shape (2, 3)
for axis in (1, 0):
    arr = np.stack((arr1, arr2), axis=axis)
    print(arr)

[[1 4]
 [2 5]
 [3 6]]
[[1 2 3]
 [4 5 6]]
