# Notes


My personal notes.

# Working with CSV files in Python

The built-in `csv` module can be used to work with CSV files.
Below is a code sample showing how to read a CSV file into a list of dictionaries (one per row).

In [2]:
import csv

#%precision 2 # sets the floating point precision to 2

# Open with newline='' as the csv module documentation recommends, so that
# newlines embedded inside quoted fields are parsed correctly.
with open('process.csv', newline='') as csvfile:
    # DictReader maps each data row to a dict keyed by the header row;
    # list() reads every row while the file is still open.
    newlist = list(csv.DictReader(csvfile))
    
newlist[0]

{'BasePriority': '8',
 'CPU': '37.0625',
 'Company': 'Alps Electric Co., Ltd.',
 'Container': '',
 'Description': 'ApMsgFwd',
 'EnableRaisingEvents': 'False',
 'ExitCode': '',
 'ExitTime': '',
 'FileVersion': '8, 1, 0, 44',
 'Handle': '2408',
 'HandleCount': '112',
 'Handles': '112',
 'HasExited': 'False',
 'Id': '8280',
 'MachineName': '.',
 'MainModule': 'System.Diagnostics.ProcessModule (ApMsgFwd.exe)',
 'MainWindowHandle': '0',
 'MainWindowTitle': '',
 'MaxWorkingSet': '1413120',
 'MinWorkingSet': '204800',
 'Modules': 'System.Diagnostics.ProcessModuleCollection',
 'NPM': '8960',
 'Name': 'ApMsgFwd',
 'NonpagedSystemMemorySize': '8960',
 'NonpagedSystemMemorySize64': '8960',
 'PM': '1486848',
 'PagedMemorySize': '1486848',
 'PagedMemorySize64': '1486848',
 'PagedSystemMemorySize': '180144',
 'PagedSystemMemorySize64': '180144',
 'Path': 'C:\\Program Files\\DellTPad\\ApMsgFwd.exe',
 'PeakPagedMemorySize': '1568768',
 'PeakPagedMemorySize64': '1568768',
 'PeakVirtualMemorySize': '981
 ... (output truncated)

# NumPy

NumPy is a package widely used in data science.


In [3]:
import numpy as np


In [4]:
# Creating Arrays
mylist = [1, 2, 3]
x = np.array(mylist)
x

array([1, 2, 3])

In [5]:
# an array can also be created directly from a list literal
y = np.array([4, 5, 6])
y

array([4, 5, 6])

In [6]:
# Create a multi-dimensional array
m = np.array([[7, 8, 9], [10, 11, 12]])
m

array([[ 7,  8,  9],
       [10, 11, 12]])

In [7]:
# To see the dimensions of an array, take a look at its shape attribute
m.shape

(2, 3)

In [8]:
# generate a range of values by passing start, end (excluded) and step to the arange method
n = np.arange(0, 30, 2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [9]:
# convert an array to another shape e.g. multi dimensional array.
# use the reshape() method
n = n.reshape(3, 5)
n

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [10]:
o = np.linspace(0, 4, 9)
o

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ])

In [11]:
o.resize(3, 3)
o

array([[ 0. ,  0.5,  1. ],
       [ 1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ]])

In [12]:
np.ones((3,2)) # returns array of ones

array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])

In [13]:
np.zeros((2, 3)) # returns array of zeroes

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [14]:
# returns an identity matrix: ones on the diagonal and zeroes everywhere else
np.eye(4)

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

In [15]:
# create a diagonal matrix from the 1-D array passed as the argument
np.diag(y)

array([[4, 0, 0],
       [0, 5, 0],
       [0, 0, 6]])

In [16]:
# the * operator on a Python list repeats the whole list before it is converted to an array
np.array([1, 2, 3] * 3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [17]:
# NumPy also has a repeat method, notice how the output differs from above one
np.repeat([1, 2, 3], 3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [18]:
# one can vertically stack arrays on top of each other as well
p = np.ones([2,3], int)
np.vstack([p, 2*p])


array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

In [19]:
# Horizontal stacking can be done as well
np.hstack([p, 2*p])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

# Operations with NumPy arrays


In [20]:
# element-wise addition, subtraction, multiplication and division are straightforward
x + y


array([5, 7, 9])

In [21]:
x * y

array([ 4, 10, 18])

In [22]:
# Raise all the elements of the array to a power
x**2

array([1, 4, 9], dtype=int32)

In [23]:
# Transpose the array using T attribute

z = np.array([y, y**2])
z

array([[ 4,  5,  6],
       [16, 25, 36]])

In [24]:
z.shape # initial shape of the array

(2, 3)

In [25]:
z.T

array([[ 4, 16],
       [ 5, 25],
       [ 6, 36]])

In [26]:
z.T.shape # see the shape of the transposed array

(3, 2)

In [27]:
z.dtype # this is used to see the data type of the array's elements

dtype('int32')

In [28]:
# one can cast the array as a different type as well
z = z.astype('f')
z.dtype

dtype('float32')

### Commonly used math functions in NumPy

In [29]:
a = np.array([-4, -2, 1, 3, 5])
a


array([-4, -2,  1,  3,  5])

In [30]:
# sum of the elements of the array
a.sum()

3

In [31]:
a.max() # maximum value in the array

5

In [32]:
a.min() # min value in array

-4

In [33]:
a.mean() # mean of the array

0.59999999999999998

In [34]:
a.std() # see the standard deviation of the array

3.2619012860600183

In [35]:
# see the index of the max or min value
a.argmax()

4

In [36]:
a.argmin()

0

# Indexing and Slicing

In [37]:
s = np.arange(13)**2
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144], dtype=int32)

In [38]:
s[0], s[4], s[0:3]

(0, 16, array([0, 1, 4], dtype=int32))

In [39]:
s[1:5]

array([ 1,  4,  9, 16], dtype=int32)

In [40]:
s[-4:] # last 4 elements

array([ 81, 100, 121, 144], dtype=int32)

In [41]:
s[-5::-2] # start at the fifth-from-last element and go backward in steps of 2

array([64, 36, 16,  4,  0], dtype=int32)

In [42]:
r = np.arange(36)
r.resize([6,6])
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [43]:
r[2,2] # reference an element


14

In [44]:
r[3, 3:6] # row index 3 (the 4th row) -> columns 3, 4, 5

array([21, 22, 23])

In [45]:
r[:2, :-1] # row 0 & 1 -> every column except the last


array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])

In [46]:
r[-1, ::2] # last row, every second element

array([30, 32, 34])

In [47]:
r[r > 30] # Conditional indexing is possible

array([31, 32, 33, 34, 35])

In [48]:
r[r>30] = 30 # Conditional assignment: sets every value that meets the condition to 30

In [49]:
r # the values which were above 30 are now 30


array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

#### Copying data in NumPy

In [50]:
r2 = r[:3, :3]
r2

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [51]:
r2[:] = 0 # set all the elements to 0

In [52]:
r2

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [53]:
r # the original array r has been modified too! Slicing a NumPy array returns a view of the same data, not a copy

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [54]:
r_copy = r.copy() # To leave the original array untouched, use the copy() method to get a new, independent array

In [55]:
r_copy

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [56]:
r_copy[:] = 10
print(r_copy)
print("==========================")
print(r)


[[10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]]
==========================
[[ 0  0  0  3  4  5]
 [ 0  0  0  9 10 11]
 [ 0  0  0 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 30 30 30 30 30]]


#### Iterate over NumPy Arrays

In [57]:
test = np.random.randint(0, 10, [4, 3])
test

array([[1, 9, 2],
       [1, 7, 9],
       [4, 1, 4],
       [0, 1, 2]])

In [58]:
for row in test: # Iterate one row at a time
    print(row)

[1 9 2]
[1 7 9]
[4 1 4]
[0 1 2]


In [59]:
for i in range(len(test)): # iterate over the row indices
    print(test[i])

[1 9 2]
[1 7 9]
[4 1 4]
[0 1 2]


In [60]:
# Combine the above two approaches of getting an index and row by using enumerate
for i, row in enumerate(test):
    print('row', i, 'is', row)
    

row 0 is [1 9 2]
row 1 is [1 7 9]
row 2 is [4 1 4]
row 3 is [0 1 2]


In [61]:
test2 = test**2
test2

array([[ 1, 81,  4],
       [ 1, 49, 81],
       [16,  1, 16],
       [ 0,  1,  4]], dtype=int32)

In [62]:
# Iterate through both arrays in lockstep, one pair of rows at a time
for i, j in zip(test, test2):
    print(i, '+', j, '=', i+j)

[1 9 2] + [ 1 81  4] = [ 2 90  6]
[1 7 9] + [ 1 49 81] = [ 2 56 90]
[4 1 4] + [16  1 16] = [20  2 20]
[0 1 2] + [0 1 4] = [0 2 6]
