# Material 
- ML data is represented as arrays
- Discover how to manipulate & access data in np arrays

## 5.2 From List to Array

## 5.3 Array Indexing

## 5.4 Array Slicing

## 5.5 Array Reshaping

############# Break #############

In [3]:
import numpy as np

## 5.2 From List to Array
- Assumes data is loaded or generated & represented using a py list

### 5.2.1 One-Dimensional List to Array
- Load & access data from a list

### 5.2.2 Two-Dimensional List to Array
- Table of data (ie table of students)
- Rows : each row is a new observation (ie a new student), stored as its own inner list
- Columns : new feature (ie id, name, program, etc)


In [23]:
# Start from a plain Python list of five integers.
py_list = [11, 22, 33, 44, 55]
# np.array() builds a NumPy ndarray holding the same values.
np_arr = np.array(py_list)

# Print each container next to its type so the list -> ndarray change is visible.
print("py_list : ", py_list, "type : ", type(py_list))
print("np_arr : ", np_arr, "type : ", type(np_arr))

py_list :  [11, 22, 33, 44, 55] type :  <class 'list'>
np_arr :  [11 22 33 44 55] type :  <class 'numpy.ndarray'>


In [42]:
# Two inner lists (rows), each with five values (columns).
py_list_2d = [
    [11, 22, 33, 44, 55],
    [66, 77, 88, 99, 0],
]
# A list of equal-length lists converts to a 2-D ndarray.
np_arr_2d = np.array(py_list_2d)

# len() on the outer list only counts the rows; .shape reports (rows, columns).
print("py_list_2d : ", py_list_2d, "shape/ len : ", len(py_list_2d))
print("\nnp_arr_2d : ", np_arr_2d, "shape : ", np_arr_2d.shape)

py_list_2d :  [[11, 22, 33, 44, 55], [66, 77, 88, 99, 0]] shape/ len :  2

np_arr_2d :  [[11 22 33 44 55]
 [66 77 88 99  0]] shape :  (2, 5)


In [43]:
# Three inner lists (rows), each with two values (columns).
# NOTE: this rebinds py_list_2d, replacing the 2 x 5 list created earlier.
py_list_2d = [
    [11, 22],
    [33, 44],
    [55, 66],
]
# Convert the list of lists into a 2-D ndarray.
np_arr_book = np.array(py_list_2d)

# The outer list's len() is the row count; .shape gives (rows, columns).
print("py_list_2d : ", py_list_2d, "shape/ len : ", len(py_list_2d))
print("\nnp_arr_book : ", np_arr_book, "shape : ", np_arr_book.shape)

py_list_2d :  [[11, 22], [33, 44], [55, 66]] shape/ len :  3

np_arr_book :  [[11 22]
 [33 44]
 [55 66]] shape :  (3, 2)


## 5.3 Array Indexing

### 5.3.1 One-Dimensional Indexing
- Similar to other programming langs like Java, C#, & C++
- Use bracket operator [insert index starting at 0]

### 5.3.2 Two-Dimensional Indexing
- Use bracket operator [row index starting at 0, column index starting at 0]

In [52]:
# Positive indexes count from the front, starting at 0.
first_element = np_arr[0]
second_element = np_arr[1]

# Negative indexes count back from the end: -1 is the last item, -2 the one before it.
last_element = np_arr[-1]
second_to_last_element = np_arr[-2]

# np_arr only has 5 items, so an index that is too large (e.g. np_arr[10])
# is out of bounds and raises an IndexError.

print("first_element : ", first_element)
print("\nsecond_element : ", second_element)
print("\nlast_element : ", last_element)
print("\nsecond_to_last_element : ", second_to_last_element)

first_element :  11

second_element :  22

last_element :  55

second_to_last_element :  44


In [55]:
# np_arr_2d has 2 rows and 5 columns: valid row indexes are 0-1 and valid
# column indexes are 0-4; anything larger is out of range.

# A single index returns the whole first row ...
print(np_arr_2d[0])
# ... while [row, column] returns one value; here columns 0, 3 and 4 of row 0.
for col in (0, 3, 4):
    print(np_arr_2d[0, col])

# Second row: the first value is preceded by a blank line ("\n") ...
print("\n", np_arr_2d[1, 0])
# ... followed by the remaining columns 1 through 4.
for col in range(1, 5):
    print(np_arr_2d[1, col])

[11 22 33 44 55]
11
44
55

 66
77
88
99
0


In [57]:
# Each row of np_arr_book has 2 values, so the column index can only be 0 or 1;
# 2 or more is out of range.

# A single index returns the whole first row.
print(np_arr_book[0])

# [row, column] pairs: both values of row 0, then the first value of row 1.
for row, col in ((0, 0), (0, 1), (1, 0)):
    print(np_arr_book[row, col])

[11 22]
11
22
33


## 5.4 Array Slicing
- A subsequence can be indexed & retrieved
- Used when specifying input & output variables
- Also splitting training rows from testing rows
- Use colon operator : w/ from & to index before & after the colon respectively

### 5.4.1 One-Dimensional Slicing
- Using np_arr above

### 5.4.2 Two-Dimensional Slicing
- Using np_arr_2d & np_arr_book above
- Common to split data : input (X) & output (y) # notice casing

In [62]:
# Each entry is one way of selecting from np_arr; they are printed in order.
for piece in (
    np_arr[:],    # ':' alone -> every element
    np_arr[0:1],  # from index 0 up to, but not including, index 1 -> first element only
    np_arr[-2:],  # from the second-to-last element through the end -> last two elements
    np_arr[-2],   # no ':' -> plain indexing, a single value rather than an array
):
    print(piece)


[11 22 33 44 55]
[11]
[44 55]
44


In [87]:
# ':' on its own returns every row.
print(np_arr_2d[:], "\n")

# Rows 0 up to (not including) 1 -> just the first row, still as a 2-D array.
print("first row only : ", np_arr_2d[0:1], "\n")

# Start at row 1 and leave the stop empty: with only 2 rows this is just the second row.
print("np_arr_2d - second row only : ", np_arr_2d[1:], "\n")

# ---- The slices above use 2 rows; the ones below use 3 rows (3rd row added with vstack, see Ch4) ----

# The row to append; it has 5 values, matching np_arr_2d's 5 columns.
new_arr = np.array([110, 111, 112, 113, 114])
print("new_arr : ", new_arr, "\n")

# vstack takes a tuple of arrays and stacks them vertically; new_arr becomes the last row.
stack_arr = np.vstack((np_arr_2d, new_arr))
print("stack_arr : ", stack_arr, "shape : ", stack_arr.shape, "\n")

# With 3 rows, [1:] now returns rows 1 and 2, i.e. everything except the first row.
print("don't include the first row : ", stack_arr[1:], "\n")

# To get the second row alone the stop index is now required: [1:2] keeps only row 1.
print("stack_arr - second row only : ", stack_arr[1:2], "\n")

# start 0, stop 3, step 2 -> rows 0 and 2 (first & last).
print("stack_arr - first & last row : ", stack_arr[0:3:2])

[[11 22 33 44 55]
 [66 77 88 99  0]] 

first row only :  [[11 22 33 44 55]] 

np_arr_2d - second row only :  [[66 77 88 99  0]] 

new_arr :  [110 111 112 113 114] 

stack_arr :  [[ 11  22  33  44  55]
 [ 66  77  88  99   0]
 [110 111 112 113 114]] shape :  (3, 5) 

don't include the first row :  [[ 66  77  88  99   0]
 [110 111 112 113 114]] 

stack_arr - second row only :  [[66 77 88 99  0]] 

stack_arr - first & last row :  [[ 11  22  33  44  55]
 [110 111 112 113 114]]


In [111]:
# Watch the shapes below: indexing a column with a plain integer (-1) gives a
# 1-D result, while slicing it (2:3) keeps the result 2-D.

# 3 x 3 dataset: the first two columns act as inputs, the last column as output.
data = np.array([[11, 22, 33], [44, 55, 66], [77, 88, 99]])

# X - all rows, every column except the last one.
X = data[:, :-1]
# y - all rows, last column only (integer index, so y is 1-D).
y = data[:, -1]

# Each print shows the slice plus an equivalent spelled with explicit positive indexes.
print("X : ", X, "\nalso", data[:, :2], "\n")
print("y : ", y, "\nalso", data[:, 2:3], "\n")

# With 3 columns, ':-2' and ':1' both stop after the first column.
print("first column only : ", data[:, :-2], "\nalso", data[:, :1])

X :  [[11 22]
 [44 55]
 [77 88]] 
also [[11 22]
 [44 55]
 [77 88]] 

y :  [33 66 99] 
also [[33]
 [66]
 [99]] 

first column only :  [[11]
 [44]
 [77]] 
also [[11]
 [44]
 [77]]


In [129]:
# Rows before `split` go to train, rows from `split` onward go to test.
split = 1
train = data[:split, :]
test = data[split:, :]
print("train first row only: ", train, "\ntest : ", test, "\n")

# Same rule with the boundary moved one row further down.
split = 2
train = data[:split, :]
test = data[split:, :]
print("train first two rows: ", train, "\ntest : ", test, "\n")

# Non-contiguous split on the 3-row dataset:
#   train -> rows 0 up to 3 in steps of 2, i.e. rows 0 and 2
#   test  -> rows (3 - 2) = 1 up to 2, i.e. row 1 only
split = 3
train = data[:split:2]
test = data[split - 2:2]
print("train first & last rows: ", train, "\ntest second row : ", test, "\n")

train first row only:  [[11 22 33]] 
test :  [[44 55 66]
 [77 88 99]] 

train first two rows:  [[11 22 33]
 [44 55 66]] 
test :  [[77 88 99]] 

train first & last rows:  [[11 22 33]
 [77 88 99]] 
test second row :  [[44 55 66]] 



## 5.5 Array Reshaping

### 5.5.1 Data Shape

### 5.5.2 Reshape 1D to 2D Array
- Use reshape() func

### 5.5.3 Reshape 2D to 3D Array
- Reshape a 3 x 2 array (3 rows, 2 columns) into a 3 x 2 x 1 array by adding a third dimension of size 1

In [149]:
# 1-D array: shape has a single entry, so shape[0] (labelled "row") is the element count.
print(
    "np_arr : ", np_arr,
    "\nnp_arr.shape : ", np_arr.shape,
    "\nrow : ", np_arr.shape[0],
    "\n",
)

# 2-D array: shape[0] is the number of rows, shape[1] the number of columns.
print(
    "np_arr_2d.shape : ", np_arr_2d.shape,
    "\nrow : ", np_arr_2d.shape[0],
    "\ncol : ", np_arr_2d.shape[1],
    "\n",
)

# Same two lookups on the 2-D `data` array.
print(
    "data.shape : ", data.shape,
    "\nrow : ", data.shape[0],
    "\ncol : ", data.shape[1],
)

np_arr :  [11 22 33 44 55] 
np_arr.shape :  (5,) 
row :  5 

np_arr_2d.shape :  (2, 5) 
row :  2 
col :  5 

data.shape :  (3, 3) 
row :  3 
col :  3


In [152]:
# 1-D -> 2-D as a single row: (n,) becomes (1, n).
reshape_np_arr = np_arr.reshape((1, np_arr.shape[0]))
print("reshape_np_arr : ", reshape_np_arr, "shape : ", reshape_np_arr.shape)

# 1-D -> 2-D as a single column: (n,) becomes (n, 1).
# The name is reused, so the row version above is replaced.
reshape_np_arr = np_arr.reshape((np_arr.shape[0], 1))
print("reshape_np_arr : ", reshape_np_arr, "shape : ", reshape_np_arr.shape)

reshape_np_arr :  [[11 22 33 44 55]] shape :  (1, 5)
reshape_np_arr :  [[11]
 [22]
 [33]
 [44]
 [55]] shape :  (5, 1)


In [158]:
# Show the 2-D array and its (rows, columns) shape before reshaping.
print("np_arr_book : ", np_arr_book, "\nnp_arr_book shape : ", np_arr_book.shape)
# NOTE: the labels read "np_arr_book[0]" / "np_arr_book[1]", but the values printed
# are np_arr_book.shape[0] (rows) and np_arr_book.shape[1] (columns).
print("np_arr_book[0] : ", np_arr_book.shape[0], "np_arr_book[1], ", np_arr_book.shape[1], "\n")

# 2-D -> 3-D: keep the existing (rows, columns) and append a trailing dimension of size 1.
reshape_np_arr_book = np_arr_book.reshape(np_arr_book.shape + (1,))
print("reshape_np_arr_book : ", reshape_np_arr_book, "\nreshape_np_arr_book shape : ", reshape_np_arr_book.shape)

np_arr_book :  [[11 22]
 [33 44]
 [55 66]] 
np_arr_book shape :  (3, 2)
np_arr_book[0] :  3 np_arr_book[1],  2 

reshape_np_arr_book :  [[[11]
  [22]]

 [[33]
  [44]]

 [[55]
  [66]]] reshape_np_arr_book shape :  (3, 2, 1)
