## Numpy indexing techniques and understanding what array shapes mean.

In [None]:
import numpy as np

In [None]:
# 1-D array holding the ten integers 0 through 9, shape (10,)
x1 = np.arange(10)
x1

In [None]:
# shape of x1: (10,) -- a single axis with ten elements
x1.shape

In [None]:
# select the items at index 2 and index 7 (indices are 0-based, so these are the 3rd and 8th items)
x1[[2,7]]

In [None]:
# boolean index: one flag per element of x1; only the positions flagged True are returned
x1[[True,False,False,False,True,True,True,True,True,False]]

In [None]:
# elementwise comparison: gives a boolean array that is True where the element of x1 is greater than 5
x1>5

In [None]:
# use the comparison result as a boolean mask to keep only the values of x1 greater than 5
x1[x1>5]

In [None]:
# slice from index 5 through the end of the array
x1[5:]

In [None]:
# slice from index 1 up to (but not including) index 7, taking every 2nd item -> indices 1, 3, 5
x1[1:7:2]

In [None]:
# slicing backwards: start at index 8 and step by -1 down to (but not including) index 3 -> indices 8, 7, 6, 5, 4
x1[8:3:-1]

In [None]:
# 2-D array: ten rows holding one value each, shape (10, 1)
x2 = np.array([[value] for value in range(10)])
x2

In [None]:
# shape of x2: (10, 1) -- two axes, ten rows and one column
x2.shape

In [None]:
# x1, shape (10,)

i= 0    1    2    3    4    5    6    7    8    9  
┌────┬────┬────┬────┬────┬────┬────┬────┬────┬────┐
│  0 │  1 │  2 │  3 │  4 │  5 │  6 │  7 │  8 │  9 │ 
└────┴────┴────┴────┴────┴────┴────┴────┴────┴────┘

In [None]:
# x2, shape (10,1)

i= 0    1    2    3    4    5    6    7    8    9  
j= 0    0    0    0    0    0    0    0    0    0  
┌────┬────┬────┬────┬────┬────┬────┬────┬────┬────┐
│  0 │  1 │  2 │  3 │  4 │  5 │  6 │  7 │  8 │  9 │
└────┴────┴────┴────┴────┴────┴────┴────┴────┴────┘

In [None]:
# a single index into the (10,) array returns the one element at position 5
x1[5]

In [None]:
# a single index into the (10,1) array returns row 5, which is itself a 1-D array of shape (1,)
x2[5]

In [None]:
# two indices (row 5, column 0) are needed to reach a single element of the (10,1) array
x2[5,0]

In [None]:
# 2-D array with three rows and three columns, written one row per line
z = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [None]:
# values of z, displayed row by row
z

In [None]:
# shape of z: (3, 3) -- three rows and three columns
z.shape

In [None]:
# y1: three values along a single axis, shape (3,)
y1 = np.array([1,2,3])
# pass the shape as its own print argument so a space separates it from the label
print('y1 shape is', y1.shape)
# y2: the same three values arranged as a column, shape (3,1)
y2 = np.array([[1],[2],[3]])
print('y2 shape is', y2.shape)

In [None]:
# matrix product of z, shape (3,3), with the vector y1, shape (3,)
# `@` is the matrix-multiplication operator (same as np.matmul); `*` would multiply elementwise instead
z_y1 = z @ y1
z_y1

In [None]:
# shape of the matrix product of z and y1: a (3,3) matrix times a (3,) vector gives shape (3,)
z_y1.shape

In [None]:
# matrix product of z, shape (3,3), with the column y2, shape (3,1)
# `@` is the matrix-multiplication operator (same as np.matmul); `*` would multiply elementwise instead
z_y2 = z @ y2
z_y2

#### The main practical difference between arrays with shape (n,) and shape (n,1) is that some models require data in the (n,) form, while other models require data in the (n,1) form.

In [None]:
# shape of the matrix product of z and y2: a (3,3) matrix times a (3,1) column gives shape (3,1)
z_y2.shape

In [None]:
# reshape y1 from (3,) to (3,1); reshape returns the reshaped array and y1 itself keeps shape (3,)
print(y1.shape)
print(y1.reshape((3,1)).shape)

In [None]:
# values of z
z

In [None]:
# select first column of z with shape (3,): the integer index 0 removes the column axis
z[:,0]

In [None]:
# shape of the column selected with an integer index: (3,)
z[:,0].shape

In [None]:
# select first column of z with shape (3,1): the list index [0] keeps the column axis
z[:,[0]]

In [None]:
# shape of the column selected with a list index: (3,1)
z[:,[0]].shape

## Summary

#### 1. NumPy arrays can be indexed by passing a scalar, a list of indices, a slice (a start:stop:step sequence), or booleans.
#### 2. NumPy performs quick matrix multiplications, which are used in many machine learning models.
#### 3. A one-dimensional sequence of values can be stored in an array with one index, shape (n,), or with multiple indices, e.g. shape (n,1).
#### 4. The shape of an array needs to match the required shape for the array to be successfully passed into a model.

#### Reference: https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html

## How to add a new column to a dataframe and sort by the column?

In [None]:
import pandas as pd

In [None]:
# Load the iris dataset from a remote CSV file (requires network access)
# names= supplies the column labels, so read_csv does not take them from the file's first row
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
df = pd.read_csv(url, names=names)

In [None]:
# preview the first five rows of the loaded frame
df.head()

In [None]:
# method 1: assign a new column with bracket notation
# reload the data file so this cell starts from the original columns
df = pd.read_csv(url, names=names)

# add the new column: elementwise ratio of petal length to petal width
df["petal-ratio"] = df["petal-length"]/df["petal-width"]

# sort the rows by the new column, smallest ratio first
df = df.sort_values(by=['petal-ratio'], ascending = True)
df.head()

In [None]:
# method 2: DataFrame.insert
# reload the data file so this cell starts from the original columns
df = pd.read_csv(url, names=names)

# insert the ratio column in place at position 5, i.e. right after the five named columns
petal_ratio = df["petal-length"] / df["petal-width"]
df.insert(loc=5, column="petal-ratio", value=petal_ratio, allow_duplicates=True)

# order the rows by the new column, smallest ratio first
df = df.sort_values(by="petal-ratio")
df.head()

In [None]:
# method 3: pd.concat
# reload the data file so this cell starts from the original columns
df = pd.read_csv(url, names=names)

# build the new column as a one-column DataFrame; to_frame names the column
# explicitly instead of depending on the quotient Series being unnamed
# (pd.DataFrame(named_series, columns=[other_label]) would yield an all-NaN column)
new_col = (df["petal-length"]/df["petal-width"]).to_frame(name='petal-ratio')
# axis=1 places new_col beside the existing columns, aligned on the row index
df = pd.concat([df,new_col], axis =1)

# sort the rows by the new column, smallest ratio first
df = df.sort_values(by=['petal-ratio'], ascending = True)
df.head()

## How to convert a dict into a list of lists containing key-value pairs?

In [1]:
# four string keys mapped to the integers 0..3, in insertion order a, b, c, d
simple_dict = dict(a=0, b=1, c=2, d=3)

In [2]:
# turn each (key, value) tuple yielded by items() into a two-element list
list_key_value = [list(pair) for pair in simple_dict.items()]


In [3]:
# display the full list of [key, value] pairs
list_key_value

[['a', 0], ['b', 1], ['c', 2], ['d', 3]]

In [4]:
# the first entry is itself a two-element list: [key, value]
list_key_value[0]

['a', 0]

In [5]:
# print the built-in help text for the dict class (constructor forms and methods)
help(dict)

Help on class dict in module builtins:

class dict(object)
 |  dict() -> new empty dictionary
 |  dict(mapping) -> new dictionary initialized from a mapping object's
 |      (key, value) pairs
 |  dict(iterable) -> new dictionary initialized as if via:
 |      d = {}
 |      for k, v in iterable:
 |          d[k] = v
 |  dict(**kwargs) -> new dictionary initialized with the name=value pairs
 |      in the keyword argument list.  For example:  dict(one=1, two=2)
 |  
 |  Methods defined here:
 |  
 |  __contains__(self, key, /)
 |      True if D has a key k, else False.
 |  
 |  __delitem__(self, key, /)
 |      Delete self[key].
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |