### Import dependencies

In [4]:
import numpy as np

### Creating arrays

In [5]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [6]:
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [7]:
np.full(6, 2.6)

array([2.6, 2.6, 2.6, 2.6, 2.6, 2.6])

In [8]:
 # Convert a Python list to a NumPy array, e.g.:
 
 a = [1, 3, 4, 5, 6]
 
 b = np.array(a)
 
 b

array([1, 3, 4, 5, 6])

In [9]:
b[2] = 9

In [10]:
b

array([1, 3, 9, 5, 6])

In [11]:
np.arange(3, 10, 3)

array([3, 6, 9])

In [12]:
np.linspace(0, 10, 11)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

### Multidimensional arrays

In [13]:
  np.zeros((4,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [14]:
 n = np.array([[2,4,4], [2, 5, 6], [50, 68, 58]])

In [15]:
n[2, 1]

68

In [16]:
 n[1:, 2]

array([ 6, 58])

### Randomly generated arrays

In [17]:
# Seed NumPy's global random generator so the values below are reproducible.
np.random.seed(2)
# 5x2 array of uniform samples drawn from [0, 1), scaled by 100.
np.random.rand(5, 2) * 100

array([[43.59949021,  2.59262318],
       [54.96624779, 43.53223926],
       [42.03678021, 33.0334821 ],
       [20.4648634 , 61.92709664],
       [29.96546737, 26.68272751]])

In [18]:
np.random.randint(low = 5, high = 20 , size=(5, 2) )

array([[17,  9],
       [10, 12],
       [ 8, 11],
       [ 9, 15],
       [16,  8]])

### Element-wise operations

In [19]:
 c = np.arange(5) 
 c

array([0, 1, 2, 3, 4])

In [20]:
c + 1

array([1, 2, 3, 4, 5])

In [21]:
d = (20 + (c * 2)) ** 2

In [22]:
c + d

array([400, 485, 578, 679, 788])

### Comparison operations

In [23]:
d >= 500

array([False, False,  True,  True,  True])

In [24]:
c < d

array([ True,  True,  True,  True,  True])

In [25]:
d[d >= 500]

array([576, 676, 784])

### Summary operations

In [26]:
d.min()

400

In [27]:
d.mean()

584.0

In [28]:
b.std()

2.7129319932501073

### Linear algebra refresher

In [29]:
# Two 3-element vectors; later cells in this section reuse them.
u = np.array([2, 7, 5])
v = np.array([3, 4, 8])

# Addition. NOTE: evaluated but not displayed -- a cell only echoes its
# last expression.
u + v

# Subtraction (likewise evaluated but not displayed).
u - v

# Scalar multiplication. This is the last expression, so it is the only
# result shown as the cell's output.
2 * v

array([ 6,  8, 16])

In [30]:
u.shape

(3,)

In [31]:
def vector_vector_multiplication(u, v):
    """Compute the dot product of two vectors with an explicit loop.

    Parameters
    ----------
    u, v : array-like objects exposing ``.shape``
        Must have the same length along their first axis.

    Returns
    -------
    The sum of the element-wise products. The accumulator starts at 0.0,
    so the result is floating point even for integer inputs.

    Raises
    ------
    AssertionError
        If the first dimensions of ``u`` and ``v`` differ.
    """
    assert u.shape[0] == v.shape[0]

    # Float accumulator: keeps the running total floating point.
    total = 0.0
    for left, right in zip(u, v):
        total = total + left * right

    return total

In [32]:
vector_vector_multiplication(u, v)

74.0

In [33]:
# Equivalent built-in NumPy operation

u.dot(v)

74

In [34]:
def matrix_vector_multiplication(U, v):
    """Multiply matrix ``U`` by vector ``v``, one row at a time.

    Each output entry is the dot product of the corresponding row of ``U``
    with ``v``, computed by ``vector_vector_multiplication``.

    Parameters
    ----------
    U : 2-D array
        Its number of columns must equal the length of ``v``.
    v : 1-D array

    Returns
    -------
    numpy.ndarray
        Array created with ``np.zeros`` holding one entry per row of ``U``.

    Raises
    ------
    AssertionError
        If ``U.shape[1]`` differs from ``v.shape[0]``.
    """
    assert U.shape[1] == v.shape[0]

    product = np.zeros(U.shape[0])

    # Fill one output slot per matrix row.
    for row_index, row in enumerate(U):
        product[row_index] = vector_vector_multiplication(row, v)

    return product

In [35]:
matrix_vector_multiplication(n, u)

array([ 52.,  69., 866.])

In [36]:
np.dot(n, u)

array([ 52,  69, 866])

In [37]:
def matrix_matrix_multiplication(m, o):
    """Multiply matrix ``m`` by matrix ``o``, one column of ``o`` at a time.

    Column ``j`` of the result is ``m`` times column ``j`` of ``o``,
    computed by ``matrix_vector_multiplication``.

    Parameters
    ----------
    m : 2-D array
    o : 2-D array
        Its number of rows must equal the number of columns of ``m``.

    Returns
    -------
    numpy.ndarray
        Array created with ``np.zeros`` of shape ``(m.shape[0], o.shape[1])``.

    Raises
    ------
    AssertionError
        If ``m.shape[1]`` differs from ``o.shape[0]``.
    """
    assert m.shape[1] == o.shape[0]

    n_rows, n_cols = m.shape[0], o.shape[1]
    product = np.zeros((n_rows, n_cols))

    # Build the result column by column.
    for col in range(n_cols):
        product[:, col] = matrix_vector_multiplication(m, o[:, col])

    return product

In [38]:
matrix_matrix_multiplication(n, n)

array([[ 212.,  300.,  264.],
       [ 314.,  441.,  386.],
       [3136., 4484., 3972.]])

### Identity matrix

In [39]:
 v = np.eye(3)

In [40]:
v

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [41]:
u.dot(v)

array([2., 7., 5.])

### Matrix inverse

In [42]:
vs = np.linalg.inv(v)

In [43]:
vs.dot(v)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

## Introduction to pandas

In [44]:
import pandas as pd

### Dataframe

In [45]:
data = [
    ['Nissan', 'Stanza', 1991, 138, 4, 'MANUAL', 'sedan', 2000],
    ['Hyundai', 'Sonata', 2017, None, 4, 'AUTOMATIC', 'Sedan', 27150],
    ['Lotus', 'Elise', 2010, 218, 4, 'MANUAL', 'convertible', 54990],
    ['GMC', 'Acadia',  2017, 194, 4, 'AUTOMATIC', '4dr SUV', 34450],
    ['Nissan', 'Frontier', 2017, 261, 6, 'MANUAL', 'Pickup', 32340],
]

columns = [
    'Make', 'Model', 'Year', 'Engine HP', 'Engine Cylinders',
    'Transmission Type', 'Vehicle_Style', 'MSRP'
]

In [46]:
df = pd.DataFrame(data, columns=columns)

In [47]:
data = [
    {
        "Make": "Nissan",
        "Model": "Stanza",
        "Year": 1991,
        "Engine HP": 138.0,
        "Engine Cylinders": 4,
        "Transmission Type": "MANUAL",
        "Vehicle_Style": "sedan",
        "MSRP": 2000
    },
    {
        "Make": "Hyundai",
        "Model": "Sonata",
        "Year": 2017,
        "Engine HP": None,
        "Engine Cylinders": 4,
        "Transmission Type": "AUTOMATIC",
        "Vehicle_Style": "Sedan",
        "MSRP": 27150
    },
    {
        "Make": "Lotus",
        "Model": "Elise",
        "Year": 2010,
        "Engine HP": 218.0,
        "Engine Cylinders": 4,
        "Transmission Type": "MANUAL",
        "Vehicle_Style": "convertible",
        "MSRP": 54990
    },
    {
        "Make": "GMC",
        "Model": "Acadia",
        "Year": 2017,
        "Engine HP": 194.0,
        "Engine Cylinders": 4,
        "Transmission Type": "AUTOMATIC",
        "Vehicle_Style": "4dr SUV",
        "MSRP": 34450
    },
    {
        "Make": "Nissan",
        "Model": "Frontier",
        "Year": 2017,
        "Engine HP": 261.0,
        "Engine Cylinders": 6,
        "Transmission Type": "MANUAL",
        "Vehicle_Style": "Pickup",
        "MSRP": 32340
    }
]

In [48]:
df_2 = pd.DataFrame(data)
df_2

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


### Series

In [49]:
df['Engine HP']

0    138.0
1      NaN
2    218.0
3    194.0
4    261.0
Name: Engine HP, dtype: float64

In [50]:
df[['Make', 'Model', 'MSRP']]

Unnamed: 0,Make,Model,MSRP
0,Nissan,Stanza,2000
1,Hyundai,Sonata,27150
2,Lotus,Elise,54990
3,GMC,Acadia,34450
4,Nissan,Frontier,32340


In [51]:
df['id'] = [10, 34, 67, 45, 60]

In [52]:
del df['id']

In [53]:
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


### Index

In [54]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [55]:
df.Make.index

RangeIndex(start=0, stop=5, step=1)

In [56]:
df.loc[[2, 3]]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450


In [57]:
df.index = ["a", "b", "c", "d", "e"]

In [58]:
df.iloc[3]

Make                       GMC
Model                   Acadia
Year                      2017
Engine HP                194.0
Engine Cylinders             4
Transmission Type    AUTOMATIC
Vehicle_Style          4dr SUV
MSRP                     34450
Name: d, dtype: object

In [59]:
df.iloc[[3, 4]]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
d,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
e,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [60]:
df.reset_index(drop=True)

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


### Element-wise operations

In [61]:
df["Engine HP"] / 100

a    1.38
b     NaN
c    2.18
d    1.94
e    2.61
Name: Engine HP, dtype: float64

### Filtering

In [62]:
df[(df['Year'] >= 2015) & (df['Make'] == 'Nissan')]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
e,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


### String operations

In [63]:
 df['Vehicle_Style'].str.replace(" ", "_").str.lower()

a          sedan
b          sedan
c    convertible
d        4dr_suv
e         pickup
Name: Vehicle_Style, dtype: object

### Summarizing operations

In [64]:
df.Year.min()

1991

In [65]:
df.describe().round()

Unnamed: 0,Year,Engine HP,Engine Cylinders,MSRP
count,5.0,4.0,5.0,5.0
mean,2010.0,203.0,4.0,30186.0
std,11.0,51.0,1.0,18985.0
min,1991.0,138.0,4.0,2000.0
25%,2010.0,180.0,4.0,27150.0
50%,2017.0,206.0,4.0,32340.0
75%,2017.0,229.0,4.0,34450.0
max,2017.0,261.0,6.0,54990.0


In [66]:
df.nunique()

Make                 4
Model                5
Year                 3
Engine HP            4
Engine Cylinders     2
Transmission Type    2
Vehicle_Style        5
MSRP                 5
dtype: int64

### Missing values

In [67]:
df.isnull().sum()

Make                 0
Model                0
Year                 0
Engine HP            1
Engine Cylinders     0
Transmission Type    0
Vehicle_Style        0
MSRP                 0
dtype: int64

### Grouping

In [68]:
df.groupby("Year").MSRP.max()

Year
1991     2000
2010    54990
2017    34450
Name: MSRP, dtype: int64

In [69]:
df.Year.values

array([1991, 2017, 2010, 2017, 2017], dtype=int64)

In [70]:
df.to_dict(orient='records')

[{'Make': 'Nissan',
  'Model': 'Stanza',
  'Year': 1991,
  'Engine HP': 138.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'sedan',
  'MSRP': 2000},
 {'Make': 'Hyundai',
  'Model': 'Sonata',
  'Year': 2017,
  'Engine HP': nan,
  'Engine Cylinders': 4,
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': 'Sedan',
  'MSRP': 27150},
 {'Make': 'Lotus',
  'Model': 'Elise',
  'Year': 2010,
  'Engine HP': 218.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'convertible',
  'MSRP': 54990},
 {'Make': 'GMC',
  'Model': 'Acadia',
  'Year': 2017,
  'Engine HP': 194.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': '4dr SUV',
  'MSRP': 34450},
 {'Make': 'Nissan',
  'Model': 'Frontier',
  'Year': 2017,
  'Engine HP': 261.0,
  'Engine Cylinders': 6,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'Pickup',
  'MSRP': 32340}]