# **Intro to Numpy**

### Importing Package

In [None]:
import numpy as np

### Creating arrays

In [None]:
#create an array of zeros with 10 elements 
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
#create an array of ones with 10 elements  
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [None]:
#create an array of shape 10 and filled value 2.5 
np.full(10,2.5)

array([2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])

In [None]:
#convert a list into an array
a= np.array([1,2,3,4,5,7,12])
a

array([ 1,  2,  3,  4,  5,  7, 12])

In [None]:
#access an element of an array 
a[2]

3

In [None]:
#change an element 
a[2] = 10
a

array([ 1,  2, 10,  4,  5,  7, 12])

In [None]:
#create an array with spaced values within a given interval 
# start=3, stop=10 (the stop value itself is excluded), default step of 1.
# Only the last expression of a cell is echoed, so this first result is computed but not displayed.
np.arange(3,10)
# same interval, but taking every 2nd value (step=2)
np.arange(3,10,2)

array([3, 5, 7, 9])

In [None]:
np.linspace(0,1,11)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

**Multi-dimensional arrays**

In [None]:
np.zeros((10,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [None]:
n=np.array([
          [1,2,3],
          [4,5,6],
          [7,8,9]
])
n

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
n[0][0]

1

In [None]:
n[1,:]

array([4, 5, 6])

**Randomly generated arrays**

In [None]:
#create a random array with values ranging between 0 and 1, with 5 rows and 2 columns 
np.random.rand(5,2)

array([[0.41031527, 0.44420257],
       [0.23468722, 0.95096066],
       [0.00456806, 0.70869762],
       [0.24958638, 0.32713484],
       [0.97477494, 0.74221171]])

In [None]:
#we can fix the random values 
np.random.seed(2)
np.random.rand(5,2)


array([[0.4359949 , 0.02592623],
       [0.54966248, 0.43532239],
       [0.4203678 , 0.33033482],
       [0.20464863, 0.61927097],
       [0.29965467, 0.26682728]])

In [None]:
np.random.seed(2)
np.random.randn(5,2)

array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737],
       [ 0.50288142, -1.24528809],
       [-1.05795222, -0.90900761]])

In [None]:
np.random.seed(2)
np.random.randint(low = 0 ,high = 100 , size=(5,2))

array([[40, 15],
       [72, 22],
       [43, 82],
       [75,  7],
       [34, 49]])

### Element-wise operations

In [None]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [None]:
#add to every array element  
a + 1 

array([1, 2, 3, 4, 5])

In [None]:
#multiply 
a * 2

array([0, 2, 4, 6, 8])

In [None]:
10+((a *2)**2)

array([10, 14, 26, 46, 74])

In [None]:
b = (10 + (a * 2 )) ** 2  / 100
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [None]:
#combine 2 arrays 
a + b 


array([1.  , 2.44, 3.96, 5.56, 7.24])

### Comparison Operations

In [None]:
a >= 2 

array([False, False,  True,  True,  True])

In [None]:
 # Display `b` for reference before comparing it with `a`.
 # Do NOT rebind `a` here (`a = b`): the cells that follow (`a > b`, `a[a>b]`,
 # `a.mean()`, ...) rely on `a` still being `np.arange(5)`, and would give
 # different results on a fresh Restart & Run All if `a` became the float array `b`.
 b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [None]:
a > b 

array([False, False,  True,  True,  True])

In [None]:
a
a[a>b]

array([2, 3, 4])

### Summarizing operations

In [None]:
a

array([0, 1, 2, 3, 4])

In [None]:
a.mean()

2.0

In [None]:
a.std()

1.4142135623730951

In [None]:
a.max()

4

# **Linear Algebra**

### Vector Operations

In [None]:
u = np.array([2,4,5,6])
v = np.array([1,0,0,2])

In [None]:
#addition
u + v 

array([3, 4, 5, 8])

**1. Vector-vector multiplication**

In [None]:
#element-wise multiplication (this is NOT the dot product; summing these products gives the dot product)
u * v 

array([ 2,  0,  0, 12])

In [None]:
#raise an AssertionError if the condition is not true 
assert u.shape[0] == v.shape[0]

In [None]:
u.shape[0]

4

In [None]:
v.shape[0]

4

In [None]:
 def vector_vector_multiplication(u , v ):
   """Return the dot product of ``u`` and ``v``.

   The products ``u[i] * v[i]`` are accumulated onto a float start value
   of ``0.0``, so the result is a float even for integer inputs.

   Raises:
     AssertionError: if ``u`` and ``v`` differ in size along their first axis.
   """
   assert u.shape[0] == v.shape[0]
   # One product per position; the generator is consumed by sum() below.
   products = (u[i] * v[i] for i in range(u.shape[0]))
   return sum(products, 0.0)


In [None]:
vector_vector_multiplication(u , v )

14.0

**2. Matrix-vector multiplication**

In [None]:
U = np.array([
              [5,2,4,6],
              [1,2,1,2],
              [3,1,2,1]
])

In [None]:
U.shape[0]

3

In [None]:
U.shape[1]

4

In [None]:
def matrix_vector_multiplication(U , v ):
   """Multiply matrix ``U`` by vector ``v``, one row at a time.

   Entry ``i`` of the result is the dot product of row ``i`` of ``U`` with
   ``v``, computed by ``vector_vector_multiplication``. The result is a
   float array with one entry per row of ``U``.

   Raises:
     AssertionError: if the number of columns of ``U`` differs from the
       length of ``v``.
   """
   assert U.shape[1] == v.shape[0]
   out = np.zeros(U.shape[0])
   for row_idx, row in enumerate(U):
     out[row_idx] = vector_vector_multiplication(row , v )
   return out

In [None]:
matrix_vector_multiplication(U , v )

array([17.,  5.,  5.])

In [None]:
U.dot(v)

array([17,  5,  5])

In [None]:
u.dot(v)

14

**3. Matrix-matrix multiplication**

In [None]:
V = np.array([
              [1,1,2],
              [0,0.5,1],
              [0,2,1],
              [2,1,0]             
              ])

In [None]:
V.shape[0]

4

In [None]:
V.shape[1]

3

In [None]:
U.dot(V)

array([[17. , 20. , 16. ],
       [ 5. ,  6. ,  5. ],
       [ 5. ,  8.5,  9. ]])

In [None]:
def matrix_matrix_multiplication(U , V ):
   """Multiply matrix ``U`` by matrix ``V``, one column of ``V`` at a time.

   Column ``j`` of the result is ``U`` times column ``j`` of ``V``, computed
   by ``matrix_vector_multiplication``. The result is a float array of shape
   ``(U.shape[0], V.shape[1])``.

   Raises:
     AssertionError: if the number of columns of ``U`` differs from the
       number of rows of ``V``.
   """
   assert U.shape[1] == V.shape[0]
   out_shape = (U.shape[0], V.shape[1])
   product = np.zeros(out_shape)
   for col in range(out_shape[1]):
     # Fill the output column by column.
     product[:, col] = matrix_vector_multiplication(U , V[:, col] )
   return product

In [None]:
matrix_matrix_multiplication(U , V )

array([[17. , 20. , 16. ],
       [ 5. ,  6. ,  5. ],
       [ 5. ,  8.5,  9. ]])

### Identity matrix

In [None]:
I = np.eye(3)
I

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
V.dot(I)

array([[1. , 1. , 2. ],
       [0. , 0.5, 1. ],
       [0. , 2. , 1. ],
       [2. , 1. , 0. ]])

### Inverse matrix

In [None]:
 Vs = V[[0,1,2]]
 Vs

array([[1. , 1. , 2. ],
       [0. , 0.5, 1. ],
       [0. , 2. , 1. ]])

In [None]:
Vs_inv=np.linalg.inv(Vs)
Vs_inv

array([[ 1.        , -2.        ,  0.        ],
       [ 0.        , -0.66666667,  0.66666667],
       [ 0.        ,  1.33333333, -0.33333333]])

In [None]:
Vs_inv.dot(Vs)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

# Intro to Pandas

### Importing package

In [1]:
import numpy as np
import pandas as pd

### Dataframes

#### Manipulating Dataframe 

In [2]:
# Small hand-written car dataset: each inner list is one row, with values
# in the same order as the labels in `columns` below.
data = [
    ['Nissan', 'Stanza', 1991, 138, 4, 'MANUAL', 'sedan', 2000],
    # Engine HP is None in this row, i.e. a missing value
    ['Hyundai', 'Sonata', 2017, None, 4, 'AUTOMATIC', 'Sedan', 27150],
    ['Lotus', 'Elise', 2010, 218, 4, 'MANUAL', 'convertible', 54990],
    ['GMC', 'Acadia',  2017, 194, 4, 'AUTOMATIC', '4dr SUV', 34450],
    ['Nissan', 'Frontier', 2017, 261, 6, 'MANUAL', 'Pickup', 32340],
]

# Column labels, one per position in each row of `data`.
columns = [
    'Make', 'Model', 'Year', 'Engine HP', 'Engine Cylinders',
    'Transmission Type', 'Vehicle_Style', 'MSRP'
]

In [3]:
df= pd.DataFrame(data  , columns = columns)
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [4]:
df.head(2)

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150


In [5]:
df.Model

0      Stanza
1      Sonata
2       Elise
3      Acadia
4    Frontier
Name: Model, dtype: object

In [6]:
df["Model"]

0      Stanza
1      Sonata
2       Elise
3      Acadia
4    Frontier
Name: Model, dtype: object

In [7]:
#get a subset of our dataframe
df[['Make','Year']]

Unnamed: 0,Make,Year
0,Nissan,1991
1,Hyundai,2017
2,Lotus,2010
3,GMC,2017
4,Nissan,2017


In [8]:
df['id'] = [1,2,3,4,5]
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP,id
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000,1
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150,2
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990,3
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450,4
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340,5


In [9]:
del df['id']

#### Index

In [10]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [11]:
df.Make.index

RangeIndex(start=0, stop=5, step=1)

In [12]:
df.index = ['a','b','c','d','e']
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
a,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
b,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
c,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
d,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
e,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [13]:
df.loc[['a','b']]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
a,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
b,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150


In [14]:
df.iloc[1]

Make                   Hyundai
Model                   Sonata
Year                      2017
Engine HP                  NaN
Engine Cylinders             4
Transmission Type    AUTOMATIC
Vehicle_Style            Sedan
MSRP                     27150
Name: b, dtype: object

In [15]:
df.reset_index()

Unnamed: 0,index,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,a,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,b,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,c,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,d,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,e,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [18]:
df = df.reset_index(drop = True)
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


#### Accessing Elements 

####  Element wise operations 

In [20]:
df["Engine HP"] * 2

0    276.0
1      NaN
2    436.0
3    388.0
4    522.0
Name: Engine HP, dtype: float64

#### Filtering

In [21]:
df[df['Year'] >= 2015]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [24]:
df[(df['Make'] == 'Nissan') &  (df['Year'] >= 2015)]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


#### String operations

In [26]:
#lowercase
df['Vehicle_Style'].str.lower()

0          sedan
1          sedan
2    convertible
3        4dr suv
4         pickup
Name: Vehicle_Style, dtype: object

In [28]:
df['Vehicle_Style'].str.replace(' ','_').str.lower()

0          sedan
1          sedan
2    convertible
3        4dr_suv
4         pickup
Name: Vehicle_Style, dtype: object

#### Summarizing operations

In [29]:
df.MSRP.max()

54990

In [32]:
#summary statistics for the numerical columns 
df.describe()

Unnamed: 0,Year,Engine HP,Engine Cylinders,MSRP
count,5.0,4.0,5.0,5.0
mean,2010.4,202.75,4.4,30186.0
std,11.260551,51.29896,0.894427,18985.044904
min,1991.0,138.0,4.0,2000.0
25%,2010.0,180.0,4.0,27150.0
50%,2017.0,206.0,4.0,32340.0
75%,2017.0,228.75,4.0,34450.0
max,2017.0,261.0,6.0,54990.0


In [34]:
# how many unique values in each column 
df.nunique()

Make                 4
Model                5
Year                 3
Engine HP            4
Engine Cylinders     2
Transmission Type    2
Vehicle_Style        5
MSRP                 5
dtype: int64

#### Missing Values

In [35]:
df.isnull()

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,False,False,False,False,False,False,False,False
1,False,False,False,True,False,False,False,False
2,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False


In [36]:
df.isnull().sum()

Make                 0
Model                0
Year                 0
Engine HP            1
Engine Cylinders     0
Transmission Type    0
Vehicle_Style        0
MSRP                 0
dtype: int64

#### Grouping 

In [None]:
# SQL equivalent of the pandas groupby in the following cell, which takes the
# minimum MSRP per transmission type (kept as a plain string; it is not executed as SQL).
'''
 SELECT transmission_type , MIN(MSRP)
 FROM cars 
 GROUP BY transmission_type
 ''' 

In [38]:
df.groupby('Transmission Type').MSRP.min()

Transmission Type
AUTOMATIC    27150
MANUAL        2000
Name: MSRP, dtype: int64

### Getting the numpy arrays 

In [39]:
df.MSRP.values

array([ 2000, 27150, 54990, 34450, 32340])

In [41]:
# convert the df to a dict 
df.to_dict(orient='records')

[{'Engine Cylinders': 4,
  'Engine HP': 138.0,
  'MSRP': 2000,
  'Make': 'Nissan',
  'Model': 'Stanza',
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'sedan',
  'Year': 1991},
 {'Engine Cylinders': 4,
  'Engine HP': nan,
  'MSRP': 27150,
  'Make': 'Hyundai',
  'Model': 'Sonata',
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': 'Sedan',
  'Year': 2017},
 {'Engine Cylinders': 4,
  'Engine HP': 218.0,
  'MSRP': 54990,
  'Make': 'Lotus',
  'Model': 'Elise',
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'convertible',
  'Year': 2010},
 {'Engine Cylinders': 4,
  'Engine HP': 194.0,
  'MSRP': 34450,
  'Make': 'GMC',
  'Model': 'Acadia',
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': '4dr SUV',
  'Year': 2017},
 {'Engine Cylinders': 6,
  'Engine HP': 261.0,
  'MSRP': 32340,
  'Make': 'Nissan',
  'Model': 'Frontier',
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'Pickup',
  'Year': 2017}]

# summary of session 1 