In [1]:
import numpy as np

## NumPy arrays

In [2]:
# Ten-element vector of float zeros.
zeros = np.zeros(shape=10)

In [3]:
# A bare expression on the last line makes the notebook render the array.
zeros

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [4]:
# Ten-element vector with every slot set to 1.0.
ones = np.ones(shape=10)
ones

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [5]:
# np.full creates an array of the given length filled with a chosen value.
array = np.full(shape=10, fill_value=0.0)
array

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
# Repeat the scalar 0.0 ten times.
array = np.repeat(0.0, repeats=10)
array

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [7]:
# Each element of the list is repeated five times, one after the other.
array = np.repeat([0.0, 1.0], repeats=5)
array

array([0., 0., 0., 0., 0., 1., 1., 1., 1., 1.])

In [8]:
# Per-element repeat counts: 0.0 appears twice, 1.0 three times.
array = np.repeat([0.0, 1.0], repeats=[2, 3])
array

array([0., 0., 1., 1., 1.])

In [9]:
# Read a single element by its zero-based position.
el = array[1]
print(el)

0.0


In [10]:
# Indexing with a list of positions pulls out several elements at once,
# in the order requested (4, 2, 0).
print(array[[4, 2, 0]])

[1. 1. 0.]


In [11]:
# Element assignment changes the array in place.
array[1] = 1
print(array)

[0. 1. 1. 1. 1.]


In [12]:
# Convert an ordinary Python list into a NumPy array.
elements = [1, 2, 3, 4]
array = np.array(elements)
array

array([1, 2, 3, 4])

In [13]:
# Integers from 0 up to, but not including, 10.
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
# Eleven evenly spaced values covering [0, 1], both endpoints included.
thresholds = np.linspace(start=0, stop=1, num=11)
thresholds

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [15]:
# dtype selects the element type; here unsigned 8-bit integers.
zeros = np.zeros(shape=10, dtype='uint8')
zeros

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [16]:
# 300 does not fit into uint8 (0..255). Assigning the bare Python int
# (`zeros[0] = 300`) is deprecated since NumPy 1.24 and raises OverflowError
# in NumPy 2.x, so the wrap-around is requested explicitly: the cast keeps
# 300 modulo 256, which is 44.
zeros[0] = np.array(300).astype(np.uint8)
print(zeros[0])

44


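DeprecationWarning: NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays.  The conversion of 300 to uint8 will fail in the future.
For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  zeros[0] = 300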


In [17]:
# An array can be iterated directly; the loop yields one element at a time.
for i in np.arange(5):
    print(i)

0
1
2
3
4


## Two-dimensional NumPy arrays

In [19]:
# A tuple shape gives a 2-D array: 5 rows by 2 columns of 32-bit floats.
zeros = np.zeros(shape=(5, 2), dtype='float32')
zeros

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)

In [20]:
# .shape reports the size along each dimension as a tuple.
print(zeros.shape)

(5, 2)


In [21]:
# 3x3 integer matrix built directly from nested row lists.
numbers = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
numbers

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [22]:
# Two indices select a single cell: row 0, column 1.
print(numbers[0, 1])

2


In [23]:
# Overwrite the cell at row 0, column 1, then show the updated matrix.
numbers[0, 1] = 10
numbers

array([[ 1, 10,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [24]:
# A single index on a 2-D array selects a whole row.
numbers[0]

array([ 1, 10,  3])

In [25]:
# Replace the entire row at index 1 with new values.
numbers[1] = [1, 1, 1]
numbers

array([[ 1, 10,  3],
       [ 1,  1,  1],
       [ 7,  8,  9]])

In [26]:
# ":" selects every row, so this overwrites the whole column at index 2.
numbers[:, 2] = [9, 9, 9]
numbers

array([[ 1, 10,  9],
       [ 1,  1,  9],
       [ 7,  8,  9]])

## Randomly generated arrays

In [29]:
# 5x2 array of uniform samples from [0, 1). No seed is set in this cell,
# so the values differ from one fresh run to the next.
arr = np.random.random_sample(size=(5, 2))
arr

array([[0.24569056, 0.57281933],
       [0.75940345, 0.65233947],
       [0.55466094, 0.22979016],
       [0.25383808, 0.11529009],
       [0.60232832, 0.30578723]])

In [32]:
# Fixing the seed first makes the uniform draw reproducible.
np.random.seed(2)
arr = np.random.random_sample(size=(5, 2))
arr

array([[0.4359949 , 0.02592623],
       [0.54966248, 0.43532239],
       [0.4203678 , 0.33033482],
       [0.20464863, 0.61927097],
       [0.29965467, 0.26682728]])

In [33]:
# Same seed, but sampling from the standard normal distribution instead.
np.random.seed(2)
arr = np.random.standard_normal(size=(5, 2))
arr

array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737],
       [ 0.50288142, -1.24528809],
       [-1.05795222, -0.90900761]])

In [35]:
# Reproducible 5x2 array of integers drawn from 0 (inclusive) to 100 (exclusive).
np.random.seed(2)
randint = np.random.randint(0, 100, (5, 2))
randint

array([[40, 15],
       [72, 22],
       [43, 82],
       [75,  7],
       [34, 49]])

In [36]:
# np.random.shuffle reorders the array in place, so the same variable is
# printed before and after the call.
idx = np.arange(5)
print(f'before shuffle {idx}')
np.random.shuffle(idx)
print(f'after shuffle {idx}')

before shuffle [0 1 2 3 4]
after shuffle [4 0 2 1 3]


## NumPy Operations

### Element-wise operations

In [37]:
# Integer range 0..4 used as the operand in the examples below.
rng = np.arange(0, 5)
rng

array([0, 1, 2, 3, 4])

In [38]:
# Arithmetic with a scalar is applied to every element.
rng * 2

array([0, 2, 4, 6, 8])

In [39]:
# Several element-wise operations in one expression; the true division
# by 2 produces a float array.
(rng - 1) * 3 / 2 + 1

array([-0.5,  1. ,  2.5,  4. ,  5.5])

In [41]:
# Add small reproducible uniform noise (scaled to [0, 0.01)) to 0..4;
# the two equally sized arrays are summed element by element.
np.random.seed(2)
noise = np.random.rand(5) * 0.01
numbers = np.arange(5)
result = noise + numbers
np.round(result, 4)

array([0.0044, 1.0003, 2.0055, 3.0044, 4.0042])

In [43]:
# Three reproducible "predictions" rounded to two decimals, then squared
# element-wise. Only the last expression of a cell is displayed, so the
# stray bare `pred` that used to sit mid-cell had no effect and is gone.
np.random.seed(2)
pred = np.random.rand(3).round(2)
square = pred ** 2
square

array([0.1936, 0.0009, 0.3025])

### Summarizing Operations

In [44]:
# Recreate the three rounded predictions and add them up.
np.random.seed(2)
pred = np.round(np.random.rand(3), 2)
pred_sum = np.sum(pred)
pred_sum

1.02

In [45]:
# Summary statistics of `pred`, each printed with two decimals.
print(f'min = {pred.min():.2f}')
print(f'mean = {pred.mean():.2f}')
print(f'max = {pred.max():.2f}')
print(f'std = {pred.std():.2f}')

min = 0.03
mean = 0.34
max = 0.55
std = 0.22


In [46]:
# Reproducible 4x3 matrix of uniform samples rounded to two decimals.
np.random.seed(2)
matrix = np.round(np.random.rand(4, 3), 2)
matrix

array([[0.44, 0.03, 0.55],
       [0.44, 0.42, 0.33],
       [0.2 , 0.62, 0.3 ],
       [0.27, 0.62, 0.53]])

In [47]:
# With no axis argument, max() reduces the whole matrix to a single scalar.
matrix.max()

0.62

### Sorting

In [48]:
# Four reproducible predictions, rounded to two decimals, for the sorting demo.
np.random.seed(2)
pred = np.round(np.random.rand(4), 2)
pred

array([0.44, 0.03, 0.55, 0.44])

In [49]:
# np.sort returns a sorted copy; `pred` itself is left unchanged.
np.sort(pred)

array([0.03, 0.44, 0.44, 0.55])

In [50]:
# The .sort() method sorts the array in place and returns None,
# so this cell displays nothing.
pred.sort()

In [51]:
# Show that `pred` itself is now in ascending order.
pred

array([0.03, 0.44, 0.44, 0.55])

# Pandas

In [52]:
import pandas as pd

In [57]:
# Raw records, one inner list per car. The None in the second record
# marks a missing value.
data = [
['Nissan', 'Stanza', 1991, 138, 4, 'MANUAL', 'sedan', 2000],
['Hyundai', 'Sonata', 2017, None, 4, 'AUTOMATIC', 'Sedan', 27150],
['Lotus', 'Elise', 2010, 218, 4, 'MANUAL', 'convertable', 54990],
['GMC', 'Acadia', 2017, 194, 4, 'AUTOMATIC', '4dr SUV', 34450],
['Nissan', 'Frontier', 2017, 261, 6, 'MANUAL', 'Pickup', 32340],
]

In [58]:
# Column labels, one per field of a record, in field order.
columns = [
'Make', 'Model', 'Year', 'Engine HP', 'Engine Cylinders',
'Transmission Type', 'Vehicle_Style', 'MSRP'
]

In [59]:
# Build the DataFrame from the row lists, labelling the fields with `columns`.
df = pd.DataFrame(data, columns=columns)

In [60]:
# Render the whole DataFrame.
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertable,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [61]:
# head(n=2) returns only the first two rows.
df.head(n=2)

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150


In [62]:
# Dot notation returns a single column as a Series.
df.Make

0     Nissan
1    Hyundai
2      Lotus
3        GMC
4     Nissan
Name: Make, dtype: object

In [63]:
# Bracket notation with the column name returns the same Series.
df['Make']

0     Nissan
1    Hyundai
2      Lotus
3        GMC
4     Nissan
Name: Make, dtype: object

In [64]:
# A column name containing a space cannot be written with dot notation,
# so bracket notation is used.
df['Engine HP']

0    138.0
1      NaN
2    218.0
3    194.0
4    261.0
Name: Engine HP, dtype: float64

In [65]:
# The column label can also come from a variable.
col_name = 'Engine HP'
df[col_name]

0    138.0
1      NaN
2    218.0
3    194.0
4    261.0
Name: Engine HP, dtype: float64

In [66]:
# A list of column names selects a DataFrame containing just those columns.
df[['Make', 'Model', 'MSRP']]

Unnamed: 0,Make,Model,MSRP
0,Nissan,Stanza,2000
1,Hyundai,Sonata,27150
2,Lotus,Elise,54990
3,GMC,Acadia,34450
4,Nissan,Frontier,32340


In [67]:
# Assigning a list to a new column label adds that column (one value per row).
df['id'] = ['nis1', 'hyu1', 'lot2', 'gmc1', 'nis2']

In [68]:
# Show the frame, which now includes the 'id' column.
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP,id
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000,nis1
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150,hyu1
2,Lotus,Elise,2010,218.0,4,MANUAL,convertable,54990,lot2
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450,gmc1
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340,nis2


In [69]:
# Assigning to an existing column label overwrites its values.
df['id'] = [1, 2, 3, 4, 5]

In [70]:
# Show the frame with the replaced 'id' values.
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP,id
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000,1
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150,2
2,Lotus,Elise,2010,218.0,4,MANUAL,convertable,54990,3
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450,4
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340,5


In [71]:
# Remove the 'id' column from the DataFrame in place.
del df['id']

In [72]:
# Show the frame after the 'id' column has been removed.
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertable,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


## Index

In [73]:
# The index holds the row labels; here it is the default RangeIndex.
df.index

RangeIndex(start=0, stop=5, step=1)

## Accessing Rows

In [74]:
# iloc selects by integer position; a single position returns that row
# as a Series.
df.iloc[0]

Make                 Nissan
Model                Stanza
Year                   1991
Engine HP             138.0
Engine Cylinders          4
Transmission Type    MANUAL
Vehicle_Style         sedan
MSRP                   2000
Name: 0, dtype: object

In [75]:
# A list of positions returns those rows as a DataFrame, in the order given.
df.iloc[[2, 3, 0]]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
2,Lotus,Elise,2010,218.0,4,MANUAL,convertable,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000


## Splitting a Data Frame

In [76]:
# Row counts for the train / validation / test partitions.
n_train, n_val, n_test = 3, 1, 1

In [77]:
# Cut the frame into three consecutive positional slices.
# Train: the first n_train rows.
df_train = df.iloc[:n_train]
# Validation: the next n_val rows.
df_val = df.iloc[n_train:n_train+n_val]
# Test: every remaining row (n_test is not used in the slice itself).
df_test = df.iloc[n_train+n_val:]

In [78]:
# Inspect the training partition.
df_train

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertable,54990


In [79]:
# Inspect the validation partition.
df_val

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450


In [80]:
# Inspect the test partition.
df_test

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340
