# NumPy

## 1D and 2D Arrays

In [2]:
import numpy as np

# Build a one-dimensional array from a plain Python list.
a = np.array([1, 2, 3])

# Values on the first line, then the shape tuple (one axis of length 3).
print(a, a.shape, sep="\n")

[1 2 3]
(3,)


In [3]:
import numpy as np

# Repeated values are still separate elements: four inputs give an axis of length 4.
a = np.array([1, 1, 2, 3])

print(a.shape)

(4,)


In [4]:
# A table:
#   [1, 2, 3]
#   [4, 5, 6]
#
# The same table written as a Python list of lists (one inner list per row):
#   [[1, 2, 3],
#    [4, 5, 6]]

In [5]:
import numpy as np

# Each inner list becomes one row, giving 2 rows by 3 columns.
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Values first, then the (rows, columns) shape.
print(b, b.shape, sep="\n")

[[1 2 3]
 [4 5 6]]
(2, 3)


In [6]:
import numpy as np

# An array has a single dtype: the two floats below make NumPy store
# every entry, including the integers, as a float.
c = np.array([
    [1.1, 2, 3],
    [4, 5.1, 6],
])

print(c)

[[1.1 2.  3. ]
 [4.  5.1 6. ]]


In [7]:
import numpy as np

# Mixing float, int, bool and str in one array: NumPy settles on a single
# dtype that can hold all of them, so every element ends up as a string.
c = np.array([
    [1.1, 2, 3],
    [True, 5.1, "6"],
])

print(c)

[['1.1' '2' '3']
 ['True' '5.1' '6']]


## NumPy Reshape

In [8]:
# Rearrange b's six values from 2 rows x 3 columns into 3 rows x 2 columns.
b.reshape((3, 2))

array([[1, 2],
       [3, 4],
       [5, 6]])

In [9]:
# Flatten the same six values into a single 1D axis of length 6.
b.reshape((6,))

array([1, 2, 3, 4, 5, 6])

In [10]:
# A reshape must keep the total number of elements: b holds 6 values, so a
# length-5 target is impossible and NumPy raises a ValueError.
# Kept commented out so the rest of the notebook still runs.
# b.reshape((5,))

## Functions for Generating NumPy Arrays

In [11]:
# 2 x 3 array filled with zeros; the "0." in the output shows the elements are floats.
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [12]:
# 2 x 3 array filled with ones; again floats, as the "1." in the output shows.
np.ones((2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# dtype=int asks for integer elements instead of floats, so the output shows 1 rather than 1.
np.ones((2, 3), dtype=int)

array([[1, 1, 1],
       [1, 1, 1]])

In [14]:
np.arange(1, 10)
# Like Python's built-in range(): starts at 1 and stops before 10 (the stop value is excluded)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [15]:
np.arange(1, 10, 2)
# The third argument is the step: start at 1 and go up by 2
# while staying below the stop value 10, so the last element is 9

array([1, 3, 5, 7, 9])

In [16]:
np.linspace(1, 11, num=11)
# linspace takes the number of points (num) instead of a step and works out the
# spacing itself; both endpoints are included, so 11 points from 1 to 11 are 1 apart.
# np.arange also accepts float steps, but linspace is the safer choice for non-integer spacing.

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [17]:
np.linspace(1, 12, num=11)
# Still 11 points, but now spread evenly between 1 and 12 (both included),
# so the spacing becomes (12 - 1) / 10 = 1.1

array([ 1. ,  2.1,  3.2,  4.3,  5.4,  6.5,  7.6,  8.7,  9.8, 10.9, 12. ])

## Indexing

In [18]:
# A single index selects along the first axis: this is the first row.
b[0]

array([1, 2, 3])

In [19]:
# The second (and last) row.
b[1]

array([4, 5, 6])

In [20]:
# b has only two rows (indices 0 and 1), so index 2 is out of bounds and
# raises an IndexError. Kept commented out so the notebook still runs.
# b[2]

In [21]:
# Just like Python slicing, the ':' operator means you want everything along that axis
b[:]

array([[1, 2, 3],
       [4, 5, 6]])

In [22]:
# Every row, column 0: the first column, returned as a 1D array.
b[:, 0]

array([1, 4])

In [23]:
# Row 0, column 0: a single element, returned as a NumPy scalar rather than an array.
b[0, 0]

np.int64(1)

## Vectorization (Operating on Whole Arrays at Once)

In [24]:
# Multiplies every element at once, with no explicit loop.
# The result is a new array; b itself is left unchanged.
b * 2

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [25]:
# Anti-pattern: doubling element by element with Python loops.
# range(2) and range(3) walk b's 2 rows and 3 columns.
for i in range(2):
    for j in range(3):
        b[i, j] = b[i, j] * 2  # assigns into b itself

b

# ! We should not do this
# Unlike `b * 2` above, which returned a new array and left b unchanged,
# this loop overwrote b in place, so re-running the cell doubles b again.

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [26]:
# Comparisons are element-wise too: the result is a boolean array with b's shape.
b > 7

array([[False, False, False],
       [ True,  True,  True]])

In [27]:
# Boolean-mask indexing: keeps only the elements where the mask is True,
# returned as a 1D array.
b[b > 7]

array([ 8, 10, 12])

In [28]:
# b still holds the doubled values written by the explicit loop;
# the comparison and the mask did not modify it.
b

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [29]:
# With no axis given, the mean is taken over all six elements.
np.mean(b)

np.float64(7.0)

In [None]:
# Average of each row: axis=1 averages across the columns, giving one value per row
np.mean(b, axis=1)

array([ 4., 10.])

In [None]:
# Average of each column: axis=0 averages down the rows, giving one value per column
np.mean(b, axis=0)

array([5., 7., 9.])

In [None]:
# An array has one axis per dimension, so the 2D array b only has axes 0 and 1.
# Asking for axis=2 is out of bounds and raises an AxisError, hence commented out.
# np.mean(b, axis=2)

In [33]:
# Negative axes count from the end: for the 2D array b, axis=-1 is the same
# as axis=1, so this gives the row means again.
np.mean(b, axis=-1)

array([ 4., 10.])

In [34]:
# Standard deviation along the last axis: one value per row
np.std(b, axis=-1)

array([1.63299316, 1.63299316])

In [35]:
# Join arrays end to end along an existing axis.
# The default is axis=0, so the rows of b * 2 are appended below the rows of b.
np.concatenate([b, b * 2])

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [ 4,  8, 12],
       [16, 20, 24]])

In [36]:
# axis=1 joins side by side instead: each row of b is extended with the
# matching row of b * 2.
np.concatenate([b, b * 2], axis=1)

array([[ 2,  4,  6,  4,  8, 12],
       [ 8, 10, 12, 16, 20, 24]])

## How NumPy Arrays Arise in Practice

In [None]:
from tensorflow.keras.datasets import mnist

# load_data() returns MNIST as two (inputs, labels) pairs of NumPy arrays,
# unpacked here into a training split and a test split.
(X_train, y_train), (X_test, y_test) = mnist.load_data() # 4 arrays in total

In [None]:
# X_train is a single 3D array: 60000 entries, each a 28 x 28 grid
X_train.shape

(60000, 28, 28)

In [12]:
import pandas as pd

# Read the housing table from a CSV file located relative to the notebook's
# working directory; the bare `df` on the last line displays the frame.
df = pd.read_csv('housing.csv')
df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND


In [13]:
# Convert the DataFrame to a NumPy array. The columns mix numbers with text
# (ocean_proximity), so the resulting array has dtype=object.
df.to_numpy()

array([[-122.23, 37.88, 41.0, ..., 8.3252, 452600.0, 'NEAR BAY'],
       [-122.22, 37.86, 21.0, ..., 8.3014, 358500.0, 'NEAR BAY'],
       [-122.24, 37.85, 52.0, ..., 7.2574, 352100.0, 'NEAR BAY'],
       ...,
       [-121.22, 39.43, 17.0, ..., 1.7, 92300.0, 'INLAND'],
       [-121.32, 39.43, 18.0, ..., 1.8672, 84700.0, 'INLAND'],
       [-121.24, 39.37, 16.0, ..., 2.3886, 89400.0, 'INLAND']],
      dtype=object)

In [14]:
# One array row per DataFrame row and one array column per DataFrame column.
df.to_numpy().shape

(20640, 10)