In [1]:
import pandas as pd
import numpy as np

In [2]:
# Climate measurements per region; the region name is used as the row index.
apples = pd.DataFrame(
    {
        'Temp. (F)': [73, 91, 87, 102, 69],
        'Rainfall (mm)': [67, 88, 134, 43, 96],
        'Humidity (%)': [43, 64, 58, 37, 70],
    },
    index=pd.Index(['Kanto', 'Johto', 'Hoenn', 'Sinnoh', 'Unova'], name='Region'),
)
apples

Unnamed: 0_level_0,Temp. (F),Rainfall (mm),Humidity (%)
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kanto,73,67,43
Johto,91,88,64
Hoenn,87,134,58
Sinnoh,102,43,37
Unova,69,96,70


In [3]:
# Weights applied to temperature, rainfall and humidity (in that order)
# in the yield formula used below.
w1, w2, w3 = 0.3, 0.2, 0.5

In [4]:
kanto_temp = 73
kanto_rainfall = 67
kanto_humidity = 43

In [5]:
kanto_yield_apples = kanto_temp * w1 + kanto_rainfall * w2 + kanto_humidity * w3
kanto_yield_apples

56.8

In [6]:
kanto = [73, 67, 43]
johto = [91, 88, 64]
hoenn = [87, 134, 58]
sinnoh = [102, 43, 37]
unova = [69, 96, 70]

In [7]:
weights = [w1, w2, w3]

In [8]:
def crop_yield(region, weights):
    """Return the weighted sum of a region's measurements.

    Pairs each value in ``region`` with the value at the same position in
    ``weights``, multiplies the pairs and adds the products. Pairing stops
    at the shorter of the two sequences; empty input gives 0.
    """
    return sum(measurement * weight for measurement, weight in zip(region, weights))
crop_yield(kanto, weights)

56.8

In [9]:
kanto = np.array([73, 67, 43])
kanto

array([73, 67, 43])

In [10]:
weights = np.array([w1, w2, w3])
weights

array([0.3, 0.2, 0.5])

In [11]:
type(kanto)

numpy.ndarray

In [12]:
help(np.dot)

Help on function dot in module numpy:

dot(...)
    dot(a, b, out=None)
    
    Dot product of two arrays. Specifically,
    
    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
      (without complex conjugation).
    
    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
      but using :func:`matmul` or ``a @ b`` is preferred.
    
    - If either `a` or `b` is 0-D (scalar), it is equivalent to
      :func:`multiply` and using ``numpy.multiply(a, b)`` or ``a * b`` is
      preferred.
    
    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
      the last axis of `a` and `b`.
    
    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
      sum product over the last axis of `a` and the second-to-last axis of
      `b`::
    
        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
    
    It uses an optimized BLAS library when possible (see `numpy.linalg`).
    
    Parameters
    ----------
    a 

In [13]:
np.dot(kanto, weights)

56.8

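The dot product is a mathematical operation that takes two arrays (vectors or matrices) and returns a scalar value or a matrix, depending on the dimensions of the input arrays. For two 1-D arrays of equal length it multiplies corresponding elements and adds the products; here that is 73 × 0.3 + 67 × 0.2 + 43 × 0.5 = 56.8.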

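Both `np.dot` and the `@` operator compute these products in NumPy. For 1-D arrays they give the inner product; for 2-D arrays they perform matrix multiplication, where the NumPy documentation recommends `np.matmul` or `@` over `np.dot`.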

In [14]:
(kanto*weights).sum()

56.8

In [15]:
arr1 = list(range(1000))
arr2 = list(range(1000, 2000))

In [16]:
arr1_np = np.array(arr1)
arr2_np = np.array(arr2)

In [17]:
%%time
# Baseline: dot product of the two 1000-element Python lists with an explicit loop.
# NOTE(review): the recorded wall time is 0 ns, i.e. below the timer's resolution
# for inputs this small; %%timeit or larger inputs would give a usable number.
result = 0
for x1, x2 in zip(arr1, arr2):
    result += x1 * x2 
result

Wall time: 0 ns


832333500

In [18]:
%%time
# Same dot product on the NumPy versions of the lists, as one vectorized call.
# NOTE(review): the recorded wall time is 0 ns, so this run cannot show a
# speed difference against the loop; %%timeit or larger inputs would.
np.dot(arr1_np, arr2_np)

Wall time: 0 ns


832333500

### 2D array

In [19]:
# One row per region (Kanto, Johto, Hoenn, Sinnoh, Unova);
# columns are temperature, rainfall and humidity.
climate_data = np.array([
    [73, 67, 43],
    [91, 88, 64],
    [87, 134, 58],
    [102, 43, 37],
    [69, 96, 70],
])
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [20]:
climate_data.shape

(5, 3)

### 3D array

In [21]:
# 3-D array of shape (2, 2, 3): two 2x3 matrices stacked along the first axis.
# The single 19.5 entry makes the whole array floating point.
arr3 = np.array([
    [[11, 12, 13], [13, 14, 15]],
    [[15, 16, 17], [17, 18, 19.5]],
])

In [22]:
arr3.shape

(2, 2, 3)

In [23]:
weights.dtype

dtype('float64')

### Matrix multiplication

In [24]:
np.matmul(climate_data, weights)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [25]:
climate_data @ weights

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [26]:
# Rebind climate_data (previously the small hand-typed array) to the contents
# of climate.txt: comma-separated values, with the first line skipped as a header.
climate_data = np.genfromtxt('climate.txt', delimiter = ',', skip_header = 1)

In [27]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [28]:
climate_data.shape

(10000, 3)

In [29]:
weights = np.array([0.3, 0.2, 0.5])

In [30]:
yields = climate_data @ weights
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [31]:
yields.shape

(10000,)

In [32]:
# Append the yields as a fourth column. reshape(-1, 1) turns the 1-D yields
# vector into a single column whose length NumPy infers, so this cell keeps
# working when climate.txt has a different number of rows (the previous
# hard-coded 10000 did not).
climate_results = np.concatenate((climate_data, yields.reshape(-1, 1)), axis=1)
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [33]:
# Write the results as a CSV file.
# - delimiter=',' makes the data rows match the comma-separated header and the
#   format climate.txt was read in (savetxt defaults to a single space).
# - comments='' stops savetxt from prefixing the header line with a marker.
# - the last header column was misspelled; it is now 'yield_apples'.
np.savetxt('climate_results.txt',
           climate_results,
           fmt='%.2f',
           delimiter=',',
           header='temperature,rainfall,humidity,yield_apples',
           comments='')

In [34]:
# Two 3x4 integer matrices used to demonstrate array arithmetic and broadcasting.
arr2 = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 1, 2, 3],
])
arr3 = np.array([
    [11, 12, 13, 14],
    [15, 16, 17, 18],
    [19, 11, 12, 13],
])

In [35]:
arr2+3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [36]:
arr2 + arr3

array([[12, 14, 16, 18],
       [20, 22, 24, 26],
       [28, 12, 14, 16]])

In [37]:
arr4 = np.array([4, 5, 6, 7])

In [38]:
arr2 + arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

In [39]:
# Comparing two equally shaped arrays works element by element
# and produces a boolean array of the same shape.
arr1 = np.array([[1, 2, 3], [3, 4, 5]])
arr2 = np.array([[2, 2, 3], [1, 2, 5]])
arr1 == arr2

array([[False,  True,  True],
       [False, False,  True]])

In [40]:
(arr1 <= arr2).dtype

dtype('bool')

In [41]:
(arr1 <= arr2).sum()

4

In [42]:
# 3-D array of shape (3, 2, 4): three 2x4 matrices stacked along the first axis.
# The single 19.5 entry makes every element a float.
arr3 = np.array([
    [[11, 12, 13, 14], [13, 14, 15, 19]],
    [[15, 16, 17, 21], [63, 92, 36, 18]],
    [[98, 32, 81, 23], [17, 18, 19.5, 43]],
])

In [43]:
arr3[1, 1, 2]

36.0

In [44]:
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [45]:
np.ones((2, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [46]:
np.eye(2)

array([[1., 0.],
       [0., 1.]])

In [47]:
np.random.rand(5)

array([0.71128324, 0.44168402, 0.07202226, 0.52273535, 0.86749275])

In [48]:
np.random.randn(2, 3)

array([[ 0.74130493,  0.90328932, -0.95375567],
       [ 2.19571462, -0.62703221,  0.47523507]])

In [49]:
np.full([2, 3], 42)

array([[42, 42, 42],
       [42, 42, 42]])

In [50]:
np.arange(10, 90, 3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58,
       61, 64, 67, 70, 73, 76, 79, 82, 85, 88])

In [51]:
np.arange(10, 90, 3).reshape(3, 3, 3)

array([[[10, 13, 16],
        [19, 22, 25],
        [28, 31, 34]],

       [[37, 40, 43],
        [46, 49, 52],
        [55, 58, 61]],

       [[64, 67, 70],
        [73, 76, 79],
        [82, 85, 88]]])

In [52]:
np.linspace(3, 27, 5)

array([ 3.,  9., 15., 21., 27.])

In [53]:
np.__version__

'1.24.3'

In [54]:
# Transpose, matrix product and element-wise product of two 2x2 matrices.
a = np.array([[1, 2], [3, 4]])
b = np.array([[3, 4], [5, 6]])
print(a.T)
c = a @ b  # matrix product (rows of a times columns of b)
print(c)
d = np.multiply(a, b)  # element-wise product
print(d)

[[1 3]
 [2 4]]
[[13 16]
 [29 36]]
[[ 3  8]
 [15 24]]


In [56]:
# Determinant of `a`. It is computed in floating point, so the exact value -2
# comes back with a small rounding error. Note that this rebinds `c`.
c = np.linalg.det(a)
c

-2.0000000000000004

In [57]:
# Inverse of `a`; `c` is rebound to the resulting matrix.
c = np.linalg.inv(a)
print(c)

[[-2.   1. ]
 [ 1.5 -0.5]]


In [59]:
# Insert a new leading axis: the (2, 2) matrix becomes a (1, 2, 2) array.
d = a[np.newaxis, :]
print(d)
print(d.shape)


[[[1 2]
  [3 4]]]
(1, 2, 2)


In [60]:
# Insert a new axis in the middle: each row of `a` becomes its own 1x2 block,
# giving shape (2, 1, 2).
e = a[:, np.newaxis]
print(e)
print(e.shape)

[[[1 2]]

 [[3 4]]]
(2, 1, 2)


In [70]:
# Random number generation with the np.random module-level functions.
# NOTE(review): no seed is set in the cells shown, so the printed values
# will differ on every run.
a = np.random.random((3,2)) # uniform distribution between 0 and 1; shape is passed as a tuple
print(a)
print("============================")
b = np.random.randn(3,2) # normal/Gaussian distribution, mean 0 and unit variance
# randn takes no shape tuple: each dimension is a separate positional argument
print(b)
print("============================")

# With many samples, the sample mean and variance come out close to 0 and 1.
R = np.random.randn(10000)
print(f'Mean: {R.mean()}, Variance: {R.var()}, Standard Deviation: {R.std()}')
print('============================')
R = np.random.randn(10, 3)
print(f' Mean: {R.mean()}') # mean over all elements of the array
print('============================')
# random integers: arguments are low, high, size; high is exclusive
R = np.random.randint(3,10,size=(3,3)) # with a single bound x, values are drawn from 0 up to x (exclusive)
print(R)
print('============================')

# given an integer n, choice draws from 0 up to n (exclusive)
c = np.random.choice(7, size=10)
print(c)
print('=============================')
# given an array-like, choice draws random values from its elements
d = np.random.choice([1,2,3,4], size=8)
print(d)

[[0.47484028 0.3329906 ]
 [0.00512971 0.15697478]
 [0.3014366  0.75614663]]
[[-0.3971341  -2.3365596 ]
 [-0.84661527 -0.61573436]
 [ 0.82309369 -1.14491299]]
Mean: 0.005853978846422541, Variance: 0.9814940866396031, Standard Deviation: 0.9907038339683576
 Mean: -0.05267032205840178
[[4 4 4]
 [4 7 8]
 [3 9 7]]
[0 5 5 4 5 1 6 1 2 2]
[1 2 1 1 3 3 4 2]


In [73]:
# Eigenvalues and eigenvectors of a 2x2 matrix.
a = np.array([[1,2], [3,4]])
eigenvalues, eigenvectors = np.linalg.eig(a)
# Note: use eigh if your matrix is symmetric (faster)
print(f'Eigen values: {eigenvalues}')
print(f' column vectors: {eigenvectors}') # eigenvectors are stored as the columns of this matrix
print(eigenvectors[:,0]) # column 0 is the eigenvector belonging to eigenvalues[0]

# Verify the defining relation: eigenvector * eigenvalue == A @ eigenvector
d = eigenvectors[:,0] * eigenvalues[0]
e = a @ eigenvectors[:, 0]
print(d, e)
print(d == e) # exact comparison can fail: the values print identically but differ by rounding error

# Correct way to compare floating-point arrays: equality within a tolerance
print(np.allclose(d,e))

Eigen values: [-0.37228132  5.37228132]
 column vectors: [[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]
[-0.82456484  0.56576746]
[ 0.30697009 -0.21062466] [ 0.30697009 -0.21062466]
[ True False]
True


In [74]:
# Solve a linear system of 2 equations in 2 unknowns:
#   x1 + x2         = 2200
#   1.5 x1 + 4 x2   = 5050
# A holds the coefficients (one row per equation), b the right-hand sides.
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200,5050])

# Mathematically: A x = b  <=>  x = A^(-1) b

# But: explicitly forming the inverse is slower and less accurate
x = np.linalg.inv(A).dot(b) # not recommended
print(x)
x = np.linalg.solve(A,b) # preferred: solves the system without forming the inverse
print(x)

[1500.  700.]
[1500.  700.]


#### loading from csv
#### https://www.python-engineer.com/videos/how-to-load-data/

#### 1) load with np.loadtxt()
#### useful option: skiprows=1 (skip the first line, e.g. a header row)
#### data = np.loadtxt(FILE_NAME, delimiter=",", dtype=np.float32)
#### print(data.shape, data.dtype)
 
#### 2) load with np.genfromtxt()
#### useful options: skip_header=0, missing_values="---", filling_values=0.0
#### data = np.genfromtxt(FILE_NAME, delimiter=",", dtype=np.float32)
#### print(data.shape)