# **Machine Learning 🤔**

In [6]:
# Load the `watermark` extension and record the interpreter, machine, and
# package versions this notebook was executed with.
%load_ext watermark
%watermark -v -m -p numpy,ray,pandas,sklearn

Python implementation: CPython
Python version       : 3.10.12
IPython version      : 8.14.0

numpy  : 1.26.0
ray    : 2.4.0
pandas : 1.5.3
sklearn: 1.2.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 6.2.0-34-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 12
Architecture: 64bit



In [328]:
import os
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import scipy

# Prefer modin (backed by Ray) as a drop-in pandas replacement; fall back to
# plain pandas when either package is missing or Ray cannot start.
try:
    import modin.pandas as pd
    import ray

    # ignore_reinit_error=True keeps this cell idempotent: re-running it while
    # Ray is already up would otherwise raise and silently swap `pd` over to
    # plain pandas.
    ray.init(ignore_reinit_error=True)
except Exception as exc:
    # Best-effort fallback, but say why so the switch is not invisible.
    print(f"modin/ray unavailable ({exc!r}); falling back to pandas")
    import pandas as pd

## **NumPy Exercises**

In [10]:
# Build a 3x3 array from a nested Python list (one inner list per row)
rows = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
array = np.array(rows)
array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [16]:
# Zero-filled array; the shape tuple is (rows, columns)
n_rows, n_cols = 3, 3
array = np.zeros(shape=(n_rows, n_cols))
array

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [20]:
# Shape, then total memory footprint in bytes (element count x bytes per element)
print(array.shape)
print(array.nbytes)

(3, 3)
72


In [24]:
# Print NumPy's summary of the array's memory layout (shape, strides, item
# size, alignment, contiguity, byte order, dtype).
np.info(array)

class:  ndarray
shape:  (3, 3)
strides:  (24, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x55cb02c52030
byteorder:  little
byteswap:  False
type: float64


In [32]:
# Modify elements of the array
array = np.zeros((3, 3))
array[2][1] = 4  # chained indexing: selects row 2 first, then column 1
array[2, 0] = 3  # single (row, column) index -- the preferred form
# Fancy indexing pairs the two lists element-wise, i.e. it sets (1, 2) and
# (0, 1) -- like `for r, c in zip([1, 0], [2, 1])`.
array[[1, 0], [2, 1]] = 1
try:
    array[3][0] = 1  # row 3 does not exist in a 3x3 array
except IndexError as err:
    # Catch only the out-of-bounds error being demonstrated so that any
    # unrelated failure still surfaces.
    print(err)
array

index 3 is out of bounds for axis 0 with size 3


array([[0., 1., 0.],
       [0., 0., 1.],
       [3., 4., 0.]])

In [33]:
# Values 0..8 laid out row by row in a 3x3 grid
array = np.reshape(np.arange(9), (3, 3))
array

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [42]:
 # reverse the order of row [0, n-1] -> [n-1, 0, -1]
print(array[::-1, :])

# Reverse the column order: every row is kept, the last axis is walked backwards
print(array[..., ::-1])

[[6 7 8]
 [3 4 5]
 [0 1 2]]
[[2 1 0]
 [5 4 3]
 [8 7 6]]


In [46]:
# 3x3 identity matrix stored as 32-bit integers
array = np.identity(3, dtype=np.int32)
array

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=int32)

In [57]:
# 5x5 array: ones around the border, zeros in the interior
array = np.zeros((5, 5))
array[[0, -1], :] = 1  # top and bottom rows
array[:, [0, -1]] = 1  # left and right columns
array


array([[1., 1., 1., 1., 1.],
       [1., 0., 0., 0., 1.],
       [1., 0., 0., 0., 1.],
       [1., 0., 0., 0., 1.],
       [1., 1., 1., 1., 1.]])

In [59]:
# Surround a 3x3 block of ones with a one-cell frame filled with 2
inner = np.ones((3, 3))
array = np.pad(inner, 1, mode="constant", constant_values=2)
array

array([[2., 2., 2., 2., 2.],
       [2., 1., 1., 1., 2.],
       [2., 1., 1., 1., 2.],
       [2., 1., 1., 1., 2.],
       [2., 2., 2., 2., 2.]])

In [62]:
# How NaN behaves in arithmetic and comparisons, plus one float-rounding pitfall
nan_checks = (
    0 * np.nan,        # arithmetic with NaN stays NaN
    np.nan == np.nan,  # NaN does not compare equal, not even to itself
    np.inf > np.nan,   # ordering comparisons against NaN are False
    np.inf < np.nan,
    np.nan - np.nan,   # still NaN
    0.3 == 0.1 * 3,    # binary floating point: 0.1 * 3 is not exactly 0.3
)
for outcome in nan_checks:
    print(outcome)

nan
False
False
False
nan
False


In [69]:
# 5x5 matrix carrying 1, 2, 3, 4 on the first sub-diagonal
# (k=-1 moves the diagonal one step below the main one)
array = np.diag(np.arange(1, 5), k=-1)
array

array([[0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0]])

In [76]:
# Checkerboard pattern: ones wherever row and column index share parity
array = np.zeros((8, 8))
# Open-ended `::2` / `1::2` slices reach the final row/column for any size;
# a `0:-1:2` slice would skip the last index whenever the size is odd.
array[::2, ::2] = 1    # even rows, even columns
array[1::2, 1::2] = 1  # odd rows, odd columns
array

array([[1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.]])

In [83]:
# Convert flat index 10 into a multi-dimensional index for the array's shape
print(np.unravel_index(10, array.shape))

# Repeat a 2x2 pattern four times along each axis
cell_pattern = np.array([[0, 1], [1, 0]])
print(np.tile(cell_pattern, (4, 4)))

(1, 2)
[[0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]]


In [86]:
# Fix the global seed so the draws below are reproducible
np.random.seed(42)

# 3x3 array of uniform samples from [0, 1)
array = np.random.rand(3, 3)
print(array)

[[0.37454012 0.95071431 0.73199394]
 [0.59865848 0.15601864 0.15599452]
 [0.05808361 0.86617615 0.60111501]]


In [88]:
# Summary statistics over the whole array, then per row
# (axis=-1 reduces along the last axis, so each row yields one value)
print(array.min(), array.max(), array.mean(), array.std())
for per_row in (array.min(axis=-1), array.max(axis=-1), array.mean(axis=-1), array.std(axis=-1)):
    print(per_row)

0.05808361216819946 0.9507143064099162 0.4992549757478559 0.30888608340705315
[0.37454012 0.15599452 0.05808361]
[0.95071431 0.59865848 0.86617615]
[0.68574946 0.30355721 0.50845826]
[0.23748416 0.20866811 0.3363454 ]


In [85]:
# Standardise: subtract the global mean, divide by the global standard deviation
array = np.random.random((4, 3))
centered = array - array.mean()
array = centered / array.std()
array

array([[-0.93249038, -0.26556256,  0.05869067],
       [ 0.4506235 ,  1.88848002, -0.66956567],
       [ 0.70474267,  1.04630988, -1.33899508],
       [ 1.11241375, -0.79691986, -1.25772695]])

In [89]:
# Custom structured dtype: an RGBA colour made of four unsigned bytes.
# Each field is declared as (name, type); the extra trailing `1` shape is
# dropped because NumPy deprecates `(type, 1)` as a spelling of a scalar
# field (it emits a FutureWarning).
color = np.dtype([
    ("r", np.ubyte),
    ("g", np.ubyte),
    ("b", np.ubyte),
    ("a", np.ubyte),
])

color



dtype([('r', 'u1'), ('g', 'u1'), ('b', 'u1'), ('a', 'u1')])

In [93]:
# Two RGBA pixels stored with the structured dtype
pixels = [
    (255, 0, 0, 255),
    (0, 255, 0, 255),
]
colors = np.array(pixels, dtype=color)

# Indexing by field name selects that channel across every pixel
for channel in ("r", "g", "b", "a"):
    print(colors[channel])

[255   0]
[  0 255]
[0 0]
[255 255]


In [91]:
# Matrix product of a (3, 2) and a (2, 1) array, written two equivalent ways
array1 = np.ones((3, 2))
array2 = np.ones((2, 1))
print(array1.dot(array2))
print(np.matmul(array1, array2))

[[2.]
 [2.]
 [2.]]
[[2.]
 [2.]
 [2.]]


In [101]:
# Boolean mask: negate the entries strictly between 3 and 8
array = np.arange(11)
indices = (array > 3) & (array < 8)
array[indices] = -array[indices]
indices, array

(array([False, False, False, False,  True,  True,  True,  True, False,
        False, False]),
 array([ 0,  1,  2,  3, -4, -5, -6, -7,  8,  9, 10]))

In [102]:
# Built-in sum() treats its second argument as the start value (-1 + 0+1+2+3+4),
# whereas np.sum() treats it as the axis to reduce over (axis=-1)
sum(range(5), start=-1), np.sum(range(5), axis=-1)

(9, 10)

In [132]:
# Cast to float32, then evaluate a handful of element-wise expressions
array = np.arange(-2, 2).astype(np.float32).reshape((2, 2))
results = (
    array,
    array**array,  # 0**0 evaluates to 1
    2 << array.astype(np.int32) >> 2,
    array < -array,
    1j*array,
    array/1/1,
)
for result in results:
    print(result)

[[-2. -1.]
 [ 0.  1.]]
[[ 0.25 -1.  ]
 [ 1.    1.  ]]
[[0 0]
 [0 1]]
[[ True  True]
 [False False]]
[[-0.-2.j -0.-1.j]
 [ 0.+0.j  0.+1.j]]
[[-2. -1.]
 [ 0.  1.]]


In [130]:
# Corner cases: dividing zero by zero and casting NaN to an integer

true_quotient = np.array(0) / np.array(0)     # true division -> nan
floor_quotient = np.array(0) // np.array(0)   # integer floor division -> 0
nan_roundtrip = np.array([np.nan]).astype(int).astype(float)  # NaN has no integer equivalent
print(true_quotient, floor_quotient, nan_roundtrip, sep="\n")

nan
0
[-9.22337204e+18]




In [150]:
# Round every element away from zero
array = np.random.uniform(-10, 10, 10)
magnitudes = np.ceil(np.abs(array))       # round the magnitude up ...
array2 = np.copysign(magnitudes, array)   # ... then put the original sign back
array, array2

(array([-9.58831011,  9.39819704,  6.64885282, -5.75321779, -6.36350066,
        -6.3319098 , -3.91515514,  0.49512863, -1.36109963, -4.1754172 ]),
 array([-10.,  10.,   7.,  -6.,  -7.,  -7.,  -4.,   1.,  -2.,  -5.]))

In [153]:
# Set operations on two random integer arrays
array1 = np.random.randint(0, 10, size=10)
array2 = np.random.randint(0, 10, size=10)

common = np.intersect1d(array1, array2)   # values present in both
combined = np.union1d(array1, array2)     # values present in either
print(common, combined)

[6 7 8] [0 1 3 4 6 7 8 9]


In [162]:
# Silence floating-point warnings + emath (scimath)
#
# Global alternative to the context manager used below:
#     defaults = np.seterr(all="ignore")
#     <-- code -->
#     _ = np.seterr(**defaults)

with np.errstate(all="ignore"):
    real_root = np.sqrt(-1)           # nan: no real square root
    complex_root = np.emath.sqrt(-1)  # 1j: emath returns a complex result
    print(real_root, complex_root)
    array = np.ones(1) / 0
    print(array)

nan 1j
[inf]


In [171]:
# Dates: yesterday / today / tomorrow, then a month-resolution range.
# `today` is looked up once so the three dates stay consistent even if the
# cell happens to run across midnight.
one_day = np.timedelta64(1, 'D')
today = np.datetime64('today', 'D')
yesterday = today - one_day
tomorrow = today + one_day
print(yesterday, today, tomorrow, sep='\n')

# With a month dtype the endpoints are reduced to their month, and the stop
# month (2023-08) is excluded.
array = np.arange('2023-03-17', '2023-08-29', dtype='datetime64[M]')
print(array)


2023-10-10
2023-10-11
2023-10-12
['2023-03' '2023-04' '2023-05' '2023-06' '2023-07']


In [173]:
# Compute (A + B) * (-A / 2) twice: plainly, then in place without temporaries
A = np.ones(3)*1
B = np.ones(3)*2
# Keep the straightforward result as a reference; a bare expression in the
# middle of a cell is evaluated and then thrown away.
expected = (A+B)*(-A/2)

# In-place variant: each ufunc writes into one of its operands via `out=`
np.add(A, B, out=B)        # B <- A + B
np.divide(A, 2, out=A)     # A <- A / 2
np.negative(A, out=A)      # A <- -A
np.multiply(A, B, out=A)   # A <- A * B
np.testing.assert_allclose(A, expected)  # both routes must agree
A

array([-1.5, -1.5, -1.5])

In [185]:
# Several ways to take the integer part of signed floats
array = np.random.uniform(-10, 10, 10)
by_modulo = array - array%1     # subtract the (non-negative) remainder
rounded_down = np.floor(array)  # towards -inf
rounded_up = np.ceil(array)     # towards +inf
toward_zero = np.trunc(array)   # drops the fractional part
print(array, by_modulo, rounded_down, rounded_up, toward_zero, sep="\n")

[ 0.61869167 -1.04433671  1.05786178  1.85393448 -8.38293347 -2.60691088
 -5.15680123  6.06279513 -0.59398731  9.66846282]
[ 0. -2.  1.  1. -9. -3. -6.  6. -1.  9.]
[ 0. -2.  1.  1. -9. -3. -6.  6. -1.  9.]
[ 1. -1.  2.  2. -8. -2. -5.  7. -0. 10.]
[ 0. -1.  1.  1. -8. -2. -5.  6. -0.  9.]


In [189]:
# 5x5 array in which every row counts 0..4
# (broadcasting adds the length-5 vector to each row of zeros)
array = np.zeros((5, 5)) + np.arange(5)
array

array([[0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.]])

In [194]:
# Build an array from a generator
def generate():
    """Yield the integers 0 through 9 in order."""
    yield from range(10)

# np.fromiter consumes any iterable, so a plain range() would work as well
array = np.fromiter(generate(), dtype=np.float32)
array

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)

In [224]:
# Sort an array of random integers in ascending order
array = np.sort(np.random.randint(0, 10, 5))
array

array([0, 5, 6, 8, 8])

In [226]:
# Make an array read-only
array = np.zeros(10)
array.setflags(write=False)
try:
    array[0] = 2   # rejected: the array is no longer writeable
except Exception as err:
    print(err)
finally:
    print(array)

assignment destination is read-only
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [254]:
array = np.arange(0, 100000, 10000)

%time np.prod(array[1:])
%time np.multiply.reduce(array[1:])

CPU times: user 41 µs, sys: 0 ns, total: 41 µs
Wall time: 46.5 µs
CPU times: user 22 µs, sys: 0 ns, total: 22 µs
Wall time: 26.7 µs


4125042171973730304

In [258]:
# Exact vs. tolerance-based comparison of float arrays
A = np.array([1.0, 2.0, 3.0])
B = np.array([1.0, 2.0000001, 3.0])
elementwise = A == B                 # strict, per element
within_tol = np.allclose(A, B)       # equal up to the default tolerances
identical = np.array_equal(A, B)     # same shape and exactly equal values
print(A, B, elementwise, within_tol, identical, sep='\n')

[1. 2. 3.]
[1.        2.0000001 3.       ]
[ True False  True]
True
False


In [278]:
# Stack two 3x3 integer arrays side by side and on top of each other
array1 = np.random.randint(0, 10, 9).reshape((3, 3))
array2 = np.random.randint(0, 10, 9).reshape((3, 3))

side_by_side = np.column_stack((array1, array2))  # shape (3, 6)
stacked_rows = np.vstack((array1, array2))        # shape (6, 3)
array1, array2, side_by_side, stacked_rows

(array([[9, 2, 4],
        [5, 8, 4],
        [0, 3, 4]]),
 array([[9, 9, 4],
        [6, 3, 0],
        [4, 6, 9]]),
 array([[9, 2, 4, 9, 9, 4],
        [5, 8, 4, 6, 3, 0],
        [0, 3, 4, 4, 6, 9]]),
 array([[9, 2, 4],
        [5, 8, 4],
        [0, 3, 4],
        [9, 9, 4],
        [6, 3, 0],
        [4, 6, 9]]))

In [268]:
# Random 5x2 matrix of Cartesian coordinates -> convert each row to polar form
cartesian_cordinates = np.random.random((5, 2))
x, y = cartesian_cordinates.T
radius = np.sqrt(x**2 + y**2)   # r
angle = np.arctan2(y, x)        # theta
polar_cordinates = np.column_stack((radius, angle))
cartesian_cordinates, polar_cordinates

(array([[0.47030063, 0.98342314],
        [0.39882444, 0.81643187],
        [0.79834512, 0.15071754],
        [0.50819878, 0.69581281],
        [0.8583588 , 0.32595891]]),
 array([[1.09009346, 1.1247174 ],
        [0.90863741, 1.11639347],
        [0.81244736, 0.18659139],
        [0.86163882, 0.93997914],
        [0.91816613, 0.36292563]]))

In [260]:
# Index of the largest value in a random integer array
array = np.random.randint(0, 10, 8)
array, array.argmax()


(array([9, 8, 8, 0, 8, 6, 8, 7]), 0)

In [284]:
# Structured array whose x/y fields sample the unit square [0, 1] x [0, 1] on a 5x5 grid
ticks = np.linspace(0, 1, 5)
grid_x, grid_y = np.meshgrid(ticks, ticks)
array = np.zeros((5, 5), dtype=[('x', np.float32), ('y', np.float32)])
array['x'] = grid_x
array['y'] = grid_y
print(array)

[[(0.  , 0.  ) (0.25, 0.  ) (0.5 , 0.  ) (0.75, 0.  ) (1.  , 0.  )]
 [(0.  , 0.25) (0.25, 0.25) (0.5 , 0.25) (0.75, 0.25) (1.  , 0.25)]
 [(0.  , 0.5 ) (0.25, 0.5 ) (0.5 , 0.5 ) (0.75, 0.5 ) (1.  , 0.5 )]
 [(0.  , 0.75) (0.25, 0.75) (0.5 , 0.75) (0.75, 0.75) (1.  , 0.75)]
 [(0.  , 1.  ) (0.25, 1.  ) (0.5 , 1.  ) (0.75, 1.  ) (1.  , 1.  )]]


In [291]:
# Cauchy matrix: C[i, j] = 1 / (x[i] - y[j])
x = np.arange(8)
y = x + 0.5
differences = x[:, np.newaxis] - y[np.newaxis, :]
C = 1. / differences
C, np.linalg.det(C)

(array([[-2.        , -0.66666667, -0.4       , -0.28571429, -0.22222222,
         -0.18181818, -0.15384615, -0.13333333],
        [ 2.        , -2.        , -0.66666667, -0.4       , -0.28571429,
         -0.22222222, -0.18181818, -0.15384615],
        [ 0.66666667,  2.        , -2.        , -0.66666667, -0.4       ,
         -0.28571429, -0.22222222, -0.18181818],
        [ 0.4       ,  0.66666667,  2.        , -2.        , -0.66666667,
         -0.4       , -0.28571429, -0.22222222],
        [ 0.28571429,  0.4       ,  0.66666667,  2.        , -2.        ,
         -0.66666667, -0.4       , -0.28571429],
        [ 0.22222222,  0.28571429,  0.4       ,  0.66666667,  2.        ,
         -2.        , -0.66666667, -0.4       ],
        [ 0.18181818,  0.22222222,  0.28571429,  0.4       ,  0.66666667,
          2.        , -2.        , -0.66666667],
        [ 0.15384615,  0.18181818,  0.22222222,  0.28571429,  0.4       ,
          0.66666667,  2.        , -2.        ]]),
 3638.16363711

In [298]:
# Element of the array closest to a random scalar v
array = np.arange(100)
v = np.random.uniform(0, 100)
distances = np.abs(array - v)
index = distances.argmin()
array[index]

7

In [320]:
# Nested structured dtype: a position (x, y) together with a colour (r, g, b).
# Fields are declared as (name, type) only -- the trailing `1` shape is
# deprecated by NumPy (FutureWarning) as a way to spell a scalar field.
color = np.dtype([
    ("r", np.ubyte),
    ("g", np.ubyte),
    ("b", np.ubyte),
])

position = np.dtype([
    ("x", np.ubyte),
    ("y", np.ubyte),
])

datatype = np.dtype([
    ("position", position),
    ("color", color),
])

# One record per row: ((x, y), (r, g, b))
array = np.array([
    ((1, 2), (2, 3, 4)),
    ((2, 3), (4, 5, 6)),
], dtype=datatype)

# Show the nested dtype, the full array, the x field of every position, and the first record's colour
display(datatype, array, array['position']['x'], array['color'][0])



dtype([('position', [('x', 'u1'), ('y', 'u1')]), ('color', [('r', 'u1'), ('g', 'u1'), ('b', 'u1')])])

array([((1, 2), (2, 3, 4)), ((2, 3), (4, 5, 6))],
      dtype=[('position', [('x', 'u1'), ('y', 'u1')]), ('color', [('r', 'u1'), ('g', 'u1'), ('b', 'u1')])])

array([1, 2], dtype=uint8)

(2, 3, 4)

In [322]:
# Four zero-initialised records with an inline nested dtype:
# position (x, y) and colour (r, g, b), every field a float.
# Fields are (name, type); the trailing `1` shape is omitted because NumPy
# deprecates it (FutureWarning) as a way to spell a scalar field.
array = np.zeros(4, [ ('position', [ ('x', float),
                                     ('y', float)]),
                      ('color',    [ ('r', float),
                                     ('g', float),
                                     ('b', float)])])
print(array)

[((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))
 ((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))]




In [329]:
# Pairwise Euclidean distances between random 2-D points, computed two ways
array = np.random.random((5, 2))

# By hand: row vectors minus their transposes broadcast to every (i, j) difference
x = array[:, 0][np.newaxis, :]
y = array[:, 1][np.newaxis, :]
dx, dy = x - x.T, y - y.T
print(np.sqrt(dx**2 + dy**2))

# With SciPy's ready-made routine
print(scipy.spatial.distance.cdist(array, array))

[[0.         0.68060587 0.72902682 0.93086318 0.94899237]
 [0.68060587 0.         0.7244187  0.72159601 0.28714832]
 [0.72902682 0.7244187  0.         0.25997017 0.96779519]
 [0.93086318 0.72159601 0.25997017 0.         0.90582586]
 [0.94899237 0.28714832 0.96779519 0.90582586 0.        ]]
[[0.         0.68060587 0.72902682 0.93086318 0.94899237]
 [0.68060587 0.         0.7244187  0.72159601 0.28714832]
 [0.72902682 0.7244187  0.         0.25997017 0.96779519]
 [0.93086318 0.72159601 0.25997017 0.         0.90582586]
 [0.94899237 0.28714832 0.96779519 0.90582586 0.        ]]


In [None]:
# read from file
