# Data Wrangling with NumPy

In [40]:
import numpy as np
import numpy.linalg
from numpy.ma.core import swapaxes

In [47]:
arr = np.arange(100000)
lst = list(range(100000))

##### _NumPy Takes less time than python loops._

In [42]:
import timeit

In [48]:
%timeit arr2 = arr * 2

52.7 μs ± 3.21 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [49]:
%timeit lst2 = [x * 2 for x in lst]

6.12 ms ± 120 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### Multidimensional Array Object

In [50]:
import numpy as np

In [52]:
data = np.array([[1.5, -0.1, 3], [0,-3,6.5]])

In [54]:
print(data)

[[ 1.5 -0.1  3. ]
 [ 0.  -3.   6.5]]


In [57]:
data = data *2
print(data)

[[  6.   -0.4  12. ]
 [  0.  -12.   26. ]]


In [58]:
print(data + data)

[[ 12.   -0.8  24. ]
 [  0.  -24.   52. ]]


In [59]:
data.shape

(2, 3)

In [61]:
data.dtype

dtype('float64')

#### Creating ndarray

###### _This accepts any sequence-like object (including other arrays) and produces a new NumPy array containing the passed data._


In [62]:
data1 = [6,5,7,8,9,12]
arr = np.array(data)

In [64]:
print(arr)

[[  6.   -0.4  12. ]
 [  0.  -12.   26. ]]


In [65]:
data2 = [[2,3],[4,5]]
arr2 = np.array(data2)

In [66]:
print(arr2)

[[2 3]
 [4 5]]


In [67]:
arr2.ndim

2

In [68]:
arr2.shape

(2, 2)

In [69]:
arr2.dtype

dtype('int64')

In [70]:
zero = np.zeros(10)
zero

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [74]:
zero = np.zeros((2,4))
print(zero)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [79]:
faka = np.empty((2,3,4))

In [80]:
print(faka)

[[[6.23042070e-307 1.86918699e-306 1.69121096e-306 1.86921822e-306]
  [1.42417900e-306 1.78019082e-306 1.37961913e-306 6.23057349e-307]
  [1.06811422e-306 1.42417221e-306 1.37961641e-306 1.60220393e-306]]

 [[8.34424342e-308 1.22382882e-307 1.11261027e-306 1.11261502e-306]
  [1.42410839e-306 7.56597770e-307 6.23059726e-307 8.90104239e-307]
  [1.11259940e-306 1.51320640e-306 2.22522596e-306 2.22522596e-306]]]


In [82]:
np.arange(20)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [90]:
osomitot = np.zeros((3,2,4,3))

In [91]:
osomitot

array([[[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]],


       [[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]],


       [[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]]])

#### Type Casting

In [92]:
arr = np.array([1, 2, 3, 4, 5])

In [93]:
arr.dtype

dtype('int64')

In [94]:
spell = arr.astype(np.float64)

In [95]:
spell

array([1., 2., 3., 4., 5.])

In [96]:
spell.dtype

dtype('float64')

In [97]:
spell = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

In [98]:
reverse_spell = spell.astype(np.int64)

In [99]:
reverse_spell

array([ 3, -1, -2,  0, 12, 10])

In [102]:
suta = np.array(["1.25", "-9.6", "42"], dtype=np.bytes_)

In [104]:
spell = suta.astype(np.float64)

In [105]:
spell

array([ 1.25, -9.6 , 42.  ])

In [106]:
suta = np.array([1.25, -9.6, 42], dtype="i4")

In [107]:
suta

array([ 1, -9, 42], dtype=int32)

In [108]:
abar_suta = suta.astype(spell.dtype)

In [109]:
abar_suta

array([ 1., -9., 42.])

#### Arithmetic Operation

In [110]:
dungeon = np.array ([[2,3,4],[1,6,7]])

In [112]:
print(dungeon)

[[2 3 4]
 [1 6 7]]


In [116]:
print(dungeon * dungeon)

[[ 4  9 16]
 [ 1 36 49]]


In [117]:
print( dungeon + dungeon)

[[ 4  6  8]
 [ 2 12 14]]


In [118]:
print( 2/dungeon )

[[1.         0.66666667 0.5       ]
 [2.         0.33333333 0.28571429]]


In [120]:
print(dungeon ** 2)

[[ 4  9 16]
 [ 1 36 49]]


In [121]:
dragon = np.array([[3,5,6],[12,15,15]])

In [124]:
print(dungeon)

[[2 3 4]
 [1 6 7]]


In [125]:
print( dragon )

[[ 3  5  6]
 [12 15 15]]


In [123]:
print(dragon > dungeon)

[[ True  True  True]
 [ True  True  True]]


#### Indexing & Slicing

In [130]:
hati = np.array([17,2,2004,5,2,2026])

In [131]:
hati

array([  17,    2, 2004,    5,    2, 2026])

In [132]:
hati[2]

np.int64(2004)

In [133]:
hati[2:5]

array([2004,    5,    2])

In [134]:
hati [2:4] = 112

In [135]:
hati

array([  17,    2,  112,  112,    2, 2026])

In [136]:
dukhi_hati = hati[2:5]

In [137]:
dukhi_hati

array([112, 112,   2])

In [139]:
dukhi_hati[2] = 15

In [140]:
dukhi_hati

array([112, 112,  15])

In [141]:
hati

array([  17,    2,  112,  112,   15, 2026])

In [142]:
hati[:] = 17

In [143]:
hati

array([17, 17, 17, 17, 17, 17])

In [144]:
dukhi_hati

array([17, 17, 17])

In [145]:
sukhi_hati = hati[2:5].copy()

In [146]:
sukhi_hati

array([17, 17, 17])

In [147]:
sukhi_hati[2] = 00

In [149]:
sukhi_hati

array([17, 17,  0])

In [150]:
hati

array([17, 17, 17, 17, 17, 17])

In [151]:
boro_hati = np.array([[1,2,3],[4,5,6],[7,8,9]])
boro_hati[0]

array([1, 2, 3])

In [153]:
boro_hati[1]

array([4, 5, 6])

In [154]:
boro_hati[2]

array([7, 8, 9])

In [155]:
boro_hati[0,2]

np.int64(3)

In [156]:
chinta_hati = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [157]:
chinta_hati

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [161]:
print(chinta_hati[0])

[[1 2 3]
 [4 5 6]]


In [165]:
print(chinta_hati[1])

[[ 7  8  9]
 [10 11 12]]


In [166]:
ager_hati = chinta_hati[0].copy()

In [167]:
chinta_hati[0] = 1

In [170]:
print(chinta_hati[0])

[[1 1 1]
 [1 1 1]]


In [171]:
chinta_hati[0] = ager_hati
print(chinta_hati[0])

[[1 2 3]
 [4 5 6]]


In [172]:
chinta_hati

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [173]:
chinta_hati[0,1]

array([4, 5, 6])

In [174]:
chinta_hati[1,0]

array([7, 8, 9])

In [198]:
chinta_hati[0:1,0:1]

array([[[1, 2, 3]]])

In [199]:
chinta_hati

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [200]:
chinta_hati = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [202]:
print(chinta_hati)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [229]:
print(chinta_hati[ : 2])

[[1 2 3]
 [4 5 6]]


In [235]:
print(chinta_hati[ :2,0:3 ])

[[1 2 3]
 [4 5 6]]


In [222]:
choto_hati = chinta_hati[1, :2]

In [223]:
choto_hati

array([4, 5])

In [224]:
choto_hati.shape

(2,)

In [225]:
chinta_hati

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [228]:
print(chinta_hati[:2,2])

[3 6]


In [242]:
print(chinta_hati[:,1:3])

[[2 3]
 [5 6]
 [8 9]]


_Boolean Indexing_

In [243]:
hati = np.array(["montu","jontu","boltu","pintu","boltu","ontu","santu"])

In [244]:
hati

array(['montu', 'jontu', 'boltu', 'pintu', 'boltu', 'ontu', 'santu'],
      dtype='<U5')

In [257]:
boyos = np.array([[4, 7], [0, 2], [-5, 6], [0, 0], [1, 2],[-12, -4], [3, 4]])

In [258]:
boyos

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [259]:
hati == "boltu"

array([False, False,  True, False,  True, False, False])

In [260]:
boyos[hati == "boltu"]

array([[-5,  6],
       [ 1,  2]])

In [261]:
print(boyos[hati == "boltu"])

[[-5  6]
 [ 1  2]]


In [262]:
boyos[hati == "boltu"]

array([[-5,  6],
       [ 1,  2]])

In [265]:
print(boyos[hati == "boltu",1:])

[[6]
 [2]]


In [266]:
print(boyos[hati == "boltu",1])

[6 2]


In [267]:
boyos[boyos<5] = 0

In [268]:
boyos

array([[0, 7],
       [0, 0],
       [0, 6],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0]])

In [269]:
print(boyos)

[[0 7]
 [0 0]
 [0 6]
 [0 0]
 [0 0]
 [0 0]
 [0 0]]


#### Transpose & Swapping

In [271]:
biral = np.arange(12).reshape(3,4)

In [272]:
biral

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [273]:
print(biral)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [274]:
biral.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [275]:
print(biral.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [276]:
print(np.dot(biral,biral.T))

[[ 14  38  62]
 [ 38 126 214]
 [ 62 214 366]]


In [277]:
print(np.dot(biral.T,biral))

[[ 80  92 104 116]
 [ 92 107 122 137]
 [104 122 140 158]
 [116 137 158 179]]


In [278]:
biral.swapaxes(1,0)

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [279]:
print(biral)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [286]:
print(biral.swapaxes(0,1))

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


#### Pseudorandom Number Generation

In [290]:
nomuna = np.random.standard_normal((4,4))

In [293]:
nomuna

array([[ 0.68665779,  0.63560727, -0.20572364, -0.87558403],
       [ 0.81760289,  0.69760869, -0.14921211, -0.49808364],
       [-1.22489863,  0.01248803, -0.32447767,  0.28629568],
       [-1.19826351, -0.42092796, -0.18490619,  0.27899796]])

#### Universal Functions

In [295]:
borgo = np.arange(15)

In [296]:
borgo

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [297]:
np.sqrt(borgo)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766, 3.31662479, 3.46410162, 3.60555128, 3.74165739])

In [298]:
np.exp(borgo)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04,
       1.62754791e+05, 4.42413392e+05, 1.20260428e+06])

In [299]:
a = np.random.standard_normal(8)

In [304]:
b = np.random.standard_normal(8)

In [305]:
a

array([-1.28415004,  0.19303299,  1.41216838, -0.06823789,  2.48449295,
        0.12696753,  0.11857918,  1.38888088])

In [306]:
b

array([-1.23212223, -2.4727967 ,  0.66249541, -0.31793732, -1.17184422,
        1.47814322, -0.1628037 , -1.30730432])

In [307]:
np.maximum(a,b)

array([-1.23212223,  0.19303299,  1.41216838, -0.06823789,  2.48449295,
        1.47814322,  0.11857918,  1.38888088])

In [308]:
a

array([-1.28415004,  0.19303299,  1.41216838, -0.06823789,  2.48449295,
        0.12696753,  0.11857918,  1.38888088])

In [309]:
rem,whole = np.modf(a)

In [310]:
rem

array([-0.28415004,  0.19303299,  0.41216838, -0.06823789,  0.48449295,
        0.12696753,  0.11857918,  0.38888088])

In [311]:
whole

array([-1.,  0.,  1., -0.,  2.,  0.,  0.,  1.])

In [312]:
np.sign(a)

array([-1.,  1.,  1., -1.,  1.,  1.,  1.,  1.])

In [313]:
np.ceil(a)

array([-1.,  1.,  2., -0.,  3.,  1.,  1.,  2.])

In [314]:
a

array([-1.28415004,  0.19303299,  1.41216838, -0.06823789,  2.48449295,
        0.12696753,  0.11857918,  1.38888088])

In [315]:
np.floor(a)

array([-2.,  0.,  1., -1.,  2.,  0.,  0.,  1.])

In [318]:
a = np.arange(1,10,2)

In [319]:
a

array([1, 3, 5, 7, 9])

In [322]:
b = np.arange(0,10,2)

In [323]:
b

array([0, 2, 4, 6, 8])

In [324]:
points = np.arange(-5,5,0.01)

In [325]:
points

array([-5.0000000e+00, -4.9900000e+00, -4.9800000e+00, -4.9700000e+00,
       -4.9600000e+00, -4.9500000e+00, -4.9400000e+00, -4.9300000e+00,
       -4.9200000e+00, -4.9100000e+00, -4.9000000e+00, -4.8900000e+00,
       -4.8800000e+00, -4.8700000e+00, -4.8600000e+00, -4.8500000e+00,
       -4.8400000e+00, -4.8300000e+00, -4.8200000e+00, -4.8100000e+00,
       -4.8000000e+00, -4.7900000e+00, -4.7800000e+00, -4.7700000e+00,
       -4.7600000e+00, -4.7500000e+00, -4.7400000e+00, -4.7300000e+00,
       -4.7200000e+00, -4.7100000e+00, -4.7000000e+00, -4.6900000e+00,
       -4.6800000e+00, -4.6700000e+00, -4.6600000e+00, -4.6500000e+00,
       -4.6400000e+00, -4.6300000e+00, -4.6200000e+00, -4.6100000e+00,
       -4.6000000e+00, -4.5900000e+00, -4.5800000e+00, -4.5700000e+00,
       -4.5600000e+00, -4.5500000e+00, -4.5400000e+00, -4.5300000e+00,
       -4.5200000e+00, -4.5100000e+00, -4.5000000e+00, -4.4900000e+00,
       -4.4800000e+00, -4.4700000e+00, -4.4600000e+00, -4.4500000e+00,
      

In [326]:
xs, ys = np.meshgrid(points, points)

In [327]:
ys

array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ...,
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]],
      shape=(1000, 1000))

In [328]:
z = np.sqrt(xs **2 + ys**2)

In [329]:
z

array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
        7.06400028],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       ...,
       [7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
        7.04279774],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568]], shape=(1000, 1000))

#### File IO

#### Linear Algebra

In [4]:
import numpy as np

In [21]:
#Matrix Multiplication
x = np.array([[1., 5., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])

In [7]:
x

array([[1., 2., 3.],
       [4., 5., 6.]])

In [8]:
y

array([[ 6., 23.],
       [-1.,  7.],
       [ 8.,  9.]])

In [9]:
x.dot(y)

array([[ 28.,  64.],
       [ 67., 181.]])

In [10]:
np.dot(x,y)

array([[ 28.,  64.],
       [ 67., 181.]])

In [17]:
#numpy.linalg
from numpy.linalg import inv, qr

In [22]:
mat = np.dot(x.T,x)

In [23]:
print(mat)

[[17. 25. 27.]
 [25. 50. 45.]
 [27. 45. 45.]]


In [25]:
print(np.linalg.inv(mat))

[[-2.81474977e+14 -1.12589991e+14  2.81474977e+14]
 [-1.12589991e+14 -4.50359963e+13  1.12589991e+14]
 [ 2.81474977e+14  1.12589991e+14 -2.81474977e+14]]


In [27]:
print(np.dot(mat,np.linalg.inv(mat)))

[[ 1.       -0.015625  0.90625 ]
 [ 0.        1.140625 -0.15625 ]
 [ 0.        0.140625  1.84375 ]]


In [30]:
np.linalg.diagonal(mat)

array([17., 50., 45.])

#### Random Walks

In [31]:
import random
position = 0
walk = [position]
nsteps = 1000
for _ in range(nsteps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

In [32]:
npsteps = 1000

In [33]:
rng = np.random.default_rng(seed=12345)

In [34]:
draws = rng.integers(0,2,size=npsteps)

In [35]:
steps = np.where(draws==0,1,-1)

In [36]:
walk = steps.cumsum()

In [37]:
walk.min()

np.int64(-8)

In [38]:
walk.max()

np.int64(50)

In [39]:
np.abs(walk) >= 10

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [42]:
(np.abs(walk) >= 10).argmax()

np.int64(155)