---
#image: dataloading.png
title: Advanced Numpy
subtitle: 
date: '2024-03-01'
categories: [Python, bigdata, projects]
author: Kunal Khurana
jupyter: python3
toc: True
---

In [1]:
import numpy as np
import pandas as pd

In [2]:
# seeded generator so the random examples in this notebook are reproducible
rng = np.random.default_rng(12345)

In [3]:
# shape of a 2-D array with one row and three columns
np.ones((1,3)).shape

(1, 3)

In [4]:
# strides = bytes to step to reach the next element along each axis,
# last axis first: 8 (one float64), 53 * 8 = 424, and 5 * 53 * 8 = 2120
np.ones((23,5,53), dtype=np.float64).strides

(2120, 424, 8)

### NumPy data type hierarchy

In [5]:
# ten ones stored as unsigned 16-bit integers
ints = np.ones(10, dtype=np.uint16)

In [6]:
# ten ones stored as 32-bit floats
floats = np.ones(10, dtype=np.float32)


In [7]:
ints

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint16)

In [8]:
floats

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [9]:
# A cell displays only its last expression, so two separate lines would
# silently drop the first result. Combine both checks so the displayed
# value reflects the integer check and the floating check together.
np.issubdtype(ints.dtype, np.integer) and np.issubdtype(floats.dtype, np.floating)

True

In [10]:
# mro() lists a dtype's parent classes, from most specific to most general:


np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [11]:
# the unsigned-integer dtype also counts as a np.number subtype
np.issubdtype(ints.dtype, np.number)

True

In [12]:
# The previous cell already checked ints; repeat the check for the float32
# array, since floating dtypes sit under np.number in the hierarchy as well.
np.issubdtype(floats.dtype, np.number)

True

### Advanced array manipulation

In [13]:
# reshaping: start from a 1-D array of eight values
arr = np.arange(8)

In [14]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [16]:
# arrange the eight values into 4 rows x 2 columns, filled row by row
arr.reshape((4,2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [17]:
# reshapes can be chained: the (4, 2) result is reshaped again to (2, 4)
arr.reshape((4,2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [18]:
# rebind arr to a 15-element array for the next reshape examples
arr = np.arange(15)


In [20]:
# -1 is the documented placeholder for "infer this dimension from the array
# size" (15 elements / 5 rows = 3 columns). The previous -3 gave the same
# result only because it was treated as that placeholder, which reads as if
# the value 3 mattered.
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [21]:
# an array's shape attribute is a tuple, so it can be passed to reshape as well
other_arr = np.ones((3, 5))

In [22]:
other_arr.shape

(3, 5)

In [23]:
# use other_arr's shape tuple (3, 5) as the target shape for arr
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [27]:
# flattening (raveling) is the inverse of reshape:
# build a 5 x 3 array here so it can be collapsed back to one dimension
arr2 = np.arange(15).reshape(5, 3)

In [28]:
arr2

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [29]:
arr2.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [30]:
# flatten gives the same values as ravel, but returns a copy of the data

arr2.flatten()


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### C versus Fortran order

In [31]:
# 3 x 4 array used to compare row-major and column-major traversal
arr = np.arange(12).reshape(3, 4)

In [32]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [33]:
# default traversal is row-major ("C" order): rows are read one after another
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [34]:
# "F" selects Fortran (column-major) order: columns are read one after another
arr.ravel("F")

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

### Concatenating and Splitting Arrays

In [36]:
# two 2 x 3 integer arrays to join in the examples below
arr3 = np.array([[1, 2, 4], [5, 6, 77]])
arr4 = np.array([[2, 4, 5], [12, 44, 2]])

In [38]:
# axis=0 joins along rows: arr4's rows are appended below arr3's
np.concatenate([arr3, arr4], axis=0)

array([[ 1,  2,  4],
       [ 5,  6, 77],
       [ 2,  4,  5],
       [12, 44,  2]])

In [39]:
# axis=1 joins along columns: arr4's columns are appended to the right of arr3's
np.concatenate([arr3, arr4], axis=1)

array([[ 1,  2,  4,  2,  4,  5],
       [ 5,  6, 77, 12, 44,  2]])

In [40]:
# stack joins the arrays along a new leading axis, giving a 2 x 2 x 3 result
# (vstack and hstack are shown in the following cells)

np.stack((arr3, arr4))

array([[[ 1,  2,  4],
        [ 5,  6, 77]],

       [[ 2,  4,  5],
        [12, 44,  2]]])

In [41]:
# vstack stacks row-wise; same result as concatenate with axis=0
np.vstack((arr3, arr4))

array([[ 1,  2,  4],
       [ 5,  6, 77],
       [ 2,  4,  5],
       [12, 44,  2]])

In [42]:
# hstack stacks column-wise; same result as concatenate with axis=1
np.hstack((arr3, arr4))

array([[ 1,  2,  4,  2,  4,  5],
       [ 5,  6, 77, 12, 44,  2]])

In [43]:
# split: draw a 2 x 4 array of standard-normal values from the seeded generator
# (presumably the input for the splitting example that follows)
arr_split = rng.standard_normal((2,4))

In [44]:
arr_split

array([[-1.42382504,  1.26372846, -0.87066174, -0.25917323],
       [-0.07534331, -0.74088465, -1.3677927 ,  0.6488928 ]])