In [19]:
import numpy as np
# a dtype is for determining things like storage and semantics per element - kind, width, and byte order
# for inspecting things:
A = np.array(
    [
        [1, 2, 3], 
        [4, 5, 6]
    ]
)
print(f"{A.dtype}, {A.itemsize}, {A.nbytes}, {A.strides}")

int64, 8, 48, (24, 8)


In [20]:
# For information
print(np.iinfo(np.int32))
print(np.finfo(np.float32))

Machine parameters for int32
---------------------------------------------------------------
min = -2147483648
max = 2147483647
---------------------------------------------------------------

Machine parameters for float32
---------------------------------------------------------------
precision =   6   resolution = 1.0000000e-06
machep =    -23   eps =        1.1920929e-07
negep =     -24   epsneg =     5.9604645e-08
minexp =   -126   tiny =       1.1754944e-38
maxexp =    128   max =        3.4028235e+38
nexp =        8   min =        -max
smallest_normal = 1.1754944e-38   smallest_subnormal = 1.4012985e-45
---------------------------------------------------------------



In [21]:
#booleans
B = np.array([True, False, True], dtype=np.bool_)
print(f"{B}, {B.itemsize}") # Booleans are stored as one byte

#Integers
C = np.array([1, 2, 3, 4, 5, 6], dtype=np.int8)
D = np.array([1, 2, 3, 4, 5, 6], dtype=np.int16)
E = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32)
F = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
print(f"{C}, {C.itemsize}")
print(f"{D}, {D.itemsize}")
print(f"{E}, {E.itemsize}")
print(f"{F}, {F.itemsize}")

[ True False  True], 1
[1 2 3 4 5 6], 1
[1 2 3 4 5 6], 2
[1 2 3 4 5 6], 4
[1 2 3 4 5 6], 8


In [22]:
#Unisigned Integers
# data type in computer science that stores only non-negative whole numbers (zero and positive numbers), 
# with a range that extends from 0 up to a maximum value determined by the number of bits used
G = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint8)
H = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint16)
I = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint32)
J = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint64)
print(f"{G}, {G.itemsize}")
print(f"{H}, {H.itemsize}")
print(f"{I}, {I.itemsize}")
print(f"{J}, {J.itemsize}")

[1 2 3 4 5 6], 1
[1 2 3 4 5 6], 2
[1 2 3 4 5 6], 4
[1 2 3 4 5 6], 8


In [23]:
#Default for float is 64, but the ML default is 32
K = np.array([1, 2, 3, 4, 5], dtype=np.float16)
L = np.array([1, 2, 3, 4, 5], dtype=np.float32)
M = np.array([1, 2, 3, 4, 5], dtype=np.float64)
print(K)
print(L)
print(M)

[1. 2. 3. 4. 5.]
[1. 2. 3. 4. 5.]
[1. 2. 3. 4. 5.]


In [24]:
#Complex
N = np.array([1 + 2j], dtype=np.complex64)
O = np.array([1 + 2j], dtype=np.complex128)
print(N.itemsize)
print(O.itemsize)

8
16


In [25]:
#use explicit definition of the dtype during array creation
# Common traps default to float64: linspace, ones, randn-style outputs, Python floats.
rng = np.random.default_rng(0) #This method create a new generator instance with the seed 0. It gives you an isolated, reproducible random stream 
rng.standard_normal(1000, dtype=np.float32) #Draws 1,000 samples from a standard normal 𝑁(0,1) using that generator. 
# Returns a float32 array directly (no post-cast)

array([ 1.11762202e+00, -1.38712490e+00, -4.26571608e-01, -8.03587258e-01,
        6.01427734e-01, -7.49932453e-02,  5.96931949e-02, -3.20060886e-02,
       -1.85484678e-01,  1.20477366e+00,  7.77453065e-01, -1.35828102e+00,
        7.69827843e-01, -8.70226979e-01,  1.09976351e+00, -9.58474219e-01,
       -1.27493191e+00, -1.36533809e+00, -1.47432756e+00,  4.33531702e-01,
       -3.28120619e-01, -9.70968008e-01,  1.26550245e+00,  3.76360677e-03,
       -1.06654119e+00,  1.72889876e+00, -4.51382279e-01,  4.86281700e-02,
       -1.13824058e+00, -3.38293284e-01, -1.52973986e+00,  2.04066992e-01,
       -5.36706373e-02, -9.06851947e-01, -2.10764054e-02, -1.24412775e+00,
        8.32168758e-02, -2.47116044e-01,  1.31458926e+00,  9.20494974e-01,
       -2.28772566e-01, -3.04026175e-02,  7.35633401e-03, -1.64132237e-01,
       -1.50977643e-02,  1.22743702e+00,  1.04080665e+00,  6.79934442e-01,
        2.51833498e-01,  1.66519105e+00,  1.03471780e+00, -3.81819338e-01,
       -3.50518584e-01,  

In [26]:
#Casting - Conversion vs reinterpreting
#Convert values: astype(new_dtype, copy=False); returns view if already same dtype.
#Reinterpret bytes: .view(new_dtype); no value change, only how bytes are read.

P = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
Q = P.astype(np.float32)
R = P.view(np.uint32)
print(P)
print(Q)
print(R)
#Safe/unsafe casting flags appear in many APIs: 'no' < 'equiv' < 'safe' < 'same_kind' < 'unsafe'.
#Check feasibility: np.can_cast(from_dtype, to_dtype, casting='safe').
print(np.can_cast(np.uint32, np.bool_))

[1 2 3 4 5 6]
[1. 2. 3. 4. 5. 6.]
[1 0 2 0 3 0 4 0 5 0 6 0]
False


In [27]:
#Boolean Behavior
#Comparisons yield bool arrays.
#In arithmetic, bool → integers 0/1, then usual promotion.
S = np.array([True, False], dtype=bool)
print((S + 2).dtype ) #The bool array is converted to the type int64, and then 2 is added to the array.
#It becomes 1, 0, 2, and since 2 is a python int, the array type defaults to np.int_, which is int64 on 64 bit systems and int32 on 32 bit systems
print((S & (S==1)).dtype) #In this one, it's converted to booleans, and the type is displayed

int64
bool


In [28]:
#NaN/±inf exist only in floating and complex dtypes.
#Introducing np.nan or np.inf into integer arrays forces upcast to float.
#NaN means "Not a Number", and inf is infinity, which can be +inf or -inf
T = np.array([1, 2, 3], dtype=np.int32)
T = T.astype(np.float32)
T[0] = np.nan
#np.result_type(*args): “If I did an elementwise op on these, what dtype would NumPy use?”
#np.promote_types(t1, t2): “Smallest dtype both can be safely cast to.”

#Same kind → widen width: int32 + int64 -> int64, float32 + float64 -> float64.
#Integer with float → float, usually float64 if a Python float is involved.
#Keep float32 by avoiding Python float: use np.float32(1.0) or arrays of float32.
#Any complex operand → complex; real part and imag part width follow the widest real.
#Signed + unsigned → a wider integer if it can represent both ranges; if not, expect upcast to float or object. Avoid mixing signed and unsigned.
#Array + scalar: Python scalar types influence promotion (1.0 is 64-bit). Prefer NumPy scalars (np.float32(1)).

#Examples to memorize
np.ones(3, np.float32) + np.ones(3, np.float32)         # float32
np.ones(3, np.float32) + 1                              # float32 (int -> safely cast)
np.ones(3, np.float32) + 1.0                            # often float64 (Python float)
np.array([1], np.int32) + np.array([1], np.float32)     # often float64
np.array([1], np.int32) + np.array([1], np.int64)       # int64
np.array([1], np.uint8) + np.array([255], np.uint8)     # uint8 overflow wraps (mod 256)

array([0], dtype=uint8)

In [29]:
#Integers wrap on overflow; no warning by default. Use a wider dtype if near limits, or switch to float.
#In this case, what overflow means is when the sum of 2 scalar values in numpy becomes larger than the size of the data type that you're adding
print(np.uint8(255) + np.uint8(1))
#Unsigned wrap: uint8: 255+1→0
#Signed wrap (two’s complement): int8: 127+1→−128: int32: 2147483647 + 1 → −2147483648
#Float overflow is different: it yields inf (often with a warning). Integers never produce inf.

0


  print(np.uint8(255) + np.uint8(1))


In [30]:
#dtype may show < little-endian or > big-endian. Use native: np.dtype('float32').newbyteorder('=').
#Only care when reading external binary formats.

In [31]:
#Utilities
U = np.array([1+2j, 3+4j], dtype=np.complex64)
print(U)
print(U.real, U.imag)          # views
print(np.abs(U))              # magnitude, float
print(np.angle(U))               # phase
print(U.astype(np.float32))      # drops imaginary part (same-kind cast, not “safe”)

[1.+2.j 3.+4.j]
[1. 3.] [2. 4.]
[2.236068 5.      ]
[1.1071488 0.9272952]
[1. 3.]


  print(U.astype(np.float32))      # drops imaginary part (same-kind cast, not “safe”)


In [None]:
#Inspecting and enforcing consistency
V = np.asarray([1, 2, 3, 4, 5, 6], dtype=np.float32)   # ensure float32
assert V.dtype == np.float32
#itemsize controls memory. float32 halves memory vs float64.
#CPU vectorization can favor native width (float64 on some CPUs), but for ML pipelines float32 is standard.
#Avoid repeated casts inside loops. Cast once at load time.

In [34]:
V.dtype, V.itemsize, V.nbytes
print(np.unique(V.dtype, return_counts=True)) #finds distinct values and returns them in a sorted list
np.set_printoptions(precision=6, suppress=True)

(array([dtype('float32')], dtype=object), array([1]))


In [None]:
"""
 - Always set dtype on creation.
 - Use np.result_type when mixing arrays.
 - Avoid mixing signed and unsigned.
 - Avoid Python floats when holding float32 pipelines.
 - Know limits with iinfo/finfo. Avoid integer overflow.
 - Use astype(copy=False) and .view() correctly.
"""

In [41]:
#Create these and report dtype, itemsize:
#a) np.array([True, False])
A = np.array([True, False], dtype=bool)
print(A.dtype)
print(A.itemsize)
#b) np.arange(5, dtype=np.uint16)
B = np.arange(5, dtype=np.uint16)
print(B.dtype)
print(B.itemsize)
#c) np.linspace(-1, 1, 11, dtype=np.float32)
C = np.linspace(-1, 1, 11, dtype=np.float32)
print(C.dtype)
print(C.itemsize)
#d) np.array([1+2j, 3+0j], dtype=np.complex128)
D = np.array([1+2j, 3+0j], dtype=np.complex128)
print(D.dtype)
print(D.itemsize)
#Show three ways to obtain a float32 vector [0, 0.5, 1.0] without post-casting.
E = np.linspace(0, 1, 3, dtype=np.float32)
print(E)
F = np.array([0, 0.5, 1.0], dtype=np.float32)
print(F)
G = np.arange(3, dtype=np.float32) * np.float32(0.5)
print(G)
#Given two arrays a:int32, b:float32, make their sum remain float32 despite Python floats.
H = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32)
I = np.array([7, 8, 9, 10, 11, 12], dtype=np.float32)
J = np.array(H + I, dtype=np.float32)
print(J.dtype)
#Demonstrate integer overflow with uint8 and then fix it by promoting dtype.

#Use np.promote_types to find the common dtype for int16 and uint32. Explain the result in one sentence.

#Reinterpret the bytes of a float32 array as int32 with .view, then restore the original view.

#Build a small complex array and extract magnitude and phase as float32.

#Load random float64 data and convert once to float32. Verify memory drop via nbytes.

bool
1
uint16
2
float32
4
complex128
16
[0.  0.5 1. ]
[0.  0.5 1. ]
[0.  0.5 1. ]
float32
