# NumPy Comprehensive Guide

## What is NumPy?

NumPy (Numerical Python) is the fundamental package for scientific computing in Python. It provides:

- **Multi-dimensional arrays**: Efficient data structures for storing and manipulating large datasets
- **Mathematical functions**: Fast operations on arrays without loops
- **Linear algebra**: Tools for matrix operations and mathematical computations
- **Random number generation**: Various probability distributions
- **Integration**: Works seamlessly with other scientific libraries (Pandas, Matplotlib, SciPy)

## Why NumPy?

- **Speed**: Written in C, much faster than Python lists
- **Memory efficiency**: Optimized memory layout
- **Vectorization**: Operations on entire arrays at once
- **Broadcasting**: Automatic handling of different array shapes

In [None]:
# l = [[1,2,3,4,5], 
#      [6,7,8,9,10], 
#      [11,12,13,14,15]]
l = [[[1,2,3,4,5], 
      [6,7,8,9,10], 
      [11,12,13,14,15]],
      [[1,2,3,4,5], 
      [6,7,8,9,10], 
      [11,12,13,14,15]]]
l

[[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
 [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]]

In [10]:
for i in l:
    for j in i:
        for k in j:
            print(k)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15


In [1]:
# Import NumPy
import numpy as np

# Check NumPy version
print(f"NumPy version: {np.__version__}")

NumPy version: 2.3.2


In [5]:
matrix_1 = np.array([[1, 2], [3, 4]])

matrix_2 = np.array([[5, 6], [7, 7]])

matrix_3 = matrix_1 + matrix_2
matrix_3

array([[ 6,  8],
       [10, 11]])

## 1. Creating Arrays

### Basic Array Creation

In [16]:
# Create arrays from Python lists
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
arr3 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[1, 2], [3, 4]]])

print("1D Array:", arr1)
print("Shape:", arr1.shape)
print("Type:", type(arr1))
print("\n2D Array:")
print(arr2)
print("Shape:", arr2.shape)
print("Dimensions:", arr2.ndim)
print("\n3D Array:")
print(arr3)
print("Shape:", arr3.shape)
print("Dimensions:", arr3.ndim)

1D Array: [1 2 3 4 5]
Shape: (5,)
Type: <class 'numpy.ndarray'>

2D Array:
[[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Dimensions: 2

3D Array:
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]

 [[1 2]
  [3 4]]]
Shape: (3, 2, 2)
Dimensions: 3


In [22]:
# Special arrays
zeros = np.zeros((3, 4))  # 3x4 array of zeros
ones = np.ones((2, 3))    # 2x3 array of ones
eye = np.eye(4)           # 4x4 identity matrix
range_arr = np.arange(0, 20, 3)  # 0 to 18 step 2
linspace = np.linspace(0, 1, 10)  # 5 evenly spaced values from 0 to 1

print("Zeros:")
print(zeros)
print("\nOnes:")
print(ones)
print("\nIdentity:")
print(eye)
print("\nRange:", range_arr)
print("\nLinspace:", linspace)

Zeros:
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Ones:
[[1. 1. 1.]
 [1. 1. 1.]]

Identity:
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]

Range: [ 0  3  6  9 12 15 18]

Linspace: [0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]


## 2. Vectors and Tensors

### Understanding Dimensions

- **0D (Scalar)**: Single number
- **1D (Vector)**: Array of numbers
- **2D (Matrix)**: Array of arrays
- **3D+ (Tensor)**: Higher dimensional arrays

In [25]:
# Different dimensional arrays
scalar = np.array(42)           # 0D
vector = np.array([1, 2, 3])    # 1D
matrix = np.array([[1, 2], [3, 4]])  # 2D
tensor_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # 3D

print(f"Scalar (0D): {scalar}, shape: {scalar.shape}, dims: {scalar.ndim}")
print(f"Vector (1D): {vector}, shape: {vector.shape}, dims: {vector.ndim}")
print(f"Matrix (2D): shape: {matrix.shape}, dims: {matrix.ndim}")
print(f"Tensor (3D): shape: {tensor_3d.shape}, dims: {tensor_3d.ndim}")

# Reshaping arrays
arr = np.arange(12)
reshaped_2d = arr.reshape(3, 4)
reshaped_3d = arr.reshape(2, 2, 3)

print(f"\nOriginal: {arr}")
print(f"Reshaped 2D:\n{reshaped_2d}")
print(f"Reshaped 3D:\n{reshaped_3d}")

Scalar (0D): 42, shape: (), dims: 0
Vector (1D): [1 2 3], shape: (3,), dims: 1
Matrix (2D): shape: (2, 2), dims: 2
Tensor (3D): shape: (2, 2, 2), dims: 3

Original: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Reshaped 2D:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Reshaped 3D:
[[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]]


In [26]:
reshaped_3d.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

## 3. Common Operations

### Element-wise Operations

In [27]:
# Arithmetic operations
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])

print(f"a: {a}")
print(f"b: {b}")
print(f"a + b: {a + b}")
print(f"a - b: {a - b}")
print(f"a * b: {a * b}")
print(f"a / b: {a / b}")
print(f"a ** 2: {a ** 2}")
print(f"sqrt(a): {np.sqrt(a)}")
print(f"sin(a): {np.sin(a)}")

a: [1 2 3 4]
b: [5 6 7 8]
a + b: [ 6  8 10 12]
a - b: [-4 -4 -4 -4]
a * b: [ 5 12 21 32]
a / b: [0.2        0.33333333 0.42857143 0.5       ]
a ** 2: [ 1  4  9 16]
sqrt(a): [1.         1.41421356 1.73205081 2.        ]
sin(a): [ 0.84147098  0.90929743  0.14112001 -0.7568025 ]


In [28]:
# Matrix operations
matrix_a = np.array([[1, 2], [3, 4]])
matrix_b = np.array([[5, 6], [7, 8]])

print("Matrix A:")
print(matrix_a)
print("\nMatrix B:")
print(matrix_b)

# Element-wise multiplication
print("\nElement-wise multiplication (A * B):")
print(matrix_a * matrix_b)

# Matrix multiplication
print("\nMatrix multiplication (A @ B):")
print(matrix_a @ matrix_b)

# Transpose
print("\nTranspose of A:")
print(matrix_a.T)

Matrix A:
[[1 2]
 [3 4]]

Matrix B:
[[5 6]
 [7 8]]

Element-wise multiplication (A * B):
[[ 5 12]
 [21 32]]

Matrix multiplication (A @ B):
[[19 22]
 [43 50]]

Transpose of A:
[[1 3]
 [2 4]]


In [29]:
# Statistical operations
data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

print(f"Data: {data}")
print(f"Mean: {np.mean(data):.2f}")
print(f"Median: {np.median(data):.2f}")
print(f"Standard deviation: {np.std(data):.2f}")
print(f"Variance: {np.var(data):.2f}")
print(f"Min: {np.min(data)}")
print(f"Max: {np.max(data)}")
print(f"Sum: {np.sum(data)}")
print(f"Product: {np.prod(data)}")

Data: [ 1  2  3  4  5  6  7  8  9 10]
Mean: 5.50
Median: 5.50
Standard deviation: 2.87
Variance: 8.25
Min: 1
Max: 10
Sum: 55
Product: 3628800


## 4. Indexing and Slicing

### Basic Indexing

In [33]:
l = [1,4,7,9,14]

In [41]:
l[::-1]

[14, 9, 7, 4, 1]

In [43]:
# 1D array indexing
arr = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

print(f"Original array: {arr}")
print(f"First element: {arr[0]}")
print(f"Last element: {arr[-1]}")
print(f"Elements 2 to 5: {arr[2:6]}")
print(f"Every 2nd element: {arr[::2]}")
print(f"Reverse array: {arr[::-1]}")
print(f"Last 3 elements: {arr[-4:]}")

Original array: [ 10  20  30  40  50  60  70  80  90 100]
First element: 10
Last element: 100
Elements 2 to 5: [30 40 50 60]
Every 2nd element: [10 30 50 70 90]
Reverse array: [100  90  80  70  60  50  40  30  20  10]
Last 3 elements: [ 70  80  90 100]


In [22]:
arr = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
# arr[5::2] # si
# arr[5::] # no
# arr[-5::2] # si
# arr[-5:-1:2] # no
arr[5:9:2]


array([60, 80])

In [None]:
# 2D array indexing
matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

print("Matrix:")
print(matrix)
print(f"\nShape: {matrix.shape}")
print(f"Element at row 1, column 2: {matrix[1, 2]}")
print(f"Row 1: {matrix[1, :]}")
print(f"Column 2: {matrix[:, 2]}")
print(f"Submatrix (rows 0-1, columns 1-3):")
print(matrix[0:2, 1:4])

Matrix:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Shape: (3, 4)
Element at row 1, column 2: 7
Row 1: [5 6 7 8]
Column 2: [ 3  7 11]
Submatrix (rows 0-1, columns 1-3):
[[2 3 4]
 [6 7 8]]


In [49]:
# Boolean indexing
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Create boolean mask
mask = arr > 5
print(f"Array: {arr}")
print(f"Mask (arr > 5): {mask}")
print(f"Elements > 5: {arr[mask]}")

# Multiple conditions
# & (bitwise AND)
# | (bitwise OR)
mask2 = (arr > 3) & (arr < 8)
print(f"\nMask (3 < arr < 8): {mask2}")
print(f"Elements 3 < arr < 8: {arr[mask2]}")

# Using where function
indices = np.where(arr > 5)
print(f"\nIndices where arr > 5: {indices[0]}")
print(f"Values at those indices: {arr[indices]}")

Array: [ 1  2  3  4  5  6  7  8  9 10]
Mask (arr > 5): [False False False False False  True  True  True  True  True]
Elements > 5: [ 6  7  8  9 10]

Mask (3 < arr < 8): [False False False  True  True  True  True False False False]
Elements 3 < arr < 8: [4 5 6 7]

Indices where arr > 5: [5 6 7 8 9]
Values at those indices: [ 6  7  8  9 10]


In [48]:
arr

array([4, 5, 6, 7])

## 5. Data Wrangling

### Reshaping, Concatenating, and Splitting

In [50]:
# Reshaping arrays
arr = np.arange(24)
print(f"Original array: {arr}")
print(f"Shape: {arr.shape}")

# Reshape to 2D
arr_2d = arr.reshape(4, 6)
print(f"\nReshaped to 4x6:")
print(arr_2d)

# Reshape to 3D
arr_3d = arr.reshape(2, 3, 4)
print(f"\nReshaped to 2x3x4:")
print(arr_3d)

# Flatten back to 1D
arr_flat = arr_3d.flatten()
print(f"\nFlattened: {arr_flat}")

Original array: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Shape: (24,)

Reshaped to 4x6:
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]

Reshaped to 2x3x4:
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]

Flattened: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


In [53]:
# Concatenating arrays
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr3 = np.array([7, 8, 9])

# Vertical concatenation (stacking rows)
vertical = np.vstack([arr1, arr2, arr3])
print("Vertical concatenation:")
print(vertical)

# Horizontal concatenation (stacking columns)
horizontal = np.hstack([arr1, arr2, arr3])
print(f"\nHorizontal concatenation: {horizontal}")

# Using concatenate function
axis0 = np.concatenate([arr1, arr2, arr3], axis=0)
print(f"\nConcatenate axis=0: {axis0}")

# axis1 = np.concatenate([arr1, arr2, arr3], axis=1)
# print(f"\nConcatenate axis=0: {axis1}")

# axis2 = np.concatenate([arr1, arr2, arr3], axis=2)
# print(f"\nConcatenate axis=0: {axis2}")

# 2D arrays
matrix1 = np.array([[1, 2], [3, 4]])
matrix2 = np.array([[5, 6], [7, 8]])

print(f"\nMatrix 1:\n{matrix1}")
print(f"Matrix 2:\n{matrix2}")
print(f"\nConcatenated along rows:")
print(np.concatenate([matrix1, matrix2], axis=0))
print(f"\nConcatenated along columns:")
print(np.concatenate([matrix1, matrix2], axis=1))

Vertical concatenation:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Horizontal concatenation: [1 2 3 4 5 6 7 8 9]

Concatenate axis=0: [1 2 3 4 5 6 7 8 9]

Matrix 1:
[[1 2]
 [3 4]]
Matrix 2:
[[5 6]
 [7 8]]

Concatenated along rows:
[[1 2]
 [3 4]
 [5 6]
 [7 8]]

Concatenated along columns:
[[1 2 5 6]
 [3 4 7 8]]


In [54]:
# Splitting arrays
arr = np.arange(12)
print(f"Original array: {arr}")

# Split into 3 equal parts
parts = np.split(arr, 3)
print(f"\nSplit into 3 parts:")
for i, part in enumerate(parts):
    print(f"Part {i}: {part}")

# Split at specific indices
split_indices = [3, 7]
parts_custom = np.split(arr, split_indices)
print(f"\nSplit at indices {split_indices}:")
for i, part in enumerate(parts_custom):
    print(f"Part {i}: {part}")

# Horizontal and vertical splitting for 2D arrays
matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(f"\nMatrix:\n{matrix}")

h_parts = np.hsplit(matrix, 2)
print(f"\nHorizontal split into 2:")
for i, part in enumerate(h_parts):
    print(f"Part {i}:\n{part}")

v_parts = np.vsplit(matrix, 3)
print(f"\nVertical split into 3:")
for i, part in enumerate(v_parts):
    print(f"Part {i}:\n{part}")

Original array: [ 0  1  2  3  4  5  6  7  8  9 10 11]

Split into 3 parts:
Part 0: [0 1 2 3]
Part 1: [4 5 6 7]
Part 2: [ 8  9 10 11]

Split at indices [3, 7]:
Part 0: [0 1 2]
Part 1: [3 4 5 6]
Part 2: [ 7  8  9 10 11]

Matrix:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Horizontal split into 2:
Part 0:
[[ 1  2]
 [ 5  6]
 [ 9 10]]
Part 1:
[[ 3  4]
 [ 7  8]
 [11 12]]

Vertical split into 3:
Part 0:
[[1 2 3 4]]
Part 1:
[[5 6 7 8]]
Part 2:
[[ 9 10 11 12]]


## 6. Broadcasting

NumPy's broadcasting allows operations between arrays of different shapes.

In [6]:
# Broadcasting examples
arr = np.array([1, 2, 3, 4])
scalar = 10

print(f"Array: {arr}")
print(f"Scalar: {scalar}")
print(f"Array + scalar: {arr + scalar}")
print(f"Array * scalar: {arr * scalar}")

# Broadcasting with different shapes
matrix = np.array([[1, 2, 3], [4, 5, 6]])
vector = np.array([10, 20, 30])

print(f"\nMatrix:\n{matrix}")
print(f"Vector: {vector}")
print(f"\nMatrix + vector (broadcasting):")
print(matrix + vector)

# Broadcasting rules
arr_2d = np.array([[1], [2], [3]])
arr_1d = np.array([10, 20, 30])

print(f"\n2D array:\n{arr_2d}")
print(f"1D array: {arr_1d}")
print(f"\n2D + 1D (broadcasting):")
print(arr_2d + arr_1d)

Array: [1 2 3 4]
Scalar: 10
Array + scalar: [11 12 13 14]
Array * scalar: [10 20 30 40]

Matrix:
[[1 2 3]
 [4 5 6]]
Vector: [10 20 30]

Matrix + vector (broadcasting):
[[11 22 33]
 [14 25 36]]

2D array:
[[1]
 [2]
 [3]]
1D array: [10 20 30]

2D + 1D (broadcasting):
[[11 21 31]
 [12 22 32]
 [13 23 33]]


## 7. Practice Exercises

### Basic Exercises

**Exercise 1: Array Creation**
Create a 3x4 matrix filled with random integers between 1 and 100.

In [None]:
# Your code here
# Hint: Use np.random.randint()



**Exercise 2: Statistical Analysis**
Given an array of student scores [85, 92, 78, 96, 88, 75, 89, 91], calculate:
- Mean, median, standard deviation
- Number of students above 90
- Percentage of students above 85

In [None]:
# Your code here
scores = np.array([85, 92, 78, 96, 88, 75, 89, 91])


**Exercise 3: Matrix Operations**
Create two 2x2 matrices and perform:
- Element-wise multiplication
- Matrix multiplication
- Calculate determinant and inverse

In [None]:
# Your code here
# Hint: Use np.linalg.det() and np.linalg.inv()


**Exercise 4: Data Filtering**
Create an array of 20 random numbers between 0 and 100, then:
- Find all numbers divisible by 3
- Find all numbers that are perfect squares
- Create a boolean mask for numbers between 25 and 75

In [None]:
# Your code here
# Hint: Use np.mod() for remainder and np.sqrt() for square root


**Exercise 5: Array Reshaping**
Create a 1D array of 24 elements and reshape it into:
- A 2x12 matrix
- A 3x8 matrix
- A 2x3x4 tensor
- Then flatten it back to 1D

In [None]:
# Your code here
# Hint: Use .reshape() and .flatten() methods


## 8. Complex Exercises

### Advanced Problems to Test Your Understanding

**Complex Exercise 1: Image Processing Simulation**

Create a 10x10 "image" represented as a 2D array where:
- Values 0-50 represent dark pixels
- Values 51-100 represent medium pixels  
- Values 101-255 represent bright pixels

Then:
1. Generate a random image with values 0-255
2. Apply a filter that increases brightness by 20% for all pixels
3. Create a mask for pixels that are too bright (>200) and reduce them by 30%
4. Calculate the histogram of pixel values
5. Find the coordinates of the brightest and darkest pixels

In [None]:
# Your code here
# This is a complex exercise that combines multiple concepts

# Solution:
# # 1. Generate random image
# image = np.random.randint(0, 256, (10, 10))
# print("Original image:")
# print(image)
# 
# # 2. Increase brightness by 20%
# brightened = np.clip(image * 1.2, 0, 255).astype(int)
# print("\nBrightened image:")
# print(brightened)
# 
# # 3. Reduce too bright pixels
# bright_mask = brightened > 200
# brightened[bright_mask] = (brightened[bright_mask] * 0.7).astype(int)
# print("\nAdjusted image:")
# print(brightened)
# 
# # 4. Histogram
# hist, bins = np.histogram(brightened, bins=range(0, 257, 25))
# print(f"\nHistogram bins: {bins[:-1]}")
# print(f"Histogram counts: {hist}")
# 
# # 5. Find brightest and darkest
# max_coords = np.unravel_index(np.argmax(brightened), brightened.shape)
# min_coords = np.unravel_index(np.argmin(brightened), brightened.shape)
# print(f"\nBrightest pixel at {max_coords}: {brightened[max_coords]}")
# print(f"Darkest pixel at {min_coords}: {brightened[min_coords]}")

**Complex Exercise 2: Financial Data Analysis**

Simulate 100 days of stock price data where:
- Initial price is $100
- Daily returns follow a normal distribution with mean 0.001 and std 0.02
- Calculate daily prices from returns

Then:
1. Calculate daily returns from prices
2. Find the maximum drawdown (largest peak-to-trough decline)
3. Calculate rolling 10-day volatility
4. Identify days with returns > 2 standard deviations
5. Calculate Value at Risk (VaR) at 95% confidence level

In [None]:
# Your code here
# This exercise tests understanding of financial calculations and array operations

# Solution:
# # Generate daily returns
# np.random.seed(42)
# daily_returns = np.random.normal(0.001, 0.02, 100)
# 
# # Calculate prices
# prices = np.zeros(100)
# prices[0] = 100
# for i in range(1, 100):
#     prices[i] = prices[i-1] * (1 + daily_returns[i])
# 
# # 1. Calculate returns from prices
# calculated_returns = np.diff(prices) / prices[:-1]
# 
# # 2. Maximum drawdown
# peak = np.maximum.accumulate(prices)
# drawdown = (prices - peak) / peak
# max_drawdown = np.min(drawdown)
# 
# # 3. Rolling volatility
# rolling_vol = np.array([np.std(calculated_returns[i:i+10]) for i in range(90)])
# 
# # 4. Outlier days
# threshold = 2 * np.std(calculated_returns)
# outlier_days = np.where(np.abs(calculated_returns) > threshold)[0]
# 
# # 5. VaR
# var_95 = np.percentile(calculated_returns, 5)
# 
# print(f"Max drawdown: {max_drawdown:.4f}")
# print(f"Outlier days: {outlier_days}")
# print(f"VaR 95%: {var_95:.4f}")

**Complex Exercise 3: Machine Learning Data Preparation**

Create a synthetic dataset for machine learning with:
- 1000 samples
- 5 features (X) and 1 target (y)
- Features: 2 numerical, 2 categorical (encoded), 1 binary
- Target: binary classification (0 or 1)

Then:
1. Split data into training (70%) and testing (30%) sets
2. Normalize numerical features to [0,1] range
3. Create feature correlation matrix
4. Handle missing values (add 5% random missing values)
5. Create polynomial features for numerical columns
6. Calculate class balance and create balanced dataset if needed

In [None]:
# Your code here
# This exercise combines data manipulation, statistics, and ML concepts

# Solution:
# np.random.seed(42)
# n_samples = 1000
# 
# # Generate features
# numerical1 = np.random.normal(0, 1, n_samples)
# numerical2 = np.random.uniform(-5, 5, n_samples)
# categorical1 = np.random.choice(['A', 'B', 'C'], n_samples)
# categorical2 = np.random.choice(['X', 'Y', 'Z'], n_samples)
# binary = np.random.choice([0, 1], n_samples)
# 
# # Encode categorical variables
# cat1_encoded = np.array([['A', 'B', 'C'].index(x) for x in categorical1])
# cat2_encoded = np.array([['X', 'Y', 'Z'].index(x) for x in categorical2])
# 
# # Create target (simple rule-based)
# target = ((numerical1 > 0) & (numerical2 > 0) & (binary == 1)).astype(int)
# 
# # Combine features
# X = np.column_stack([numerical1, numerical2, cat1_encoded, cat2_encoded, binary])
# 
# # 1. Split data
# split_idx = int(0.7 * n_samples)
# X_train, X_test = X[:split_idx], X[split_idx:]
# y_train, y_test = target[:split_idx], target[split_idx:]
# 
# # 2. Normalize numerical features
# X_train_norm = X_train.copy()
# X_test_norm = X_test.copy()
# 
# for i in [0, 1]:  # numerical columns
#     min_val = np.min(X_train[:, i])
#     max_val = np.max(X_train[:, i])
#     X_train_norm[:, i] = (X_train[:, i] - min_val) / (max_val - min_val)
#     X_test_norm[:, i] = (X_test[:, i] - min_val) / (max_val - min_val)
# 
# # 3. Correlation matrix
# corr_matrix = np.corrcoef(X_train_norm.T)
# 
# # 4. Add missing values
# missing_mask = np.random.random(X_train_norm.shape) < 0.05
# X_train_missing = X_train_norm.copy()
# X_train_missing[missing_mask] = np.nan
# 
# # 5. Polynomial features
# poly_features = np.column_stack([
#     X_train_norm[:, 0],  # numerical1
#     X_train_norm[:, 1],  # numerical2
#     X_train_norm[:, 0] ** 2,  # numerical1^2
#     X_train_norm[:, 1] ** 2,  # numerical2^2
#     X_train_norm[:, 0] * X_train_norm[:, 1]  # interaction
# ])
# 
# # 6. Class balance
# class_counts = np.bincount(y_train)
# print(f"Class distribution: {class_counts}")
# print(f"Training samples: {X_train.shape[0]}")
# print(f"Testing samples: {X_test.shape[0]}")
# print(f"Feature correlation shape: {corr_matrix.shape}")
# print(f"Missing values: {np.sum(missing_mask)}")
# print(f"Polynomial features shape: {poly_features.shape}")