In [None]:
import numpy as np
import pandas as pd

Almost all functions that summarize or clean data default to axis=0. This is because Data Engineers usually want to know things about a "Feature" (Column) across all "Records" (Rows).

`.drop()` is the most common exception to this mental model: there `axis` says where the label is looked up, so to drop a column use `axis=1`.

## "Axis 0 acts on Rows; Axis 1 acts on Columns."
#### act = smashing them down / acting on / applying

In [None]:
# Build a small labelled grid: 3 rows x 4 columns
data = [
    [10, 20, 30, 39],
    [40, 50, 60, 69],
    [70, 80, 90, 99],
]
df = pd.DataFrame(
    data,
    index=['Row1', 'Row2', 'Row3'],
    columns=['A', 'B', 'C', 'D'],
)
df  # shape=(3,4)

In [None]:
# axis=0: move down the rows (vertical), so the rows collapse and one total per column remains
column_totals = df.sum(axis=0)
print(column_totals)

In [None]:
# axis=1: walk along the columns (horizontal), so the columns collapse and one total per row remains
totals_per_row = df.sum(axis=1)
print(totals_per_row)

In [None]:
# DataFrame.sum(axis=None) is version-dependent:
#   * pandas 2.1-2.x: behaves like axis=0 and emits a FutureWarning
#   * the announced future behaviour (and NumPy's): reduce over both axes to a single scalar
# Spell out each meaning explicitly so the output does not change with the installed pandas.
print(df.sum(axis=0))        # per-column totals (the legacy axis=None result)
print(df.to_numpy().sum())   # grand total of every cell (the NumPy-style axis=None result)

In [None]:
# For drop(), axis says WHERE the label lives rather than what gets collapsed:
# axis=0 -> look 'Row1' up in the row index and remove that row.
df_no_row = df.drop(labels=['Row1'], axis=0)
df_no_row

In [None]:
# axis=1 -> look 'B' up among the column labels and remove that column.
df_no_col = df.drop(labels=['B'], axis=1)
df_no_col

In [None]:
# df.max() uses the default axis=0: move down the rows (vertical), giving one maximum per column.
# Stored under its own name so the row-dropped frame `df_no_row` is not overwritten
# by an unrelated Series.
max_per_column = df.max()
max_per_column

In [None]:
# 'columns' is the named alias for axis=1: walk along the columns (horizontal),
# giving one maximum per row.
result = df.max(axis='columns')
result

In [None]:
# 2x2 integer array: [[1, 2], [3, 4]]
arr = np.arange(1, 5).reshape(2, 2)
arr

In [None]:
# axis=0: move down the rows (vertical) -> one total per column
print(arr.sum(axis=0))

In [None]:
# axis=1: walk along the columns (horizontal) -> one total per row
print(arr.sum(axis=1))

In [None]:
# No axis given (axis=None): every element is added into one grand total
print(arr.sum())

### Now let's practice

In [None]:
# Two single-row 2D arrays, each of shape (1, 2)
a = np.array([10, 2], ndmin=2)
b = np.array([3, 40], ndmin=2)
(a, b)

In [None]:
# axis=0: the inputs are joined along the row axis, so b goes underneath a -> shape (2, 2)
c = np.concatenate([a, b], axis=0)
c

In [None]:
# axis=1: the inputs are joined along the column axis, so b goes to the right of a -> shape (1, 4)
d = np.concatenate([a, b], axis=1)
d

In [None]:
# Two records with a 'Price' and a 'Tax' column
price_columns = {
    'Price': [100, 200],
    'Tax': [10, 20],
}
df = pd.DataFrame(price_columns)
df

In [None]:
def _price_plus_tax(row):
    """Return Price + Tax for one row of the frame."""
    return row['Price'] + row['Tax']


# axis=1: the function receives one row at a time (walking along the columns)
df.apply(_price_plus_tax, axis=1)

In [None]:
def _column_peak(col):
    """Return the largest value of one column."""
    return col.max()


# axis=0: the function receives one column at a time (moving down the rows)
df.apply(_column_peak, axis=0)

In [None]:
from sklearn.preprocessing import normalize
# 2x2 sample matrix: each inner list is one row
sample_rows = [[1, 10], [2, 20]]
X = np.array(sample_rows)
X

In [None]:
# axis=1: walk along each row, so every row is scaled independently
# ('l2' is normalize's default norm, written out here)
normalize(X, norm='l2', axis=1)

In [None]:
# axis=0: move down each column, so every column is scaled independently
# ('l2' is normalize's default norm, written out here)
normalize(X, norm='l2', axis=0)

In [None]:
# Boolean frame with columns C1 and C2
df = pd.DataFrame(
    [[True, False],
     [True, True]],
    columns=['C1', 'C2'],
)
df

In [None]:
# axis=0: move down the rows -> for each column, is at least one value True?
any_true_per_column = df.any(axis=0)
any_true_per_column

In [None]:
# axis=1: walk along the columns -> for each row, are all values True?
all_true_per_row = df.all(axis=1)
all_true_per_row

In [None]:
import numpy as np

# 3D array holding 0..23, arranged as 2 pages x 3 rows x 4 columns
# Shape: (2, 3, 4)
data3d = np.arange(2 * 3 * 4).reshape((2, 3, 4))

print("Original 3D Array Shape:", data3d.shape)
data3d
# Page 0: [[ 0  1  2  3], [ 4  5  6  7], [ 8  9 10 11]]
# Page 1: [[12 13 14 15], [16 17 18 19], [20 21 22 23]]

In [None]:
# axis=0 collapses the page dimension: Page 0 + Page 1 element by element
# (0+12, 1+13, ...). The 2 pages disappear; result shape is (3, 4).
page_sum = np.sum(data3d, axis=0)
page_sum

In [None]:
# axis=-1 counts from the end, so here it is the same as axis=2 (the size-4 dimension).
# Each innermost list of 4 numbers collapses into its total; result shape is (2, 3).
row_totals = np.sum(data3d, axis=-1)
row_totals

In [None]:
# axis=1 collapses the row dimension: the 3 rows disappear, leaving the
# per-column maximum inside each page. Result shape: (2, 4)
col_max = np.max(data3d, axis=1)
col_max

In [None]:
# Transform (no dimension is removed)
# Accumulating along axis=0 keeps the shape (2, 3, 4); Page 1 becomes Page 0 + Page 1.
running_total = np.cumsum(data3d, axis=0)
running_total

In [None]:
import torch

# 3D tensor holding 0..23 as floats: 2 blocks x 3 rows x 4 columns
# Shape: [2, 3, 4]
x = torch.arange(24).view(2, 3, 4).to(torch.float32)
print(x)
# Example A: collapse the innermost dimension (columns)
# dim=-1 counts from the end, so it selects the size-4 dimension.
result_a = torch.sum(x, dim=-1)
print(result_a)
# Shape is now [2, 3]

In [None]:
# Example B: Softmax (very common in AI)
# Applied over dim=-1, each innermost list of 4 values is rescaled to sum to 1.0.
probs = x.softmax(dim=-1)
print(probs)
# Example C (following cell): Concatenation
# Combining two tensors of shape [2, 3, 4]
In [None]:
# Second tensor of ones with the same [2, 3, 4] shape as x
y = torch.ones((2, 3, 4))
# The dim passed to cat is the one that grows:
#   dim=0 -> [4, 3, 4] (double the pages)
#   dim=1 -> [2, 6, 4] (double the rows)
#   dim=2 -> [2, 3, 8] (double the columns)
combined = torch.cat([x, y], dim=1)
print(combined)

## For numpy 3d operation - Caution: This is MADNESS

In [None]:
import numpy as np

# 3D data holding 0..23, shape (2, 3, 4):
#   axis 0 -> 2 pages
#   axis 1 -> 3 rows
#   axis 2 -> 4 columns
data = np.arange(2 * 3 * 4).reshape((2, 3, 4))

print("--- ORIGINAL DATA (2, 3, 4) ---")
print(data)

In [None]:
# 1. AGGREGATION: the dimension named by `axis` is the one that collapses
# axis=0: the 2 pages are added together into one -> result shape (3, 4)
sum_pages = np.sum(data, axis=0)
sum_pages

In [None]:
# axis=1: the 3 rows of each page are averaged away -> result shape (2, 4)
mean1 = np.mean(data, axis=1)
mean1

In [None]:
# axis=2: the 4 columns of each row collapse to their maximum -> result shape (2, 3)
max2 = np.max(data, axis=2)
max2

In [None]:
# 2. DELETION: `axis` picks the dimension, `obj` picks the index along it
# Remove the second page (index 1 along axis 0, the pages dimension).
# np.delete does not work in place; it returns a new array.
data_no_page = np.delete(data, obj=1, axis=0)
data_no_page

In [None]:
# Show `data` again: np.delete returned a new array, so the original still has both pages.
data

In [None]:
# axis=1 selects the rows dimension; obj=0 is the index along that dimension.
# So row 0 is removed from every page: shape (2, 3, 4) -> (2, 2, 4).
data_del = np.delete(data, obj=0, axis=1)
data_del

In [None]:
# 3. TRANSFORMATION: values are accumulated while moving ALONG the dimension
# axis=0: running total across pages (Page 1 becomes Page 0 + Page 1); shape is unchanged.
cum_sum = np.cumsum(data, axis=0)
cum_sum

In [None]:
# 4. INSERTION: open a gap along the chosen dimension and fill it
# Insert a new "page" of zeros at the beginning (index 0 along axis 0).
new_page = np.zeros((1, 3, 4))
print(new_page)
# obj is the index along `axis` before which the values are inserted.
# Pass the page that was just built (instead of a broadcast scalar 0) so the
# printed page is the one that is actually inserted; np.insert converts the
# values to data's dtype.
data_plus_page = np.insert(data, obj=0, values=new_page, axis=0)
print(data_plus_page)

In [None]:
# 5. SELECTION: pick one index along a dimension
print(data)
# indices=-1 on axis=-1 takes the last entry of the last dimension,
# i.e. the last column of every row -> result shape (2, 3)
res = np.take(data, indices=-1, axis=-1)
res

In [None]:
## So, what I think I learned (3D arrays)
# 0 -> pages,
# 1 -> rows,
# 2 -> cols
# Each axis number (0, 1, 2) maps to a physical structure (pages, rows, columns).

In [None]:
# So, for 2D:
# 0 -> rows
# 1 -> cols