# Imports

In [1]:
import numpy as np

# Topics

In [2]:
# Fixed seed so the arrays printed below are reproducible.
# The draw order (a, b, c, d) matters: each call consumes the same global random stream.
np.random.seed(0)
a = np.random.rand(4)            # 1-D, shape (4,)
b = np.random.rand(4)            # 1-D, shape (4,)
c = np.random.random((2, 4))     # 2-D, shape (2, 4)
d = np.random.random((4, 2))     # 2-D, shape (4, 2)
# One array per line, in the same order they were created.
print(a, b, c, d, sep="\n")

[0.5488135  0.71518937 0.60276338 0.54488318]
[0.4236548  0.64589411 0.43758721 0.891773  ]
[[0.96366276 0.38344152 0.79172504 0.52889492]
 [0.56804456 0.92559664 0.07103606 0.0871293 ]]
[[0.0202184  0.83261985]
 [0.77815675 0.87001215]
 [0.97861834 0.79915856]
 [0.46147936 0.78052918]]


## np.vstack
- Stack arrays in sequence vertically (row wise).
- This is equivalent to concatenation along the first axis after 1-D arrays of shape (N,) have been reshaped to (1,N).


In [13]:
np.vstack([a, b])  # each 1-D (4,) array becomes one row -> result has shape (2, 4)

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ]])

In [25]:
np.concatenate([a, b], axis=0)  # unlike vstack, concatenate does not turn 1-D inputs into rows: the result stays 1-D (length 8)

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
       0.64589411, 0.43758721, 0.891773  ])

In [20]:
np.vstack([c, d.T])  # d is (4, 2), so d.T is (2, 4) and matches c's 4 columns

array([[0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ],
       [0.0202184 , 0.77815675, 0.97861834, 0.46147936],
       [0.83261985, 0.87001215, 0.79915856, 0.78052918]])

In [23]:
np.concatenate([c, d.T], axis=0)  # for 2-D inputs this gives the same result as np.vstack

array([[0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ],
       [0.0202184 , 0.77815675, 0.97861834, 0.46147936],
       [0.83261985, 0.87001215, 0.79915856, 0.78052918]])

In [30]:
np.vstack([c, a, b, d.T])  # 1-D and 2-D inputs can be mixed as long as every row ends up with 4 columns

array([[0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ],
       [0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.0202184 , 0.77815675, 0.97861834, 0.46147936],
       [0.83261985, 0.87001215, 0.79915856, 0.78052918]])

## np.hstack
- Stack arrays in sequence horizontally (column wise).
- This is equivalent to concatenation along the second axis, except for 1-D arrays where it concatenates along the first axis.

In [31]:
np.hstack([a, b])  # 1-D inputs are joined end to end, giving a 1-D array of length 8

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
       0.64589411, 0.43758721, 0.891773  ])

In [32]:
np.concatenate([a, b])  # same result as np.hstack for 1-D inputs

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
       0.64589411, 0.43758721, 0.891773  ])

In [40]:
a.reshape((-1, 1))  # -1 lets NumPy infer the row count: (4,) -> a single column of shape (4, 1)

array([[0.5488135 ],
       [0.71518937],
       [0.60276338],
       [0.54488318]])

In [35]:
np.hstack([a.reshape(-1, 1), b.reshape(-1, 1)])  # two (4, 1) columns placed side by side -> (4, 2)

array([[0.5488135 , 0.4236548 ],
       [0.71518937, 0.64589411],
       [0.60276338, 0.43758721],
       [0.54488318, 0.891773  ]])

In [39]:
np.concatenate([a.reshape(-1, 1), b.reshape(-1, 1)], axis=1)  # axis=1 joins the (4, 1) columns side by side, matching np.hstack

array([[0.5488135 , 0.4236548 ],
       [0.71518937, 0.64589411],
       [0.60276338, 0.43758721],
       [0.54488318, 0.891773  ]])

In [38]:
np.hstack([d, c.T])  # c is (2, 4), so c.T is (4, 2) and lines up with d's 4 rows

array([[0.0202184 , 0.83261985, 0.96366276, 0.56804456],
       [0.77815675, 0.87001215, 0.38344152, 0.92559664],
       [0.97861834, 0.79915856, 0.79172504, 0.07103606],
       [0.46147936, 0.78052918, 0.52889492, 0.0871293 ]])

## np.column_stack
- Stack 1-D arrays as columns into a 2-D array.
- Take a sequence of 1-D arrays and stack them as columns to make a single 2-D array.
- 2-D arrays are stacked as-is, just like with hstack.

In [41]:
np.column_stack([a, b])  # no reshape needed: equal-length 1-D arrays become the columns of a (4, 2) array

array([[0.5488135 , 0.4236548 ],
       [0.71518937, 0.64589411],
       [0.60276338, 0.43758721],
       [0.54488318, 0.891773  ]])

In [55]:
np.column_stack([c.T, d])  # 2-D inputs are stacked as-is, just like np.hstack

array([[0.96366276, 0.56804456, 0.0202184 , 0.83261985],
       [0.38344152, 0.92559664, 0.77815675, 0.87001215],
       [0.79172504, 0.07103606, 0.97861834, 0.79915856],
       [0.52889492, 0.0871293 , 0.46147936, 0.78052918]])

In [49]:
np.column_stack([c.flatten(), d.flatten()])  # flatten() unrolls each array row by row into 8 values; those become the two columns

array([[0.96366276, 0.0202184 ],
       [0.38344152, 0.83261985],
       [0.79172504, 0.77815675],
       [0.52889492, 0.87001215],
       [0.56804456, 0.97861834],
       [0.92559664, 0.79915856],
       [0.07103606, 0.46147936],
       [0.0871293 , 0.78052918]])

## np.vsplit
- Split an array into multiple sub-arrays vertically (row-wise).
- vsplit is equivalent to split with axis=0 (the default); the array is always split along the first axis, and the input must have at least 2 dimensions.


In [59]:
np.vsplit(np.vstack([c, d.T, a, b]), 3)  # 3 = number of equal row blocks to return; the 6 rows must divide evenly by it

[array([[0.96366276, 0.38344152, 0.79172504, 0.52889492],
        [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]]),
 array([[0.0202184 , 0.77815675, 0.97861834, 0.46147936],
        [0.83261985, 0.87001215, 0.79915856, 0.78052918]]),
 array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
        [0.4236548 , 0.64589411, 0.43758721, 0.891773  ]])]

## np.hsplit
- Split an array into multiple sub-arrays horizontally (column-wise).
- hsplit is equivalent to split with axis=1; the array is split along the second axis, except for 1-D arrays, where it is split along the first axis (axis=0).

In [62]:
np.hsplit(np.vstack([c, d.T, a, b]), 4)  # 4 = number of equal column blocks to return; the 4 columns must divide evenly by it

[array([[0.96366276],
        [0.56804456],
        [0.0202184 ],
        [0.83261985],
        [0.5488135 ],
        [0.4236548 ]]),
 array([[0.38344152],
        [0.92559664],
        [0.77815675],
        [0.87001215],
        [0.71518937],
        [0.64589411]]),
 array([[0.79172504],
        [0.07103606],
        [0.97861834],
        [0.79915856],
        [0.60276338],
        [0.43758721]]),
 array([[0.52889492],
        [0.0871293 ],
        [0.46147936],
        [0.78052918],
        [0.54488318],
        [0.891773  ]])]

## LDA vs PCA once more

In [87]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Generate sample data.
# The global RNG is re-seeded here, so x, y and the noise in z depend on this exact draw order.
np.random.seed(0)
n_samples = 1000
x = np.random.rand(n_samples)
y = np.random.rand(n_samples)
z = 2 * x + np.random.randn(n_samples)  # Linear relationship between x and z, plus random noise

# Combine features into a single dataset: columns are x, y, z (one row per sample)
data = np.column_stack((x, y, z))

# Apply PCA, keeping the first 2 components of the 3-column dataset
pca = PCA(n_components=2)
pca_result = pca.fit_transform(data)

# Report how much variance each kept component explains and the component vectors themselves
print("PCA Results:")
print("Explained Variance Ratio:", pca.explained_variance_ratio_)
print("PCA Components:")
print(pca.components_)

# The LDA half of this comparison is disabled below.
# NOTE(review): the disabled call passes data[:, :-1] (columns x, y) as features and the last
# column (z) as labels -- not x, as the earlier comment claimed. z is continuous, while LDA
# presumably expects discrete class labels; confirm and supply real classes before re-enabling.
# # Apply LDA
# lda = LinearDiscriminantAnalysis(n_components=2)
# lda_result = lda.fit_transform(data[:,:-1],data[:,-1:].ravel())  # labels = z (last column of data)

# print("\nLDA Results:")
# print("Explained Variance Ratio:", lda.explained_variance_ratio_)
# print("LDA Components:")
# print(lda.scalings_)


PCA Results:
Explained Variance Ratio: [0.89419783 0.06238309]
PCA Components:
[[-1.34506057e-01  8.81030751e-03 -9.90873604e-01]
 [-7.25836713e-02 -9.97361840e-01  9.84867217e-04]]
