#### *1. Import Packages*

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
import seaborn as sns
import statistics
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from mpl_toolkits.mplot3d import Axes3D
from itertools import combinations
from itertools import permutations
from sklearn.preprocessing import RobustScaler
from scipy.spatial.distance import pdist, squareform


#### *2. Dataset Space*

In [None]:
def _show_scatter(xs, ys):
    """Scatter-plot ``ys`` against ``xs`` with the shared labels/title and display it."""
    plt.scatter(xs, ys)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Scatter plot of X and Y')
    plt.show()


# Sample data: a sweep of ten x values from 0 to 1 with noisy y values,
# followed by one extra (0.8, 1.6) pair (presumably a deliberate outlier).
x1 = [
    0.0, 0.11111111, 0.22222222, 0.33333333, 0.44444444,
    0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.0,
    0.8,
]
y1 = [
    0.04967142, 0.09728468, 0.28699108, 0.48563632, 0.42102911,
    0.53214186, 0.82458795, 0.85452125, 0.84194145, 1.054256,
    1.6,
]

# Raw dataset as a two-column frame, plus its dimensions and column labels.
x2 = pd.DataFrame({'X': x1, 'Y': y1})
s_row, s_col = len(x2.index), len(x2.columns)
cols = x2.columns.tolist()

# Report the raw data and its size.
print(x2)
print('\n')
print('Number of samples:', s_row)
print('Number of features:', s_col)
print('\n')

# Plot the raw data.
_show_scatter(x2['X'], x2['Y'])

# Robust scaling; the scaled values are re-wrapped in a DataFrame, so the
# columns carry default integer labels (0 and 1) from here on.
scaler = RobustScaler()
scaled_values = scaler.fit_transform(x2)
x = pd.DataFrame(scaled_values)

print(x)

# Plot the scaled data.
_show_scatter(x[0], x[1])




#### *3. CO-MAD - Correlation Matrix Generator*

In [None]:
# CO-MAD matrix: entry (i, j) is the median of the pooled absolute
# deviations-from-median of features i and j. Pooling is order-independent,
# so the resulting matrix is symmetric.

# Absolute deviation of every sample from its own column median.
x_MAD = [(x[column] - x[column].median()).abs() for column in x]
MAD = pd.DataFrame(x_MAD).T
print(MAD)

list_x = MAD.columns
n_features = len(list_x)

# Ordered off-diagonal pairs (i, j), i != j, followed below by the diagonal
# pairs (i, i). permutations() yields pairs in row-major order of the labels.
perms = list(permutations(list_x, 2))
diag = [(label, label) for label in list_x]

corr_elems = diag + perms
print('\n')
print(f'The elements of the corr matrix are {corr_elems}')
print('\n')
print('The values of the corr_elems are')
print('\n')

corr_mat = []
for first, second in corr_elems:
    # Series.append was removed in pandas 2.0; pd.concat is the supported
    # way to pool the two deviation columns. The median is computed once
    # and reused for both the stored value and the log line.
    pooled_median = pd.concat([MAD[first], MAD[second]], ignore_index=True).median()
    corr_mat.append(pooled_median)
    print(f'The median of the columns ({first},{second}) -> {pooled_median}')

# The first n_features values belong on the diagonal.
print('\n')
matrix = np.diag(corr_mat[:n_features])
print(matrix)
print('\n')

# The remaining values are the off-diagonal entries, in the same order as
# `perms`; place each one by the position of its labels rather than by a
# running counter.
position = {label: pos for pos, label in enumerate(list_x)}
for (first, second), value in zip(perms, corr_mat[n_features:]):
    matrix[position[first], position[second]] = value

print(f"Correlation matrix : \n\n {matrix}")

#### *4. Eigen-Values & Eigen-Vectors*

In [None]:
# Eigen-decomposition of the CO-MAD matrix.
eigenvalues, eigenvectors = np.linalg.eig(matrix)

# Reorder so the largest eigenvalue comes first; the eigenvector columns are
# permuted with the same index so each column stays paired with its eigenvalue.
idx = np.argsort(eigenvalues)[::-1]
eigenvalues, eigenvectors = eigenvalues[idx], eigenvectors[:, idx]

# Report the sorted decomposition.
print("Eigenvalues:")
print(eigenvalues)
print("\nEigenvectors:")
print(eigenvectors)

# Share of the eigenvalue total carried by each component, in percent,
# rounded to two decimals; the cumulative sum is taken over the rounded values.
eig_vals_total = sum(eigenvalues)
explained_variance = np.round(eigenvalues / eig_vals_total * 100, 2)
cum_explained_variance = np.cumsum(explained_variance)
print('\nExplained variance             : {}'.format(explained_variance))

print('Cumulative explained variance  : {}'.format(cum_explained_variance))

#### *5. Selecting 'k' eigen-values for transformation*

In [None]:
# Number of leading eigenvectors (principal directions) to keep.
k = 1

# np.linalg.eig returns eigenvectors as COLUMNS (eigenvectors[:, i] pairs with
# eigenvalues[i]), so the top-k directions are the first k columns, not the
# first k rows. They are transposed so W keeps the (k, n_features) layout,
# one component per row.
W = eigenvectors[:, :k].T  # Projection matrix

# Project the scaled dataset onto the selected directions: (n_samples, k).
X_proj = x.dot(W.T)

print(f'Shape of projection : {W.shape}')
print('\nNew projection:')
print(X_proj)

#### *6. PCA projection*

In [None]:
# Scatter of the scaled data with an arrow along the first principal direction.

# Arrow length multiplier. The original referenced an undefined `sc`
# (NameError) and applied it to dx only; it is defined here and applied to
# both components so the arrow keeps the eigenvector's direction.
sc = 1.0

# The leading eigenvector is column 0 of `eigenvectors` (np.linalg.eig stores
# eigenvectors column-wise), so its components are [0, 0] and [1, 0].
pc1_dx = eigenvectors[0, 0] * sc
pc1_dy = eigenvectors[1, 0] * sc

plt.scatter(x[0], x[1])
# Anchor the arrow at the mean of the scaled data.
plt.arrow(x[0].mean(), x[1].mean(), pc1_dx, pc1_dy, color='r', width=0.01, shape='left')
plt.title("PCA Projection")
plt.xlabel("PC1")
plt.show()

#### *7. Distance Matrix of new projected points*

In [None]:
# Pairwise Euclidean distances between the projected points, expanded from
# the condensed vector into a full square matrix.
condensed_distances = pdist(X_proj, metric='euclidean')
SF = pd.DataFrame(squareform(condensed_distances))
print(f'\nSquare Form Matrix:\n{SF}')