In [1]:
import numpy as np
import pandas as pd

In [2]:
def standardize_data(data):
    """Z-score every column of ``data``.

    Each column is centred on its mean and divided by its population
    standard deviation (``np.std`` default, ``ddof=0``), both computed
    along axis 0.

    Parameters
    ----------
    data : array-like
        Observations in rows, features in columns.

    Returns
    -------
    Same shape as ``data``; each non-constant column has mean 0 and
    standard deviation 1. A constant column comes back as all zeros.
    """
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has std == 0; dividing by it gives 0/0 = NaN, which
    # then propagates through the covariance matrix and the eigensolver.
    # Dividing by 1 instead leaves such a column at exactly 0 after centring.
    safe_std = np.where(std == 0, 1.0, std)
    standardized_data = (data - mean) / safe_std
    return standardized_data

In [3]:
def compute_covariance_matrix(data):
    """Return the covariance matrix of the columns of ``data``.

    ``rowvar=False`` tells ``np.cov`` that each column is a variable and
    each row is an observation.
    """
    covariance = np.cov(data, rowvar=False)
    return covariance

In [4]:
def calculate_eigenvalues_and_eigenvectors(cov_matrix):
    """Eigendecompose ``cov_matrix`` with ``np.linalg.eigh``.

    Returns
    -------
    (eigenvalues, eigenvectors)
        A plain 2-tuple. Column ``i`` of ``eigenvectors`` is the
        eigenvector paired with ``eigenvalues[i]``.
    """
    decomposition = np.linalg.eigh(cov_matrix)
    # Re-pack explicitly so callers always get an ordinary tuple.
    return decomposition[0], decomposition[1]

In [5]:
def select_top_k_components(eigenvalues, eigenvectors, k):
    """Pick the eigenvector columns belonging to the ``k`` largest eigenvalues.

    Parameters
    ----------
    eigenvalues : array of eigenvalues.
    eigenvectors : matrix whose column ``i`` pairs with ``eigenvalues[i]``.
    k : number of columns to keep.

    Returns
    -------
    The selected columns of ``eigenvectors``, ordered from largest to
    smallest eigenvalue.
    """
    # argsort is ascending; reversing it ranks the eigenvalues largest-first.
    descending_order = np.argsort(eigenvalues)[::-1]
    return eigenvectors[:, descending_order[:k]]

In [6]:
def project_data(data, top_k_eigenvectors):
    """Project ``data`` onto the given eigenvector columns.

    Computes ``np.dot(data, top_k_eigenvectors)``, so each output column
    holds the coordinates of the rows of ``data`` along one eigenvector.
    """
    projection = np.dot(data, top_k_eigenvectors)
    return projection

In [7]:
def dimensionality_reduction_pipeline(data, k):
    """Reduce ``data`` to ``k`` columns via the PCA steps defined above.

    Steps: standardize the columns, build their covariance matrix,
    eigendecompose it, keep the eigenvectors of the ``k`` largest
    eigenvalues, and project the standardized data onto them.

    Parameters
    ----------
    data : observations in rows, features in columns.
    k : number of components to keep.

    Returns
    -------
    The standardized data projected onto the selected eigenvectors.
    """
    scaled = standardize_data(data)
    spectrum = calculate_eigenvalues_and_eigenvectors(
        compute_covariance_matrix(scaled)
    )
    basis = select_top_k_components(*spectrum, k)
    return project_data(scaled, basis)

In [18]:
# Machine-specific absolute location of the assembly-line CSV export.
csv_file_path = "C:/Users/PRIYANKA B/Machine Learning Models/Dimensionality Reduction/assembly_line_data.csv"
# Read the whole file into a DataFrame; the first CSV column has no header,
# so pandas names it "Unnamed: 0".
data = pd.read_csv(filepath_or_buffer=csv_file_path)
data

Unnamed: 0,temperature,pressure,speed,vibration,torque,humidity,current,voltage,load,cycle_time,defect
0,52.483571,113.993554,186.496435,20.460962,68.092051,42.033682,12.771837,231.777776,59.669747,131.481032,1
1,49.308678,109.246337,197.109627,25.698075,74.750372,41.826101,13.738138,193.334786,54.963497,136.101191,0
2,53.238443,100.596304,184.151602,27.931972,75.144135,32.430498,13.115880,230.721185,58.276250,127.480349,0
3,57.615149,93.530632,193.840769,39.438438,78.781043,42.689369,13.904008,216.494139,67.147317,90.862535,1
4,48.829233,106.982233,162.127707,32.782766,64.065133,50.129804,14.571699,230.611866,72.778567,117.668664,0
...,...,...,...,...,...,...,...,...,...,...,...
995,48.594499,110.701502,201.549610,30.142288,74.608280,46.200285,14.404266,253.147835,61.016301,139.516531,1
996,58.988433,99.734787,205.155051,19.610941,80.691285,53.068537,16.538063,224.904918,71.672177,90.024825,1
997,53.204214,91.181253,175.164788,28.398511,99.903282,36.478173,13.945222,207.473132,75.884472,109.420249,1
998,47.144105,98.369330,206.683528,38.216891,81.464290,48.275653,14.989787,244.793696,53.150128,127.436484,0


In [19]:
# Every column except the last is a candidate feature; the last is the label.
feature_frame = data.iloc[:, :-1]
# The CSV's header-less first column is loaded as "Unnamed: 0" and only
# repeats the row number. It is an identifier, not a measurement, so it is
# dropped here; otherwise it would be standardized and fed to PCA as a feature.
is_index_artifact = feature_frame.columns.astype(str).str.startswith("Unnamed")
features = feature_frame.loc[:, ~is_index_artifact].values
labels = data.iloc[:, -1].values

In [20]:
# Reduce to 2 dimensions
# Keep only the two leading components
k = 2
reduced_data = dimensionality_reduction_pipeline(data=features, k=k)

In [21]:
# Heading and array on separate lines, emitted by a single print call.
print("Reduced Data:", reduced_data, sep="\n")

Reduced Data:
[[-0.03371232  2.66539744]
 [ 1.65651492  1.1243931 ]
 [ 0.85441811  0.90104495]
 ...
 [ 0.98258054 -0.83197044]
 [-0.38361132 -0.64991731]
 [-0.53912085  0.13820155]]
