In [None]:
import numpy as np
import matplotlib.pyplot as plt

Function to load the dataset from a CSV file

In [None]:
def load_dataset(Iris):
    """Load feature values and species labels from an Iris-style CSV file.

    The file must start with a header row; every following non-blank row is
    read as ``Id, <four numeric features>, Species`` (comma separated).

    Parameters
    ----------
    Iris : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    tuple[numpy.ndarray, list[str]]
        A float array with one row per sample holding columns 1-4 of the
        file (the Id column is excluded), and a list with the species label
        (column 5) of each sample, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a feature value cannot be converted to ``float``.
    IndexError
        If a non-blank row has fewer than six columns.
    """
    data = []
    species = []
    # Open the path supplied by the caller; the argument used to be ignored
    # in favour of a hard-coded absolute path.
    with open(Iris) as file:
        next(file, None)  # Skip the header row (no-op on an empty file)
        for line in file:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at end of file
                continue
            values = line.split(',')
            data.append([float(x) for x in values[1:5]])  # Exclude the Id column
            species.append(values[5])
    return np.array(data), species

Function to perform Principal Component Analysis

In [None]:
def pca(dataset, k):
    """Project a dataset onto its top ``k`` principal components.

    Parameters
    ----------
    dataset : array-like, shape (n_samples, n_features)
        Numeric observations, one sample per row.
    k : int
        Number of leading principal components to keep.

    Returns
    -------
    principal_components : numpy.ndarray, shape (n_samples, k)
        The mean-centered data projected onto the selected eigenvectors.
    eigenvalues : numpy.ndarray, shape (k,)
        The ``k`` largest eigenvalues of the sample covariance matrix, in
        descending order.
    eigenvectors : numpy.ndarray, shape (n_features, k)
        The matching unit eigenvectors, one per column.

    Notes
    -----
    The sign of each eigenvector (and therefore of each projected component)
    is arbitrary, as it is for any eigen-decomposition.
    """
    dataset = np.asarray(dataset)

    # Compute the mean of each feature
    mean = np.mean(dataset, axis=0)

    # Center the data by subtracting the mean
    centered_data = dataset - mean

    # Compute the covariance matrix of the centered data. np.cov returns a
    # 0-d array for a single feature, so force it back to a square matrix.
    covariance_matrix = np.atleast_2d(np.cov(centered_data, rowvar=False))

    # A covariance matrix is symmetric, so use eigh: unlike the general eig
    # solver it always returns real eigenvalues and orthonormal eigenvectors
    # instead of possibly complex results caused by round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Sort the eigenvectors based on the eigenvalues in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvalues = eigenvalues[sorted_indices]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Select the top k eigenvectors based on the explained variance
    selected_eigenvectors = sorted_eigenvectors[:, :k]

    # Project the centered data onto the selected eigenvectors
    principal_components = np.dot(centered_data, selected_eigenvectors)
    return principal_components, sorted_eigenvalues[:k], selected_eigenvectors

Function to visualize the results

In [None]:
def plot_results(principal_components, eigenvalues):
    """Draw a 3-D scatter of the components and a line plot of the eigenvalues.

    Parameters
    ----------
    principal_components : numpy.ndarray
        2-D array whose first three columns are plotted as PC1, PC2 and PC3.
    eigenvalues : sequence of float
        Eigenvalues plotted against their 1-based component number.

    Two separate figures are created and then displayed with ``plt.show()``.
    """
    # Figure 1: 3-D scatter of the first three principal components
    scatter_ax = plt.figure().add_subplot(111, projection='3d')
    pc1, pc2, pc3 = (principal_components[:, col] for col in range(3))
    scatter_ax.scatter(pc1, pc2, pc3, c='blue')
    scatter_ax.set_xlabel('PC1')
    scatter_ax.set_ylabel('PC2')
    scatter_ax.set_zlabel('PC3')
    scatter_ax.set_title('Principal Component Analysis')

    # Figure 2: eigenvalue of each retained component
    eigen_ax = plt.figure().add_subplot(111)
    component_numbers = np.arange(1, len(eigenvalues) + 1)
    eigen_ax.plot(component_numbers, eigenvalues, 'bo-')
    eigen_ax.set_xlabel('Principal Component')
    eigen_ax.set_ylabel('Eigenvalue')
    eigen_ax.set_title('Eigenvalues')

    plt.show()

Main function

In [None]:
def main():
    """Run the full pipeline: load the Iris CSV, apply PCA, plot the results."""
    # Load the dataset. The raw string keeps the Windows backslashes literal;
    # the path value is unchanged, but the invalid "\I" escape is gone.
    dataset, _species = load_dataset(r'D:\Internships\Iris.csv')

    # load_dataset already returns the species labels separately, so the
    # array holds numeric features only. Slicing off its last column here
    # would discard a real feature rather than the "Species" column.
    features = dataset

    # Set the number of principal components
    k = 3

    # Perform Principal Component Analysis
    principal_components, eigenvalues, _ = pca(features, k)

    # Visualize the results
    plot_results(principal_components, eigenvalues)

Run the main function

In [None]:
# Entry point: call main() only when this code runs as the top-level program,
# not when it is imported as a module.
if __name__ == '__main__':
    main()