# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def _kmeans_fit(data, n_clusters, num_iter, seed):
    """Run Lloyd's k-means on *data* and return ``(labels, centroids)``.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Numeric observations; converted to a float array internally.
    n_clusters : int
        Number of clusters to fit.
    num_iter : int
        Maximum number of assign/update iterations.
    seed : int
        Seed for the private random generator used to pick initial centroids.

    Returns
    -------
    labels : numpy.ndarray of int, shape (n_samples,)
        Index of the nearest centroid for every sample.
    centroids : numpy.ndarray of float, shape (n_clusters, n_features)
        Final centroid coordinates.

    Raises
    ------
    ValueError
        If there are fewer samples than clusters.
    """
    # Work in float so centroid means are never truncated by an integer dtype.
    data = np.asarray(data, dtype=float)
    if len(data) < n_clusters:
        raise ValueError(
            f"need at least {n_clusters} samples to form {n_clusters} clusters, "
            f"got {len(data)}"
        )

    # A private RandomState draws the same sample as np.random.seed(seed)
    # followed by np.random.choice, but leaves the global RNG untouched.
    rng = np.random.RandomState(seed)
    centroids = data[rng.choice(len(data), n_clusters, replace=False)]
    labels = np.zeros(len(data), dtype=int)

    for _ in range(num_iter):
        # Assignment step: distance from every point to every centroid at once.
        distances = np.linalg.norm(data[:, None, :] - centroids[None, :, :], axis=2)
        labels = distances.argmin(axis=1)

        # Update step: an empty cluster keeps its previous centroid rather
        # than becoming NaN (mean of an empty slice).
        updated = centroids.copy()
        for j in range(n_clusters):
            members = data[labels == j]
            if len(members):
                updated[j] = members.mean(axis=0)

        # Identical centroids mean every further iteration would be a no-op.
        converged = np.array_equal(updated, centroids)
        centroids = updated
        if converged:
            break

    return labels, centroids


def kmeans_clustering(dataset, n_clusters=3, num_iter=100, seed=0):
    """Cluster *dataset* with k-means and plot the result next to the true species.

    The ``'Species'`` column is set aside and every remaining column is used
    as a numeric feature. Two scatter plots of the first two feature columns
    are shown: one coloured by the k-means assignment (with centroids marked
    as red stars) and one coloured by the actual species.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``'Species'`` column; all other columns must be numeric.
    n_clusters : int, default 3
        Number of clusters to fit.
    num_iter : int, default 100
        Maximum number of k-means iterations.
    seed : int, default 0
        Seed used to choose the initial centroids.

    Raises
    ------
    KeyError
        If *dataset* has no ``'Species'`` column.
    ValueError
        If there are fewer rows than ``n_clusters`` or a feature column is
        not numeric.
    """
    # Keep the labels aside for the comparison plot; they are not features.
    species = dataset['Species']
    data = np.asarray(dataset.drop('Species', axis=1).values, dtype=float)

    cluster_labels, centroids = _kmeans_fit(data, n_clusters, num_iter, seed)

    # Plot the output of the clustering
    plt.scatter(data[:, 0], data[:, 1], c=cluster_labels, cmap='viridis')
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='red', label='Centroids')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('K-Means Clustering')
    plt.legend()
    plt.show()

    # Plot the actual species. Casting to 'category' first makes this work
    # for plain string columns too; an already-categorical column is unchanged.
    species_codes = species.astype('category').cat.codes
    plt.scatter(data[:, 0], data[:, 1], c=species_codes, cmap='viridis')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('Actual Species')
    plt.show()


def pca_analysis(dataset):
    # Remove the 'Species' column and store it for future comparison
    species = dataset['Species']
    dataset = dataset.drop('Species', axis=1)
    
    # Convert the dataset to a NumPy array
    data = dataset.values
    
    # Calculate the covariance matrix
    covariance_matrix = np.cov(data.T)
    
    # Calculate the eigenvalues and eigenvectors
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
    
    # Sort the eigenvalues and corresponding eigenvectors in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sorted_indices]
    eigenvectors = eigenvectors[:, sorted_indices]
    
    # Select the first three eigenvectors
    selected_eigenvectors = eigenvectors[:, :3]
    
    # Project the data onto the selected eigenvectors
    transformed_data = np.dot(data, selected_eigenvectors)
    
    # Plot the data in the first three eigenvectors
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(transformed_data[:, 0], transformed_data[:, 1], transformed_data[:, 2], c=species.cat.codes, cmap='viridis')
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
