**Load the "iris.csv" dataset and convert it to a NumPy array. Explor
the data types and shapes of the array.**

In [3]:
import numpy as np
import pandas as pd

# Load the dataset using pandas
df = pd.read_csv('Iris.csv')

# Convert the dataframe to a NumPy array
iris_array = df.to_numpy()

# Explore the data types and shapes
array_shape = iris_array.shape
array_dtypes = [type(item) for item in iris_array[0]]

array_shape, array_dtypes


((150, 6), [int, float, float, float, float, str])

**Perform Broadcasting Operations to Standardize (z- score normalization) the Numerical Features**

In [4]:
# Extract the numerical features (assuming they are in the first four columns)
numerical_features = iris_array[:, :-1].astype(float)

# Calculate the mean and standard deviation
mean = numerical_features.mean(axis=0)
std = numerical_features.std(axis=0)

# Perform z-score normalization
standardized_features = (numerical_features - mean) / std

standardized_features


array([[-1.72054204e+00, -9.00681170e-01,  1.03205722e+00,
        -1.34127240e+00, -1.31297673e+00],
       [-1.69744751e+00, -1.14301691e+00, -1.24957601e-01,
        -1.34127240e+00, -1.31297673e+00],
       [-1.67435299e+00, -1.38535265e+00,  3.37848329e-01,
        -1.39813811e+00, -1.31297673e+00],
       [-1.65125846e+00, -1.50652052e+00,  1.06445364e-01,
        -1.28440670e+00, -1.31297673e+00],
       [-1.62816394e+00, -1.02184904e+00,  1.26346019e+00,
        -1.34127240e+00, -1.31297673e+00],
       [-1.60506942e+00, -5.37177559e-01,  1.95766909e+00,
        -1.17067529e+00, -1.05003079e+00],
       [-1.58197489e+00, -1.50652052e+00,  8.00654259e-01,
        -1.34127240e+00, -1.18150376e+00],
       [-1.55888037e+00, -1.02184904e+00,  8.00654259e-01,
        -1.28440670e+00, -1.31297673e+00],
       [-1.53578584e+00, -1.74885626e+00, -3.56360566e-01,
        -1.34127240e+00, -1.31297673e+00],
       [-1.51269132e+00, -1.14301691e+00,  1.06445364e-01,
        -1.28440670e+00

**Compute the Correlation Matrix for the Numerical Features**

In [5]:
# Compute the correlation matrix
correlation_matrix = np.corrcoef(standardized_features, rowvar=False)

correlation_matrix


array([[ 1.        ,  0.71667627, -0.39772881,  0.88274732,  0.89975858],
       [ 0.71667627,  1.        , -0.10936925,  0.87175416,  0.81795363],
       [-0.39772881, -0.10936925,  1.        , -0.4205161 , -0.35654409],
       [ 0.88274732,  0.87175416, -0.4205161 ,  1.        ,  0.9627571 ],
       [ 0.89975858,  0.81795363, -0.35654409,  0.9627571 ,  1.        ]])

**Implement a Simple k-Nearest Neighbors Algorithm**

In [6]:
from collections import Counter

# Define the k-NN algorithm
def knn_predict(X_train, y_train, X_test, k=3):
    distances = np.sqrt(((X_train - X_test[:, np.newaxis]) ** 2).sum(axis=2))
    nearest_neighbors = np.argsort(distances, axis=1)[:, :k]
    top_k_labels = y_train[nearest_neighbors]
    predictions = [Counter(labels).most_common(1)[0][0] for labels in top_k_labels]
    return np.array(predictions)

# Prepare the data
X = standardized_features
y = iris_array[:, -1]

# Split the data into training and testing sets (80-20 split)
np.random.seed(0)
indices = np.random.permutation(len(X))
split_point = int(0.8 * len(X))
X_train, X_test = X[indices[:split_point]], X[indices[split_point:]]
y_train, y_test = y[indices[:split_point]], y[indices[split_point:]]

# Make predictions
k = 3
predictions = knn_predict(X_train, y_train, X_test, k)

# Evaluate the predictions
accuracy = np.mean(predictions == y_test)
accuracy


1.0

**Additional Numerical Operations or Data Manipulation**

In [7]:
# Calculate the mean of each feature for each class
unique_classes = np.unique(y)
mean_per_class = np.array([standardized_features[y == cls].mean(axis=0) for cls in unique_classes])

mean_per_class


array([[-1.15472620e+00, -1.01457897e+00,  8.42306793e-01,
        -1.30487835e+00, -1.25512862e+00],
       [ 8.88178420e-18,  1.12282227e-01, -6.57184421e-01,
         2.85086729e-01,  1.67408915e-01],
       [ 1.15472620e+00,  9.02296742e-01, -1.85122372e-01,
         1.01979162e+00,  1.08771971e+00]])