In [3]:
import numpy as np
import pandas as pd

# Load the Iris dataset. The raw file has no header row, so column names are
# supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris_df = pd.read_csv(url, header=None, names=column_names)

# Extract the four numerical features (every column except 'class') into a
# 2-D NumPy array with one row per sample and one column per feature.
feature_columns = column_names[:-1]
numerical_data = iris_df[feature_columns].to_numpy()

# Human-readable labels, in the same order as the columns of numerical_data.
FEATURE_LABELS = ("Sepal Length", "Sepal Width", "Petal Length", "Petal Width")


def print_feature_stats(title, values):
    """Print ``title``, then one aligned two-decimal line per feature.

    ``values`` must hold one number per entry of ``FEATURE_LABELS``, in the
    same order. A blank separator is printed after the last feature.
    """
    print(title)
    for label, value in zip(FEATURE_LABELS, values):
        # Pad "<label>:" to 13 characters (the longest heading) so the
        # numbers line up in a column.
        heading = f"{label}:"
        print(f"  {heading:<13} {value:.2f}")
    print("\n")


# --- Basic NumPy Operations ---

# 1. Calculate the mean of each feature (column): axis=0 reduces over the rows.
feature_means = np.mean(numerical_data, axis=0)
# Keep the individual per-feature names available for later use.
mean_sepal_length, mean_sepal_width, mean_petal_length, mean_petal_width = feature_means

print_feature_stats("Mean of each feature:", feature_means)

# A different statistic: the mean across the four features of each sample
# (row), obtained by reducing over axis=1 instead.
mean_per_sample = np.mean(numerical_data, axis=1)
print("Mean of the features for the first 5 samples:")
print(mean_per_sample[:5])
print("\n")

# 2. Calculate the standard deviation of each feature.
# np.std defaults to ddof=0, i.e. the population standard deviation.
feature_std_devs = np.std(numerical_data, axis=0)
(std_dev_sepal_length, std_dev_sepal_width,
 std_dev_petal_length, std_dev_petal_width) = feature_std_devs

print_feature_stats("Standard deviation of each feature:", feature_std_devs)

# 3. Find the minimum and maximum value of each feature
min_values = np.min(numerical_data, axis=0)
max_values = np.max(numerical_data, axis=0)

print("Minimum values for each feature:", min_values)
print("Maximum values for each feature:", max_values)
print("\n")

# 4. Calculate the median of each feature
feature_medians = np.median(numerical_data, axis=0)
(median_sepal_length, median_sepal_width,
 median_petal_length, median_petal_width) = feature_medians

print_feature_stats("Median of each feature:", feature_medians)

# 5. Reshape the NumPy array (just as an example operation).
# Consecutive rows are grouped three at a time, so the result has shape
# (n_samples / 3, 3, n_features): 50 groups of 3 samples x 4 features here.
# -1 lets NumPy infer the number of groups; the row count must be divisible
# by 3 or reshape raises ValueError.
n_features = numerical_data.shape[1]
reshaped_data = numerical_data.reshape(-1, 3, n_features)
# NOTE: reshaped_data[0] is the first *group*, i.e. the first three original
# samples, even though the label below says "first sample".
print("Reshaped data (first sample):")
print(reshaped_data[0])
print("Original shape:", numerical_data.shape)
print("New shape:", reshaped_data.shape)

Mean of each feature:
  Sepal Length: 5.84
  Sepal Width:  3.05
  Petal Length: 3.76
  Petal Width:  1.20


Mean of the features for the first 5 samples:
[2.55  2.375 2.35  2.35  2.55 ]


Standard deviation of each feature:
  Sepal Length: 0.83
  Sepal Width:  0.43
  Petal Length: 1.76
  Petal Width:  0.76


Minimum values for each feature: [4.3 2.  1.  0.1]
Maximum values for each feature: [7.9 4.4 6.9 2.5]


Median of each feature:
  Sepal Length: 5.80
  Sepal Width:  3.00
  Petal Length: 4.35
  Petal Width:  1.30


Reshaped data (first sample):
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]]
Original shape: (150, 4)
New shape: (50, 3, 4)
