In [1]:
# Import libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Build a single DataFrame: one column per Iris measurement, plus the
# integer class label in a trailing 'target' column.
iris = load_iris()
dataset = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    target=iris.target
)

# Preview the first five rows, then print a visual separator.
print("Dataset successfully loaded. Here are the first 5 rows:")
print(dataset.head(5))
print("\n" + "=" * 50 + "\n")

# Features are every column except the label; the label is the target.
X = dataset.drop(columns='target')
y = dataset['target']

# Report both shapes (the trailing "\n" leaves a blank line after them).
print(
    f"Features (X) shape: {X.shape}",
    f"Target (y) shape: {y.shape}\n",
    sep="\n",
)

# ---------------------------------------------------
# 1️⃣ 2-Way Split: hold out 20% for testing, train on the other 80%
# ---------------------------------------------------
print("--- 1. Performing a 2-Way Split (80% Train, 20% Test) ---")

# stratify=y asks scikit-learn to split in a stratified fashion using the
# class labels; the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# One print call, one line per subset, followed by the section separator.
print(
    f"X_train shape: {X_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    "\n" + "=" * 50 + "\n",
    sep="\n",
)

# ---------------------------------------------------
# 2️⃣ Performing a 3-Way Split (70% Train, 15% Validation, 15% Test)
# ---------------------------------------------------
# NOTE: the percentages in the banner below are literal text; keep them in
# sync with TEST_FRACTION / VAL_FRACTION if those are changed.
print("--- 2. Performing a 3-Way Split (70% Train, 15% Validation, 15% Test) ---")

# Desired share of the FULL dataset for each held-out subset. Everything
# else in this section is derived from these values, so they only need to be
# edited in one place.
TEST_FRACTION = 0.15
VAL_FRACTION = 0.15
SPLIT_SEED = 42  # fixed seed so both splits are repeatable

# Both subsets must be non-empty and must leave something to train on.
assert TEST_FRACTION > 0 and VAL_FRACTION > 0, "split fractions must be positive"
assert TEST_FRACTION + VAL_FRACTION < 1, "no data would be left for training"

# Step 1: carve the test set off the full dataset.
X_train_val, X_test_3way, y_train_val, y_test_3way = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED, stratify=y
)

# Step 2: the second split operates on the remaining (1 - TEST_FRACTION) of
# the data, so the validation share has to be rescaled relative to that
# remainder (0.15 / 0.85 ~= 0.176 for the default values).
val_size_relative = VAL_FRACTION / (1 - TEST_FRACTION)
X_train_3way, X_val, y_train_3way, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=val_size_relative,
    random_state=SPLIT_SEED,
    stratify=y_train_val,
)

# Sanity check: the three subsets together account for every original row.
assert len(X_train_3way) + len(X_val) + len(X_test_3way) == len(X), (
    "3-way split lost or duplicated rows"
)
assert len(y_train_3way) + len(y_val) + len(y_test_3way) == len(y), (
    "3-way split lost or duplicated labels"
)

# Print sizes (absolute count and share of the full dataset)
print(f"Total original samples: {len(X)}")
print(f"Training set size: {len(X_train_3way)} ({len(X_train_3way)/len(X):.0%})")
print(f"Validation set size: {len(X_val)} ({len(X_val)/len(X):.0%})")
print(f"Test set size: {len(X_test_3way)} ({len(X_test_3way)/len(X):.0%})")

print("\nFinal Shapes:")
print(f"X_train shape: {X_train_3way.shape}")
print(f"X_val shape:   {X_val.shape}")
print(f"X_test shape:  {X_test_3way.shape}")


Dataset successfully loaded. Here are the first 5 rows:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


Features (X) shape: (150, 4)
Target (y) shape: (150,)

--- 1. Performing a 2-Way Split (80% Train, 20% Test) ---
X_train shape: (120, 4)
X_test shape: (30, 4)
y_train shape: (120,)
y_test shape: (30,)


--- 2. Performing a 3-Way Split (70% Train, 15% Validation, 15% Test) ---
Total original samples: 150
Training set size: 104 (69%)
Validation set size: 23 (15%)
Test set size: 23 (15%)

Final 