In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Train/validation/test workflow on the iris dataset:
# load -> stratified 60/20/20 split -> fit logistic regression -> report
# accuracy on the validation set and on the held-out test set.

# Split configuration, kept in one place so the sizes and the seed are easy
# to change together.
RANDOM_STATE = 42
TEST_SIZE = 0.2  # share of ALL samples held out as the final test set
# Share of the REMAINING samples used for validation.
# 0.25 of the remaining 80% is 20% of the full dataset (0.25 * 0.80 = 0.20).
VAL_SIZE_OF_REMAINDER = 0.25

# 1. Load data
X, y = load_iris(return_X_y=True)

# 2. First split: carve off the final test set.
# `stratify` keeps the class proportions of `y` the same on both sides of the
# split; without it a small dataset can end up with noticeably unbalanced
# classes in the held-out sets.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y,
)

# 3. Second split: divide the remainder into train and validation sets,
# again stratified on the labels of the remainder.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=VAL_SIZE_OF_REMAINDER,
    random_state=RANDOM_STATE,
    stratify=y_train_val,
)

# Report the real share of each subset instead of hard-coded percentages, so
# the labels stay correct if the split sizes above are changed.
n_total = len(X)
print(f"Total samples: {n_total}")
print(f"Training samples: {len(X_train)} ({len(X_train) / n_total:.0%})")
print(f"Validation samples: {len(X_val)} ({len(X_val) / n_total:.0%})")
print(f"Test samples: {len(X_test)} ({len(X_test) / n_total:.0%})")
print("---")

# 4. Create and train the model (fit on the training subset only).
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# 5. Evaluate on the VALIDATION set.
# In a real project, this is the score you would use to tune hyperparameters.
val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, val_pred)
print(f"Model Accuracy on Validation Set: {val_accuracy * 100:.2f}%")

# 6. Finally, evaluate on the TEST set, which played no part in fitting or
# validating the model above, so it gives the final reported score.
test_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_pred)
print(f"Model Accuracy on Final Test Set: {test_accuracy * 100:.2f}%")