In [7]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [8]:
# 1. Load the Iris dataset
# The returned object exposes the feature matrix as `.data` and the class
# labels as `.target`; both are read in the next cell.
iris = datasets.load_iris()

In [9]:
# Pull the feature matrix (X) and the label vector (y) out of the loaded
# dataset in a single step.
X, y = iris.data, iris.target

In [23]:
# 2. Hold out 20% of the samples for testing and train on the remaining 80%.
#    The fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [24]:
# Eyeball the training features produced by the split.
# NOTE(review): a bare `X_train` renders the array in full; `X_train[:5]`
# would give the same kind of preview with far less output.
X_train

array([[4.6, 3.6, 1. , 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [6.7, 3.1, 4.4, 1.4],
       [4.8, 3.4, 1.6, 0.2],
       [4.4, 3.2, 1.3, 0.2],
       [6.3, 2.5, 5. , 1.9],
       [6.4, 3.2, 4.5, 1.5],
       [5.2, 3.5, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.2, 4.1, 1.5, 0.1],
       [5.8, 2.7, 5.1, 1.9],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [5.4, 3.9, 1.3, 0.4],
       [5.4, 3.7, 1.5, 0.2],
       [5.5, 2.4, 3.7, 1. ],
       [6.3, 2.8, 5.1, 1.5],
       [6.4, 3.1, 5.5, 1.8],
       [6.6, 3. , 4.4, 1.4],
       [7.2, 3.6, 6.1, 2.5],
       [5.7, 2.9, 4.2, 1.3],
       [7.6, 3. , 6.6, 2.1],
       [5.6, 3. , 4.5, 1.5],
       [5.1, 3.5, 1.4, 0.2],
       [7.7, 2.8, 6.7, 2. ],
       [5.8, 2.7, 4.1, 1. ],
       [5.2, 3.4, 1.4, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [5.1, 3.8, 1.9, 0.4],
       [5. , 2. , 3.5, 1. ],
       [6.3, 2.7, 4.9, 1.8],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [5.6, 2

In [25]:
# 3. Apply Standardization (Z-score scaling).
#    The scaler is fitted on the training split only and then reused on the
#    test split, so no information from the test data enters preprocessing.
standard_scaler = StandardScaler()
X_train_standard = standard_scaler.fit_transform(X_train)
X_test_standard = standard_scaler.transform(X_test)

# 4. Apply Normalization (Min-Max scaling), again fitted on the training
#    split only.
min_max_scaler = MinMaxScaler()
X_train_normalized = min_max_scaler.fit_transform(X_train)
X_test_normalized = min_max_scaler.transform(X_test)

# 5. Train and evaluate the model using Standardized data.
#    Each experiment gets its own estimator, so this fitted model is still
#    available for inspection after the second experiment has run.
model_standard = LogisticRegression(max_iter=200)
model_standard.fit(X_train_standard, y_train)
y_pred_standard = model_standard.predict(X_test_standard)
accuracy_standard = accuracy_score(y_test, y_pred_standard)

# 6. Train and evaluate the model using Normalized data.
model_normalized = LogisticRegression(max_iter=200)
model_normalized.fit(X_train_normalized, y_train)
y_pred_normalized = model_normalized.predict(X_test_normalized)
accuracy_normalized = accuracy_score(y_test, y_pred_normalized)

# Backward compatibility: this cell used to refit a single `model` in place,
# leaving it fitted on the normalized data. Keep that binding for any later
# cell that still reads `model`.
model = model_normalized

# 7. Compare the accuracy
print(f"Accuracy with Standardization (Z-score): {accuracy_standard:.4f}")
print(f"Accuracy with Normalization (Min-Max): {accuracy_normalized:.4f}")

Accuracy with Standardization (Z-score): 1.0000
Accuracy with Normalization (Min-Max): 0.9667


Result: What does this mean?
Accuracy with Standardization (Z-score) = 1.0000:

The model achieved 100% accuracy on the test set when the data was standardized (i.e., when the features were rescaled such that each feature has a mean of 0 and a standard deviation of 1).
This shows that standardization worked well for this particular dataset, model, and train/test split: the logistic regression model classified every sample in the test set correctly.
Accuracy with Normalization (Min-Max) = 0.9667:

The model achieved an accuracy of 96.67% when the data was normalized (i.e., each feature was rescaled to a fixed range — here [0, 1], the default of `MinMaxScaler`).
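This indicates that normalization still performed quite well. The test set holds 30 samples (20% of the 150 Iris samples), so 96.67% corresponds to 29 correct predictions — exactly one more misclassification than with standardized data. A one-sample difference on a single split is too small to conclude that one scaling method is better in general; cross-validation would give a more reliable comparison.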