In [24]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
import joblib
import random

# --- Reproducibility ---
# One shared seed: applied to the stdlib and NumPy global generators here,
# and available to later code that needs a fixed random state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# --- Load Iris dataset ---
# Build the full feature table once; both targets below are derived from it.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Classification target: the dataset's class labels.
y_class = iris.target

# Regression target: the 'petal length (cm)' column. That column is removed
# from the regression predictors so the model cannot see its own target.
y_reg = X['petal length (cm)']
X_reg = X.drop('petal length (cm)', axis=1)

# --- Split data for regression ---
# 20% of the rows are held out for testing; random_state fixes the shuffle.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg,
    y_reg,
    random_state=SEED,
    test_size=0.2,
)

# Scale regression features
# The scaler is fitted on the training rows only, then applied to both splits.
scaler_reg = StandardScaler().fit(X_train_reg)
X_train_reg_scaled = scaler_reg.transform(X_train_reg)
X_test_reg_scaled = scaler_reg.transform(X_test_reg)

# --- Train Multiple Linear Regression ---
# fit() returns the estimator itself, so construction and training are chained.
reg_model = LinearRegression().fit(X_train_reg_scaled, y_train_reg)

# Predict & evaluate regression on the held-out split
y_pred_reg = reg_model.predict(X_test_reg_scaled)
reg_r2 = r2_score(y_test_reg, y_pred_reg)
reg_mse = mean_squared_error(y_test_reg, y_pred_reg)
print("Multiple Linear Regression Results:")
print(f"R² Score: {reg_r2:.4f}")
print(f"Mean Squared Error: {reg_mse:.4f}")

# Save regression model and scaler
# Both are persisted: new inputs must be scaled with the same fitted scaler
# before being passed to the model.
joblib.dump(value=reg_model, filename="iris_linear_regression_model.pkl")
joblib.dump(value=scaler_reg, filename="iris_regression_scaler.pkl")


# --- Prepare data for classification (KNN) ---
# The full feature table X is used here. Stratifying on the labels keeps the
# class proportions the same in the train and test splits.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X,
    y_class,
    random_state=SEED,
    stratify=y_class,
    test_size=0.2,
)

# Scale classification features
# The scaler is fitted on the training rows only, then applied to both splits.
scaler_clf = StandardScaler().fit(X_train_clf)
X_train_clf_scaled = scaler_clf.transform(X_train_clf)
X_test_clf_scaled = scaler_clf.transform(X_test_clf)

# --- Evaluate KNN for different K values ---
# Fits one KNeighborsClassifier per K in 1..15 on the scaled training split,
# prints each test-split accuracy, keeps the best model, reports on it and
# saves it together with the classification scaler.
#
# NOTE(review): every candidate K is scored on the test split and the winner
# is picked from those same scores, so the reported "best" accuracy is an
# optimistic estimate. Consider selecting K by cross-validation on the
# training split and scoring the test split once.
print("\nKNN Classification Performance for different K values:")

# Start below any reachable accuracy and bind best_model up front so the first
# candidate always becomes the incumbent. With a starting value of 0 and an
# unbound best_model, a sweep in which no K scored above 0 would raise
# NameError in the report below (or, in a live kernel, silently reuse a model
# left over from an earlier run).
best_k = None
best_acc = -1.0
best_model = None

for k in range(1, 16):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_clf_scaled, y_train_clf)
    y_pred_clf = knn.predict(X_test_clf_scaled)
    acc = accuracy_score(y_test_clf, y_pred_clf)
    print(f"K={k}: Accuracy = {acc:.4f}")
    # Strict '>' means the smallest K wins when several candidates tie.
    if acc > best_acc:
        best_acc = acc
        best_k = k
        best_model = knn

print(f"\nBest K: {best_k} with accuracy {best_acc:.4f}")
print("Classification report for best K:")
print(classification_report(y_test_clf, best_model.predict(X_test_clf_scaled)))

# Save best KNN model and scaler
# The chosen K is embedded in the model's file name.
joblib.dump(best_model, f"iris_knn_k{best_k}_model.pkl")
joblib.dump(scaler_clf, "iris_knn_scaler.pkl")

print("\n Models and scalers saved.")


Multiple Linear Regression Results:
R² Score: 0.9603
Mean Squared Error: 0.1300

KNN Classification Performance for different K values:
K=1: Accuracy = 0.9667
K=2: Accuracy = 0.9333
K=3: Accuracy = 0.9333
K=4: Accuracy = 0.9333
K=5: Accuracy = 0.9333
K=6: Accuracy = 0.9333
K=7: Accuracy = 0.9667
K=8: Accuracy = 0.9333
K=9: Accuracy = 0.9667
K=10: Accuracy = 0.9667
K=11: Accuracy = 0.9667
K=12: Accuracy = 0.9667
K=13: Accuracy = 0.9667
K=14: Accuracy = 0.9667
K=15: Accuracy = 0.9667

Best K: 1 with accuracy 0.9667
Classification report for best K:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.91      1.00      0.95        10
           2       1.00      0.90      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30


 Models and scalers saved.
