In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fetch the bundled Iris data and separate the feature matrix from the labels.
iris = load_iris()
X = iris.data
y = iris.target

# Assemble one table holding the features plus the labels as a trailing
# 'target' column, so the summaries below cover every column at once.
iris_df = pd.DataFrame(
    np.column_stack((X, y)),
    columns=[*iris.feature_names, 'target'],
)

# Statistical Analysis and Visualization
# Pairwise column correlations and per-column descriptive statistics.
correlation_matrix = iris_df.corr()
summary_stats = iris_df.describe()

# Visualize the distribution of the target variable.
# The integer class codes in y index into iris.target_names, so the bin edges
# and tick labels are derived from it instead of being hard-coded: the x axis
# then shows species names rather than the bare codes 0/1/2.
n_species = len(iris.target_names)
plt.figure(figsize=(8, 5))
# One unit-wide bin per class code; align='left' centres each bar on its code.
plt.hist(y, bins=np.arange(n_species + 1), align='left', edgecolor='black', alpha=0.7)
plt.xticks(np.arange(n_species), iris.target_names)
plt.xlabel('Species')
plt.ylabel('Count')
plt.title('Distribution of Iris Species')
plt.show()

# Preprocessing
# Hold out 20% of the samples for testing. stratify=y keeps the class
# proportions of y in both partitions, so the small test set is not skewed
# towards any class by the random draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training partition only and reuse its statistics for
# the test partition, so the test data does not influence the preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Selection and Training
logreg = LogisticRegression(random_state=42)
knn = KNeighborsClassifier(n_neighbors=3)

# Both classifiers are trained on the same scaled training partition.
logreg.fit(X_train_scaled, y_train)
knn.fit(X_train_scaled, y_train)

# Evaluation
def evaluate_model(model, X_test_scaled, y_test):
    """Score a fitted classifier against held-out labels.

    Args:
        model: Object exposing ``predict``; it is called once on
            ``X_test_scaled``.
        X_test_scaled: Feature matrix passed to ``model.predict``.
        y_test: Reference labels the predictions are compared with.

    Returns:
        A 3-tuple ``(accuracy, confusion_matrix, classification_report)``
        as produced by the scikit-learn metric functions of the same names.
    """
    predictions = model.predict(X_test_scaled)
    return (
        accuracy_score(y_test, predictions),
        confusion_matrix(y_test, predictions),
        classification_report(y_test, predictions),
    )

# Score both fitted classifiers on the same scaled hold-out set.
(
    accuracy_logreg,
    confusion_mat_logreg,
    classification_rep_logreg,
) = evaluate_model(logreg, X_test_scaled, y_test)
(
    accuracy_knn,
    confusion_mat_knn,
    classification_rep_knn,
) = evaluate_model(knn, X_test_scaled, y_test)

# Results and Discussion
# Each entry pairs a heading with the value printed after it; entries are
# emitted in the order listed.
results_to_print = (
    ("Logistic Regression Accuracy:", accuracy_logreg),
    ("\nConfusion Matrix (Logistic Regression):\n", confusion_mat_logreg),
    ("\nClassification Report (Logistic Regression):\n", classification_rep_logreg),
    ("\nK-Nearest Neighbors Accuracy:", accuracy_knn),
    ("\nConfusion Matrix (K-Nearest Neighbors):\n", confusion_mat_knn),
    ("\nClassification Report (K-Nearest Neighbors):\n", classification_rep_knn),
)
for heading, result in results_to_print:
    print(heading, result)
