In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Iris dataset bundle and unpack its feature matrix (X) and
# integer class labels (y) for use by the rest of the notebook.
iris = load_iris()
X, y = iris.data, iris.target

In [None]:
# Tabular view of the dataset for exploration: one column per feature, the
# integer class label in 'target', and its readable name in 'species'.
df = pd.DataFrame(X, columns=iris.feature_names).assign(
    target=y,
    species=lambda frame: frame['target'].map(
        {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
    ),
)

In [None]:
# Quick structural overview of the DataFrame: column dtypes / non-null
# counts, overall shape, and the first few rows.
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\nDataset Shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the numeric
# columns of the DataFrame, printed under a heading.
print("\nStatistical Summary:", df.describe(), sep="\n")

In [None]:
# Number of samples per species, to check how balanced the classes are.
class_counts = df['species'].value_counts()
print("\nClass Distribution:")
print(class_counts)

In [None]:
# Exploratory plots.
#
# Figure 1: a 2x3 grid holding one histogram (with KDE) per feature, split by
# species, plus the feature correlation heatmap.
# Figure 2: the seaborn pairplot. pairplot is a figure-level function: it
# always creates its own figure and cannot be drawn into a subplot slot, so it
# gets a separate figure instead of a cell in the grid.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()

histogram_titles = {
    'sepal length (cm)': 'Sepal Length Distribution',
    'sepal width (cm)': 'Sepal Width Distribution',
    'petal length (cm)': 'Petal Length Distribution',
    'petal width (cm)': 'Petal Width Distribution',
}
for ax, (feature, title) in zip(axes, histogram_titles.items()):
    sns.histplot(data=df, x=feature, hue='species', kde=True, ax=ax)
    ax.set_title(title)

# Correlations are computed over the feature columns only (no label columns).
correlation_matrix = df[iris.feature_names].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=axes[4])
axes[4].set_title('Feature Correlation Matrix')

# The sixth grid slot is unused; hide its empty axes.
axes[5].axis('off')

fig.tight_layout()
plt.show()

# Restrict the pairplot to the real features so the integer 'target' label is
# not plotted as if it were a measurement.
sns.pairplot(df, vars=iris.feature_names, hue='species', diag_kind='hist')
# pairplot leaves its own figure as the current pyplot figure, so suptitle
# applies to the whole grid; y > 1 keeps it clear of the top row.
plt.suptitle('Pairplot of Features', y=1.02)
plt.show()

In [None]:
# Hold out 20% of the samples for testing. The split is stratified on the
# labels and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Candidate classifiers keyed by display name. Insertion order is the order in
# which they are trained and reported. Estimators that accept a seed are given
# random_state=42.
models = dict([
    ('Support Vector Machine', SVC(random_state=42)),
    ('Logistic Regression', LogisticRegression(random_state=42, max_iter=200)),
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Random Forest', RandomForestClassifier(random_state=42, n_estimators=100)),
    ('Naive Bayes', GaussianNB()),
])

In [None]:
# Fit every candidate model and collect its test-set accuracy in `results`
# (display name -> accuracy). The SVM and logistic regression are trained and
# evaluated on the standardized features; the remaining models use the raw
# features.
results = {}
scaled_input_models = ('Support Vector Machine', 'Logistic Regression')

for name, model in models.items():
    use_scaled = name in scaled_input_models
    train_features = X_train_scaled if use_scaled else X_train
    test_features = X_test_scaled if use_scaled else X_test

    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

    # One print call, newline-separated, emits the full per-model report.
    print(
        f"\n{name} Results:",
        f"Accuracy: {accuracy:.4f}",
        "Classification Report:",
        classification_report(y_test, y_pred, target_names=iris.target_names),
        "Confusion Matrix:",
        confusion_matrix(y_test, y_pred),
        sep="\n",
    )

In [None]:
# Pick the model name with the highest recorded accuracy; on a tie the model
# that was evaluated first wins.
best_model = max(results.items(), key=lambda entry: entry[1])[0]
print(f"\nBest Model: {best_model} with accuracy: {results[best_model]:.4f}")

In [None]:
# Bar chart comparing the test accuracy of every evaluated model, with the
# numeric accuracy written above each bar.
fig, ax = plt.subplots(figsize=(10, 6))
model_names = list(results.keys())
accuracies = list(results.values())
bars = ax.bar(model_names, accuracies, color=['skyblue', 'lightgreen', 'lightcoral', 'gold', 'plum'])
ax.set_title('Model Comparison - Accuracy Scores')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Models')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
# Accuracy tops out at 1.0; leave headroom above that so the value labels of
# (near-)perfect models stay inside the axes instead of colliding with the title.
ax.set_ylim(0, 1.1)

for bar, acc in zip(bars, accuracies):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
            f'{acc:.3f}', ha='center', va='bottom')

fig.tight_layout()
plt.show()

In [None]:
# Plot feature importances, sorted from most to least important, for each
# tree-based model.
#
# The estimators in `models` were already fitted on (X_train, y_train) by the
# evaluation loop, using the same hyperparameters and seed that this cell used
# to re-create, so they are reused here instead of being rebuilt and refitted.
# This cell therefore requires the evaluation loop to have been run first.
feature_importance_models = ['Decision Tree', 'Random Forest']
for name in feature_importance_models:
    model = models[name]
    importances = model.feature_importances_

    # Feature indices ordered by descending importance.
    indices = np.argsort(importances)[::-1]

    plt.figure(figsize=(8, 5))
    plt.bar(range(len(importances)), importances[indices])
    plt.title(f'Feature Importance - {name}')
    plt.xlabel('Features')
    plt.ylabel('Importance')
    plt.xticks(range(len(importances)), [iris.feature_names[i] for i in indices], rotation=45)
    plt.tight_layout()
    plt.show()