In [21]:
# 1. Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns

In [22]:
# 2. Load Dataset
# The 'iris' example dataset is fetched through seaborn. To read a local copy
# instead, swap the call below for: pd.read_csv('iris.csv')
df = sns.load_dataset(name='iris')

In [23]:
# 3. Data Exploration
# Multi-line pandas output is joined to its label with sep="\n". Passing the
# label as "...:\n" followed by the object makes print() insert its default
# separator space *after* the newline, which shifts the first output row one
# column to the right and misaligns the table header.
print("First 5 Rows:", df.head(), sep="\n")
print("\nShape of Dataset:", df.shape)
print("\nColumn Names:", df.columns.tolist())
print("\nSummary Statistics:", df.describe(), sep="\n")
print("\nMissing Values:", df.isnull().sum(), sep="\n")
print("\nData Types:", df.dtypes, sep="\n")

# If missing values are present, impute them before modelling, for example:
#   numeric (int/float) columns:
#       df['column_name'] = df['column_name'].fillna(df['column_name'].mean())
#   string/object columns:
#       df['column_name'] = df['column_name'].fillna(df['column_name'].mode()[0])

First 5 Rows:
    sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

Shape of Dataset: (150, 5)

Column Names: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

Summary Statistics:
        sepal_length  sepal_width  petal_length  petal_width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.057333      3.758000     1.199333
std        0.828066     0.435866      1.765298     0.762238
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.10

In [24]:
# 5. Label Encoding
# Encode the target whenever it is not already numeric. Testing only for the
# 'object' dtype would silently skip the encoding when pandas stores the labels
# with a 'category' or dedicated string dtype.
if not pd.api.types.is_numeric_dtype(df['species']):
    le = LabelEncoder()
    df['species'] = le.fit_transform(df['species'])

# 6. Feature and Target Split
X = df.drop('species', axis=1)
y = df['species']

# 7. Train-Test Split
# 40% of the rows are held out for testing; random_state fixes the shuffle so
# the same rows land in each partition on every run.
# NOTE(review): `stratify` is not passed, so per-class counts in the test set
# are not forced to match the full dataset -- consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# 8. Feature Scaling
# The scaler is fitted on the training rows only and then applied unchanged to
# the test rows, so no test-set statistics leak into training.
# X_train / X_test are rebound to the scaler's output from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
# KNN Model Training
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

# SVM Model Training
svm_model = SVC()
svm_model.fit(X_train, y_train)

# Random Forest Model Training
# Random forests are built with randomised sampling, so an unseeded model can
# give different results on every run. The seed matches the one used for the
# train/test split so a fresh "Restart & Run All" reproduces the same numbers.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

In [26]:
def evaluate_model(name, model, X_test, y_test, target_names=None):
    """Print accuracy, a classification report and a confusion matrix for a model.

    Parameters
    ----------
    name : str
        Label inserted into the "=== ... Evaluation ===" header line.
    model : object
        Fitted estimator; only its ``predict(X_test)`` method is used.
    X_test : array-like
        Feature rows passed to ``model.predict``.
    y_test : array-like
        True labels compared against the predictions.
    target_names : sequence of str, optional
        Display names for the classes, forwarded to ``classification_report``.
        The default ``None`` leaves the report labelled with the raw class
        values, as before this parameter existed.

    Returns
    -------
    None
        All results are written to stdout.
    """
    y_pred = model.predict(X_test)
    print(f"\n=== {name} Evaluation ===")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    # sep="\n" keeps multi-line output starting at column 0. A label ending in
    # "\n" followed by the object makes print() add its separator space after
    # the newline, shifting the first row of the report / matrix by one column.
    print(
        "Classification Report:",
        classification_report(y_test, y_pred, target_names=target_names),
        sep="\n",
    )
    print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


In [27]:
# Evaluate each fitted model on the same held-out test set, in the order
# KNN -> SVM -> Random Forest.
fitted_models = [
    ("KNN", knn_model),
    ("SVM", svm_model),
    ("Random Forest", rf_model),
]
for model_label, fitted_model in fitted_models:
    evaluate_model(model_label, fitted_model, X_test, y_test)


=== KNN Evaluation ===
Accuracy: 0.9833333333333333
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.95      1.00      0.97        19
           2       1.00      0.94      0.97        18

    accuracy                           0.98        60
   macro avg       0.98      0.98      0.98        60
weighted avg       0.98      0.98      0.98        60

Confusion Matrix:
 [[23  0  0]
 [ 0 19  0]
 [ 0  1 17]]

=== SVM Evaluation ===
Accuracy: 0.9833333333333333
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.95      1.00      0.97        19
           2       1.00      0.94      0.97        18

    accuracy                           0.98        60
   macro avg       0.98      0.98      0.98        60
weighted avg       0.98      0.98      0.98        60

Confusion Matrix:
 [[23  0  