# Lab 1 — Easy ML Baseline (Built-in Dataset)

**Dataset:** Iris (bundled with scikit-learn; loaded via `sklearn.datasets.load_iris`)

**Goal:** Train → Evaluate → Compare models.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Reaching this line means every import in this cell resolved without error.
print("✅ Ready")

## 1) Load dataset

In [None]:
# Load Iris with pandas containers so a ready-made DataFrame is available.
iris = load_iris(as_frame=True)

# The returned Bunch is dict-like; "frame" is the DataFrame that includes the
# 'target' column split off in the next step.
df = iris["frame"]

# Trailing expression: the notebook renders the first five rows as a preview.
df.head(5)

## 2) Train/test split

In [None]:
# Separate the label column from the feature columns.
y = df['target']
X = df.drop(columns='target')

# Hold out 25% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

# Trailing expression: show the (rows, columns) shape of each partition.
(X_train.shape, X_test.shape)

## 3) Baseline model — Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier. `fit` returns the estimator itself, so construction and
# training can be chained while `lr` still ends up bound to the fitted model.
lr = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predictions on the held-out rows; also reused by the confusion-matrix cell.
y_pred = lr.predict(X_test)

# Per-class precision / recall / F1, labelled with the species names.
print(
    classification_report(
        y_test,
        y_pred,
        target_names=iris.target_names,
    )
)

## 4) Confusion Matrix

In [None]:
# Confusion matrix for the predictions currently stored in `y_pred`
# (true labels first, predicted labels second).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Wrap the raw counts in a display object so the axes carry species names.
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=iris.target_names,
)
disp.plot()
plt.show()

## 5) Compare with Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Second model for comparison: a 200-tree forest with a fixed seed so results
# are reproducible. `fit` returns the estimator, so `rf` is the trained model.
rf = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
).fit(X_train, y_train)

# Evaluate on the same held-out rows used for the logistic regression.
y_pred_rf = rf.predict(X_test)

# Same report format as the baseline, to allow a side-by-side reading.
print(
    classification_report(
        y_test,
        y_pred_rf,
        target_names=iris.target_names,
    )
)

## 6) Feature Importance (quick explainability)

In [None]:
# Pair each fitted importance score with its feature name and order the
# result from most to least important.
importances = pd.Series(
    data=rf.feature_importances_,
    index=X.columns,
).sort_values(ascending=False)

# Bar chart of the ranked importances; the title is applied to the current axes.
importances.plot.bar()
plt.title('Feature Importance (Random Forest)')
plt.show()