# Intermediate 2 — Pandas with scikit‑learn (Pipeline)

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the iris data created earlier. The "species" column is the label;
# every remaining column is used as a feature.
# NOTE(review): assumes all non-label columns are numeric, since they are
# fed straight into StandardScaler — confirm against the CSV.
df = pd.read_csv(r"/mnt/data/pandas_fundamentals_challenges/data_iris.csv")

X = df.drop(columns=["species"])
y = df["species"]

# Train/test split: hold out 25% of the rows for evaluation. `stratify=y`
# keeps the class proportions the same in both parts, and the fixed
# `random_state` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Pipeline: scale the features, then train Logistic Regression.
# `multi_class` is deliberately not passed: "auto" was already the default,
# and the parameter is deprecated since scikit-learn 1.5 (passing it emits a
# FutureWarning and will fail once it is removed). Omitting it yields the
# same model without the warning.
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=1000)),
])

# Fitting the pipeline fits the scaler on the training rows only and then
# trains the classifier on the scaled result; predict() reuses that scaler.
pipe.fit(X_train, y_train)
pred = pipe.predict(X_test)

# Evaluate on the held-out rows.
acc = accuracy_score(y_test, pred)
print(f"Accuracy: {acc:.3f}")
print("\nClassification Report:\n", classification_report(y_test, pred))



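**Explanation:** We used Pandas to load the data and separate the features (`X`) from the labels (`y`), then used scikit‑learn's `Pipeline` to standardize the features and train a logistic regression model in a single step.
Because the scaler lives inside the pipeline, it is fitted on the training split only and the same scaling is reused automatically when predicting on the test split. This shows how Pandas integrates naturally with ML workflows.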
