In [10]:
# 📊 Assignment 6: Logistic Regression vs Random Forest
# (using built-in Iris dataset)

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# --- Step 1: Load built-in Iris dataset ---
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target

print("Dataset Preview:")
df.head()

# --- Step 2: Features and Target ---
X = df.drop('target', axis=1)
y = df['target']

# --- Step 3: Train-test split ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Step 4: Logistic Regression ---
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)

log_acc = accuracy_score(y_test, log_pred)
print("\nLogistic Regression Accuracy:", log_acc)

# --- Step 5: Random Forest ---
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

rf_acc = accuracy_score(y_test, rf_pred)
print("Random Forest Accuracy:", rf_acc)

# --- Step 6: Comparison ---
print("\n📈 Model Comparison:")
print(f"Logistic Regression Accuracy: {log_acc:.2f}")
print(f"Random Forest Accuracy: {rf_acc:.2f}")

if rf_acc > log_acc:
    print("✅ Random Forest performs better.")
else:
    print("✅ Logistic Regression performs better.")


Dataset Preview:

Logistic Regression Accuracy: 1.0
Random Forest Accuracy: 1.0

📈 Model Comparison:
Logistic Regression Accuracy: 1.00
Random Forest Accuracy: 1.00
✅ Logistic Regression performs better.
