In [None]:
# Phase 0: Setup
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV, learning_curve, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Silence only deprecation-style noise. A blanket filterwarnings('ignore')
# would also hide ConvergenceWarning and FitFailedWarning, which report real
# problems (non-converged solvers, failed cross-validation fits) in the
# model-evaluation cells of this notebook.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
print("Setup complete")

In [None]:
# Phase 1: Load the Iris dataset and standardize its features
iris = load_iris()
X = iris.data
y = iris.target

# NOTE(review): the scaler is fitted on the full dataset before any
# cross-validation split, so every fold sees statistics computed from its own
# held-out rows. Wrapping the scaler and estimator in a Pipeline would avoid
# this, but X_scaled is consumed directly by all later cells.
feature_scaler = StandardScaler()
X_scaled = feature_scaler.fit_transform(X)

print(f"Dataset: {X.shape}")

In [None]:
# Phase 2: K-Fold Cross-Validation
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression scored by accuracy over 5 folds of the
# standardized features.
model = LogisticRegression(max_iter=1000)
scores = cross_val_score(
    estimator=model,
    X=X_scaled,
    y=y,
    scoring='accuracy',
    cv=5,
)

# Per-fold accuracies first, then their mean and spread.
print(f"Cross-validation scores: {scores}")
print(f"Mean: {scores.mean():.3f} (+/- {scores.std():.3f})")

In [None]:
# Phase 3: Grid Search
# Exhaustively evaluate every (C, max_iter) combination of a logistic
# regression with 5-fold cross-validated accuracy.
param_grid = {'C': [0.1, 1, 10], 'max_iter': [100, 1000]}
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_scaled, y)
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.3f}")
print("\nResults DataFrame:")

# Pick the summary columns by name. Column positions in cv_results_ depend on
# how many parameters are searched and how many folds are used, so positional
# indexing silently selects the wrong columns when the grid changes.
summary_columns = ['params', 'mean_test_score', 'std_test_score', 'rank_test_score']
results_df = pd.DataFrame(grid_search.cv_results_)[summary_columns]
print(results_df)

In [None]:
# Phase 4: SVM Grid Search
# `gamma` is a kernel coefficient that the linear kernel does not use, so it is
# searched only for the RBF kernel. A single flat grid would refit the linear
# SVM once per gamma value and produce duplicate candidates.
svm_param_grid = [
    {'kernel': ['linear'], 'C': [1, 10, 100]},
    {'kernel': ['rbf'], 'C': [1, 10, 100], 'gamma': ['scale', 'auto']},
]
# scoring is spelled out to match the logistic-regression grid search; accuracy
# is also what a classifier's default scorer reports.
svm_grid = GridSearchCV(SVC(), svm_param_grid, cv=5, scoring='accuracy', n_jobs=-1)
svm_grid.fit(X_scaled, y)
print(f"Best SVM parameters: {svm_grid.best_params_}")
print(f"Best SVM score: {svm_grid.best_score_:.3f}")

In [None]:
# Phase 5: Random Search
# Sample 10 random forest configurations instead of enumerating a full grid.
# randint(low, high) draws integers from [low, high).
rf_param_distributions = {
    'n_estimators': randint(10, 200),
    'max_depth': randint(5, 20),
}
random_search = RandomizedSearchCV(
    # Seed the forest as well as the search: random_state on the search only
    # fixes which configurations are sampled, not the trees grown for each one.
    RandomForestClassifier(random_state=42),
    rf_param_distributions,
    n_iter=10, cv=5, scoring='accuracy', random_state=42
)
random_search.fit(X_scaled, y)
print(f"Best RF parameters: {random_search.best_params_}")
print(f"Best RF score: {random_search.best_score_:.3f}")

In [None]:
# Phase 6: Learning Curves
# shuffle=True is required here. learning_curve trains on prefixes of each
# fold's training indices, and the Iris rows are ordered by class, so without
# shuffling the smallest prefixes contain a single class. SVC cannot be fitted
# on one class; those fits fail and are scored as NaN, leaving holes at the
# start of both curves. random_state keeps the shuffle reproducible.
train_sizes, train_scores, test_scores = learning_curve(
    SVC(), X_scaled, y, cv=5, scoring='accuracy',
    train_sizes=np.linspace(0.1, 1.0, 10), n_jobs=-1,
    shuffle=True, random_state=42
)

# Aggregate across the CV folds (axis 1); axis 0 indexes the training sizes.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

# Mean score per training size with a +/- one standard deviation band.
plt.figure(figsize=(10, 6))
plt.plot(train_sizes, train_mean, 'o-', label='Training score')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.2)
plt.plot(train_sizes, test_mean, 's-', label='Test score')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.2)
plt.xlabel('Training set size')
plt.ylabel('Accuracy')
plt.title('Learning Curves')
plt.legend()
plt.grid(alpha=0.3)
plt.show()

In [None]:
# Phase 7: Comparison
# Compare five classifiers with the same 5-fold cross-validated accuracy.
# The randomized estimators (forest, tree) are seeded so the table and plot are
# identical on every Restart & Run All.
models = [
    ("Logistic Regression", LogisticRegression(max_iter=1000)),
    ("SVM", SVC()),
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("KNN", KNeighborsClassifier())
]

# Loop variables use their own names so the `model` and `scores` produced by
# the Phase 2 cell are not silently overwritten.
results = []
for name, estimator in models:
    fold_scores = cross_val_score(estimator, X_scaled, y, cv=5, scoring='accuracy')
    results.append({'Model': name, 'Mean': fold_scores.mean(), 'Std': fold_scores.std()})

results_df = pd.DataFrame(results)
print(results_df)

# Mean accuracy per model; error bars show one standard deviation across folds.
plt.figure(figsize=(10, 6))
plt.errorbar(results_df['Model'], results_df['Mean'], yerr=results_df['Std'],
             fmt='o', markersize=8, capsize=5)
plt.ylabel('Accuracy')
plt.title('Model Comparison (5-Fold CV)')
plt.xticks(rotation=45)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Phase 8: Testing and Validation
# Each check appends a (name, passed, details) tuple to test_results; a
# pass/fail summary is printed at the end of the cell.
test_results = []

# Test 1: Cross-validation works
# max_iter=1000 matches the logistic-regression settings used in the earlier cells.
cv_model = LogisticRegression(max_iter=1000)
cv_scores = cross_val_score(cv_model, X_scaled, y, cv=5)
test1 = len(cv_scores) == 5 and cv_scores.min() > 0
test_results.append(("Test 1: 5-Fold CV", test1, f"Scores: {cv_scores}"))

# Test 2: Grid search finds best params
gs = GridSearchCV(LogisticRegression(max_iter=1000), {'C': [0.1, 1, 10]}, cv=3)
gs.fit(X_scaled, y)
test2 = hasattr(gs, 'best_params_') and hasattr(gs, 'best_score_')
test_results.append(("Test 2: Grid Search", test2, f"Best C: {gs.best_params_['C']}"))

# Test 3: Multiple models compared
# The forest is seeded for reproducibility. The scores get their own name so
# the learning-curve array `test_scores` from Phase 6 is not overwritten.
test_models = [
    LogisticRegression(max_iter=1000),
    SVC(),
    RandomForestClassifier(random_state=42),
]
comparison_scores = [cross_val_score(m, X_scaled, y, cv=3).mean() for m in test_models]
test3 = len(comparison_scores) == 3 and all(s > 0 for s in comparison_scores)
test_results.append(("Test 3: Model Comparison", test3, f"Scores: {[f'{s:.2f}' for s in comparison_scores]}"))

# Test 4: Learning curve data valid
# Both curves must be NaN-free: a failed fit inside learning_curve is recorded
# as NaN rather than raising, so it has to be checked for explicitly.
test4 = (
    len(train_mean) == len(test_mean)
    and bool(np.isfinite(train_mean).all())
    and bool(np.isfinite(test_mean).all())
    and bool((train_mean >= 0).all())
)
test_results.append(("Test 4: Learning Curves", test4, f"Curves length: {len(train_mean)}"))

# Test 5: Best model from grid search
# Use the cross-validated score of the winning candidate. Scoring the refit
# estimator on X_scaled would measure accuracy on its own training data.
best_model = gs.best_estimator_
best_score = gs.best_score_
test5 = best_score > 0.8 and isinstance(best_model, LogisticRegression)
test_results.append(("Test 5: Best Model Score", test5, f"Score: {best_score:.3f}"))

print("\n" + "="*60)
print("PRACTICAL 8: MODEL EVALUATION - TEST RESULTS")
print("="*60)
passed = 0
for test_name, result, details in test_results:
    status = "✅ PASS" if result else "❌ FAIL"
    print(f"{status} | {test_name}")
    print(f"       Details: {details}")
    if result:
        passed += 1

print(f"\nTotal: {passed}/{len(test_results)} tests passed")
print("="*60)

In [None]:
# Final cell: print a banner marking the end of the practical.
print("PRACTICAL 8 COMPLETE")