### Assignment-4

**Objective:**

Understand and implement model evaluation using cross-validation and improve model performance by hyperparameter tuning.

Step 1: Import Libraries and Load Data

In [6]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

Step 2: Load Dataset and Prepare Features and Target

In [7]:
iris = datasets.load_iris()
X = iris.data  # Features
y = iris.target # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

print(f"Dataset shape: {X.shape}")
print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")
print("-" * 30)

Dataset shape: (150, 4)
Training set shape: (105, 4)
Test set shape: (45, 4)
------------------------------


Step 3: Implement Cross-Validation

In [8]:
baseline_model = SVC(random_state=42)

cv_scores = cross_val_score(baseline_model, X_train, y_train, cv=5)
print(f"Scores for each fold: {cv_scores}")
print(f"Mean Accuracy: {cv_scores.mean():.4f}")
print(f"Standard Deviation: {cv_scores.std():.4f}")
print("-" * 30)


Scores for each fold: [0.95238095 0.95238095 1.         0.95238095 1.        ]
Mean Accuracy: 0.9714
Standard Deviation: 0.0233
------------------------------


Step 4: Hyperparameter Tuning with GridSearchCV

In [9]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}

grid_search = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)
grid_search.fit(X_train, y_train)

print(f"\nBest Parameters found: {grid_search.best_params_}")
print(f"Best Cross-Validation Score: {grid_search.best_score_:.4f}")
print("-" * 30)

Fitting 5 folds for each of 32 candidates, totalling 160 fits

Best Parameters found: {'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
Best Cross-Validation Score: 0.9810
------------------------------


Step 5: Evaluate Best Model on Full Dataset

In [10]:
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on Test Set: {test_accuracy:.4f}")

print("\nClassification Report on Test Set:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))

Accuracy on Test Set: 0.9556

Classification Report on Test Set:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.93      0.93      0.93        15
   virginica       0.93      0.93      0.93        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

