In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# 1. Data Exploration and Preparation
iris = datasets.load_iris()
X = iris.data  # Features
y = iris.target  # Target labels (species)

# No missing values found in the Iris dataset

In [3]:
# Scaling features (optional, but recommended for SVM)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# No categorical variables found in the Iris dataset

In [4]:
# 2. SVM Implementation with Hyperparameter Tuning
print("Trying different SVM kernel and C values:")
kernels = ['linear', 'rbf']
C_values = [0.1, 1, 10]
best_model = None
best_accuracy = 0

Trying different SVM kernel and C values:


In [5]:
for kernel in kernels:
    for C in C_values:
        svm_model = SVC(kernel=kernel, C=C)
        # 3. K-fold Cross-Validation (using 10 folds)
        kfold = KFold(n_splits=10, shuffle=True)
        accuracy_scores = []
        for train_index, test_index in kfold.split(X_scaled):
            X_train, X_test = X_scaled[train_index], X_scaled[test_index]
            y_train, y_test = y[train_index], y[test_index]
            svm_model.fit(X_train, y_train)
            y_pred = svm_model.predict(X_test)
            accuracy_scores.append(accuracy_score(y_test, y_pred))

        # Calculate average accuracy across folds
        avg_accuracy = sum(accuracy_scores) / len(accuracy_scores)
        print(f"Kernel: {kernel}, C: {C}, Average Accuracy: {avg_accuracy:.4f}")

        if avg_accuracy > best_accuracy:
            best_model = svm_model
            best_accuracy = avg_accuracy

print(f"\nBest Model Configuration: Kernel: {best_model.kernel}, C: {best_model.C}, Average Accuracy: {best_accuracy:.4f}")

Kernel: linear, C: 0.1, Average Accuracy: 0.9667
Kernel: linear, C: 1, Average Accuracy: 0.9667
Kernel: linear, C: 10, Average Accuracy: 0.9600
Kernel: rbf, C: 0.1, Average Accuracy: 0.9200
Kernel: rbf, C: 1, Average Accuracy: 0.9667
Kernel: rbf, C: 10, Average Accuracy: 0.9533

Best Model Configuration: Kernel: linear, C: 0.1, Average Accuracy: 0.9667


In [6]:
# 4. Evaluation Metrics on Best Model
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1-score (weighted): {f1:.4f}")


Evaluation Metrics:
Accuracy: 0.8333
Precision (weighted): 0.8722
Recall (weighted): 0.8333
F1-score (weighted): 0.8407
