# In [None]:
# Import necessary libraries
import joblib  # For saving the trained model to disk
import numpy as np
from sklearn import datasets  # For loading datasets like Iris
from sklearn.metrics import accuracy_score, classification_report  # For model evaluation
from sklearn.model_selection import train_test_split, GridSearchCV  # For splitting data and hyperparameter tuning
from sklearn.pipeline import Pipeline  # For bundling preprocessing with the final model
from sklearn.preprocessing import StandardScaler  # For feature scaling
from sklearn.svm import SVC  # For the SVM classifier

# 1. Load the Iris dataset
# The Iris dataset is a simple dataset with 4 features and 3 classes.
iris = datasets.load_iris()
X = iris.data  # Features (raw, unscaled)
y = iris.target  # Target variable (labels)

# 2. Split the dataset into training and testing sets
# We use 70% of the data for training and 30% for testing. The held-out 30%
# gives an estimate of how the model performs on data it has not seen.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 3. Preprocess the data (feature scaling)
# SVMs work better when the features are on a similar scale, so we standardize
# them with StandardScaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit on the training data only, then transform it
X_test = scaler.transform(X_test)  # Reuse the training statistics (avoids data leakage)

# 4. Create an instance of the SVC classifier with a polynomial kernel
# The polynomial kernel lets the classifier learn a non-linear decision boundary.
svc = SVC(kernel='poly')

# 5. Train the classifier on the (scaled) training data
svc.fit(X_train, y_train)

# 6. Predict the labels of the (scaled) testing data
y_pred = svc.predict(X_test)

# 7. Evaluate the baseline model
# Accuracy plus per-class precision, recall, and F1-score on the test set.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

# 8. Hyperparameter tuning using GridSearchCV
# Search over 'C', 'degree', and 'coef0' for the polynomial-kernel SVC.
param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'degree': [2, 3, 4],  # Degree of the polynomial kernel
    'coef0': [0, 1],  # Constant term in the kernel function
}

# 5-fold cross-validation on the scaled training data for every combination.
grid_search = GridSearchCV(SVC(kernel='poly'), param_grid, cv=5, verbose=1)
grid_search.fit(X_train, y_train)

# 9. Print the best parameters found from GridSearchCV
print(f"Best parameters: {grid_search.best_params_}")

# 10. Train the tuned classifier on the entire dataset
# X is still in raw (unscaled) units, while the hyperparameters above were
# selected on standardized features. Fitting the SVC directly on X would train
# the final model on a different feature scale than the one it was tuned for,
# and the saved file would carry no record of the scaling it needs.
# A Pipeline with a fresh StandardScaler and a fresh SVC (built from the best
# parameters) keeps preprocessing and model together, and leaves
# grid_search.best_estimator_ untouched.
best_svc = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', **grid_search.best_params_)),
])
best_svc.fit(X, y)

# 11. Save the trained classifier to a file for future use
# The whole pipeline (scaler + SVC) is persisted, so the loaded model accepts
# raw feature values and scales them itself.
joblib.dump(best_svc, 'svc_model.pkl')

# 12. Load the saved model from the file
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_model = joblib.load('svc_model.pkl')

# You can now use `loaded_model` to make predictions on new, unscaled data,
# e.g. loaded_model.predict(X[:5]).
