In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Fetch the breast cancer dataset bundled with scikit-learn and wrap it in
# pandas objects: X is the feature table (one named column per feature),
# y is the label series.
data = load_breast_cancer()
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])
y = pd.Series(data=data["target"], name="target")


In [None]:
# Quick look at the data: table dimensions, feature names, and class names.
# Each (label, value) pair is printed on its own line, in this order.
overview = (
    ("Dataset Shape:", X.shape),
    ("\nFeature Names:", data.feature_names),
    ("\nTarget Classes:", data.target_names),
)
for label, value in overview:
    print(label, value)


In [None]:
# Bar chart showing how many samples belong to each target class.
fig, ax = plt.subplots()
sns.countplot(x=y, ax=ax)
ax.set_title("Target Class Distribution")
plt.show()


In [None]:
# Train-Test Split
# Hold out 30% of the samples for testing. stratify=y keeps the class
# proportions the same in the train and test partitions, so the reported test
# metrics do not depend on an uneven draw; random_state pins the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardize the Data
# The scaler is fitted on the training partition only; the test partition is
# transformed with those same statistics so nothing from the test set feeds
# into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Baseline model: an RBF-kernel support vector classifier with no C or gamma
# specified, fitted on the standardized training data.
svm = SVC(random_state=42, kernel="rbf")
svm.fit(X=X_train_scaled, y=y_train)


In [None]:
# Score the baseline model on the held-out (standardized) test set:
# per-class precision/recall/F1 report followed by overall accuracy.
y_pred = svm.predict(X_test_scaled)
baseline_report = classification_report(y_test, y_pred)
baseline_accuracy = accuracy_score(y_test, y_pred)

print("\nClassification Report (Default Parameters):")
print(baseline_report)
print("Accuracy:", baseline_accuracy)


In [None]:
# Exhaustive search over C, gamma, and kernel with 5-fold cross-validation on
# the standardized training data, using all available CPU cores (n_jobs=-1).
# NOTE(review): X_train_scaled was standardized with a scaler fitted on the
# whole training partition, so each validation fold has already contributed to
# the scaling statistics. Wrapping scaler + SVC in a Pipeline would avoid
# that, but downstream cells would then need to predict on unscaled data.
param_grid = dict(
    C=[0.1, 1, 10],
    gamma=[1, 0.1, 0.01],
    kernel=["rbf", "poly"],
)
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)


In [None]:
# Report the winning hyperparameter combination, then score the best
# estimator found by the search on the held-out (standardized) test set.
print("\nBest Parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)

best_report = classification_report(y_test, y_pred_best)
best_accuracy = accuracy_score(y_test, y_pred_best)

print("\nClassification Report (Best Model):")
print(best_report)
print("Accuracy (Best Model):", best_accuracy)


In [None]:
# Visualize Hyperparameter Tuning Results
# One point per hyperparameter combination tried by the grid search, showing
# its mean cross-validated test score.
results = pd.DataFrame(grid_search.cv_results_)

# Build a readable tick label for every combination (e.g. "C=1, gamma=0.1,
# kernel=rbf") from the per-row params dict, so each point can be identified
# without cross-referencing a bare row index.
combo_labels = [
    ", ".join(f"{name}={value}" for name, value in params.items())
    for params in results["params"]
]
tick_positions = list(results.index)

fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x=results.index, y=results["mean_test_score"], marker="o", ax=ax)
ax.set_xticks(tick_positions)
ax.set_xticklabels(combo_labels, rotation=60, ha="right")
ax.set_title("Grid Search CV Results")
ax.set_xlabel("Parameter Combination")
ax.set_ylabel("Mean Test Score")
fig.tight_layout()  # keep the rotated tick labels inside the figure
plt.show()
