Question:

You are tasked with building an SVM model to predict whether a patient has breast cancer based on a set
of medical parameters. You have access to a dataset containing various features computed from a digitized
image of a fine needle aspirate (FNA) of a breast mass.

The target variable is binary:
- 0 = Benign (non-cancerous)
- 1 = Malignant (cancerous)

Using the given dataset, build an SVM classification model to predict whether a breast mass is benign or
malignant. Split the dataset into training and testing sets, then evaluate the performance of your model
in terms of accuracy, precision, recall, and F1-score.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer  # Added this import

In [None]:
# Load the breast cancer dataset bundled with scikit-learn instead of a CSV file.
breast_cancer = load_breast_cancer()
data = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)

# scikit-learn encodes this dataset as 0 = malignant, 1 = benign
# (breast_cancer.target_names is ['malignant', 'benign']), which is the REVERSE
# of the task definition (0 = benign, 1 = malignant). Re-encode the labels so
# that the positive class (1) is "malignant"; otherwise the precision, recall
# and F1 computed later (which treat label 1 as positive by default) would
# describe benign detection rather than cancer detection.
malignant_code = list(breast_cancer.target_names).index('malignant')
data['target'] = (breast_cancer.target == malignant_code).astype(int)

# Display first 10 rows
data.head(10)

In [None]:
# Summary statistics for every numeric column of the DataFrame.
data.describe()
# Alternative quick check: data.shape (would need its own cell or an explicit
# display() call, since only a cell's last expression is rendered).


In [None]:
# List all column names (the feature columns plus the 'target' label column).
print(data.columns)

In [None]:
# Label vector (y) is the 'target' column; the feature matrix (X) is everything else.
y = data['target']
X = data.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
# NOTE(review): consider stratify=y to keep the class ratio equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused for the test split, so no information
# from the test set leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Fit a support vector classifier with an RBF kernel on the standardized training data.
# NOTE(review): per the scikit-learn docs, SVC's random_state is only used when
# probability=True; it is kept here unchanged — confirm whether it is needed.
svm_params = {'kernel': 'rbf', 'random_state': 42}
svm_model = SVC(**svm_params)
svm_model.fit(X_train_scaled, y_train)

In [None]:
# Predict labels for both splits with the fitted model: the train predictions
# are used to gauge fit, the test predictions to gauge generalization.
y_train_pred, y_test_pred = (
    svm_model.predict(features)
    for features in (X_train_scaled, X_test_scaled)
)
print(y_test_pred)

In [None]:
# Accuracy on the training split (fraction of training samples predicted correctly).
# Comparing it with the test accuracy gives a rough indication of overfitting.
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Train Accuracy: {train_accuracy:.4f}")

In [None]:
# Evaluate on the held-out test split. precision/recall/F1 use scikit-learn's
# default binary setting, i.e. label 1 is treated as the positive class.
test_accuracy, precision, recall, f1 = (
    metric(y_test, y_test_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    f"Test Accuracy: {test_accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-score: {f1:.4f}",
    sep="\n",
)

In [None]:
# Confusion matrix for the test split, drawn as an annotated heatmap
# (rows = actual labels, columns = predicted labels).
cm = confusion_matrix(y_test, y_test_pred)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()