# Importing essential libraries

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

# Load the dataset

In [2]:
cancer = datasets.load_breast_cancer()

# Exploring Data /Data Analysis

In [3]:
# print the names of the 13 features
print("Features: ", cancer.feature_names)

# print the label type of cancer('malignant' 'benign')
print("Labels: ", cancer.target_names)


Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


# Split the data into training/testing sets

In [4]:
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.3,random_state=109) # 70% training and 30% test

# Generating Model

In [5]:
clf = svm.SVC(kernel='linear') # Linear Kernel

# Train the model using the training sets

In [6]:
clf.fit(X_train, y_train)

SVC(kernel='linear')

# Predict the response for test dataset

In [7]:
y_pred = clf.predict(X_test)

# Evaluating the Model

In [8]:
# Model Accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:",metrics.precision_score(y_test, y_pred))

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:",metrics.recall_score(y_test, y_pred))

print("Confusiuon Matrix:",metrics.confusion_matrix(y_test, y_pred))

Accuracy: 0.9649122807017544
Precision: 0.9811320754716981
Recall: 0.9629629629629629
Confusiuon Matrix: [[ 61   2]
 [  4 104]]


In [None]:
Q9. In this week’s kandi 1-click kit on Breast Cancer Prediction, perform hyperparameter tuning to find the optimal values for SVM's hyperparameters (e.g., C and gamma). 
Use techniques like grid search or random search to explore different combinations of hyperparameters and choose the best performing ones. 
Upload screenshots of both the code and the output (upload only in PNG image format).

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
import numpy as np

# Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create an SVM classifier
svm_classifier = SVC()
# Define the parameter grid for grid search
param_grid = {'C': [0.1, 1, 10], 'gamma': [0.1, 0.01, 0.001]}
# Create a grid search object
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5)
grid_search.fit(X_train, y_train)
# Get the best parameters and best score from grid search
best_params_grid = grid_search.best_params_
best_score_grid = grid_search.best_score_
# Define the parameter distributions for random search
param_dist = {'C': np.logspace(-3, 3, 7), 'gamma': np.logspace(-6, 0, 7)}
# Create a random search object
random_search = RandomizedSearchCV(svm_classifier, param_distributions=param_dist, n_iter=10, cv=5)
random_search.fit(X_train, y_train)
# Get the best parameters and best score from random search
best_params_random = random_search.best_params_
best_score_random = random_search.best_score_
# Print the results
print("Grid Search - Best Parameters:", best_params_grid)
print("Grid Search - Best Score:", best_score_grid)
print("Random Search - Best Parameters:", best_params_random)
print("Random Search - Best Score:", best_score_random)

Grid Search - Best Parameters: {'C': 1, 'gamma': 0.001}
Grid Search - Best Score: 0.9098901098901099
Random Search - Best Parameters: {'gamma': 1e-05, 'C': 1.0}
Random Search - Best Score: 0.9164835164835166


In [None]:
Q10. In the Breast Cancer Prediction kandi 1-click kit, change the algorithm from Support Vector to Decision Treesand showcase the classification 
metrics along with accuracy, precision and recall displayed separately as well.
Upload screenshots of both the code and the output (upload only in PNG image format).

In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
# Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create a Decision Tree classifier
decision_tree = DecisionTreeClassifier()
# Fit the model on the training data
decision_tree.fit(X_train, y_train)
# Make predictions on the test data
y_pred = decision_tree.predict(X_test)
# Calculate classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# Print the metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)


Accuracy: 0.9298245614035088
Precision: 0.9436619718309859
Recall: 0.9436619718309859
