# SVM


In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [17]:
## Function to Evaluate a Classifier
def eval_classifier(y_test, y_pred, model):
    # Evaluate accuary 
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model} Accuracy: {accuracy}")
    
    # Classification Report
    print(f"Classification report: ")
    print(classification_report(y_test, y_pred))
    
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix: \n {cm}")

In [5]:
# Load the Breast Cancer Dataset
cancer = datasets.load_breast_cancer()
X = cancer

X = cancer.data

y = cancer.target.astype(int)

# Levels are 0 and 1 so it's binary classification

# Target Names are Malignant and Benign 

In [8]:
X.shape, y.shape, X, y
# 30 Features

((569, 30),
 (569,),
 array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1

In [7]:
# Split data into train & test

#Common Ratio is 80/20, another one is 70/30

#Might not get the same result when running the command from different people as the data is shuffled, Random_state keeps the order consistent as others
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=1)

X_train.shape, X_test.shape, y_train.shape, y_test.shape
## 455 for Training, 30 for Testing

((455, 30), (114, 30), (455,), (114,))

## SVM Classifier

In [9]:
# Instantiate a SVM Classifier 

clf_svm = svm.SVC(kernel='linear')

In [10]:
## Train the Model

clf_svm.fit(X_train, y_train)

SVC(kernel='linear')

In [11]:
## Predict with the Model for test dataset and store the results

svm_pred = clf_svm.predict(X_test)



In [18]:
## Evaluation

eval_classifier(y_test, svm_pred, 'SVM')

SVM Accuracy: 0.956140350877193
Classification report: 
              precision    recall  f1-score   support

           0       1.00      0.88      0.94        42
           1       0.94      1.00      0.97        72

    accuracy                           0.96       114
   macro avg       0.97      0.94      0.95       114
weighted avg       0.96      0.96      0.96       114

Confusion Matrix: 
 [[37  5]
 [ 0 72]]


In [19]:
from sklearn.linear_model import LogisticRegression
clf_lreg = LogisticRegression().fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [20]:
# Predict with Log Model
lreg_pred = clf_lreg.predict(X_test)

In [21]:
# Eval with log
eval_classifier(y_test, lreg_pred, 'LREG')

LREG Accuracy: 0.9473684210526315
Classification report: 
              precision    recall  f1-score   support

           0       0.95      0.90      0.93        42
           1       0.95      0.97      0.96        72

    accuracy                           0.95       114
   macro avg       0.95      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix: 
 [[38  4]
 [ 2 70]]
