<a href="https://colab.research.google.com/github/Nurulafifa0/Kumpulan-Tugas-Mingguan/blob/main/Week8_ML_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Support Vector Machines

In [2]:
# Importing packages

import pandas as pd # data processing
import numpy as np # working with arrays
import seaborn as sb # visualization
import matplotlib.pyplot as plt # visualization
from sklearn.svm import SVC # SVM model algorithm
from sklearn.metrics import accuracy_score # evalution metric
from sklearn.metrics import confusion_matrix # evalution metric
from sklearn.model_selection import train_test_split # splitting the data
from termcolor import colored as cl # text customization

# Notebook-wide plot defaults: seaborn 'whitegrid' theme and a 20 x 10 figure size
sb.set_style(style = 'whitegrid')
plt.rcParams.update({'figure.figsize': (20, 10)})

Importing Data & EDA

In [None]:
# Read the dataset and discard the 'Unnamed: 0' column
# (presumably an index column written when the CSV was saved — confirm)
df = pd.read_csv('cancer_data.csv').drop(columns = ['Unnamed: 0'])

# Preview the first rows
df.head()

In [None]:
# Take the first 50 rows of each class (Class == 2 -> benign, Class == 4 -> malignant)
benign_samples = df[df['Class'] == 2][0:50]
malignant_samples = df[df['Class'] == 4][0:50]

# x and y are passed as keywords: seaborn >= 0.12 only accepts `data` positionally
# and raises a TypeError when the two columns are given as positional arguments.
sb.scatterplot(data = benign_samples, x = 'Clump', y = 'UnifSize', s = 150, label = 'Benign')
sb.scatterplot(data = malignant_samples, x = 'Clump', y = 'UnifSize', s = 150, label = 'Malignant')
plt.legend(fontsize = 14)
plt.title('CLUMP / UNIFORMITY', fontsize = 16)
plt.xlabel('Clump Thickness', fontsize = 14)
plt.ylabel('Uniformity Size', fontsize = 14)
plt.xticks(fontsize = 12)
plt.yticks(fontsize = 12)

# Save before show(): show() may clear the current figure
plt.savefig('clump_unif.png')
plt.show()

Data processing

In [None]:
# Print the dtype of every column in bold
dtype_report = cl(df.dtypes, attrs = ['bold'])
print(dtype_report)

In [None]:
# Keep only the rows whose 'BareNuc' value parses as a number (non-numeric entries
# become NaN under errors='coerce' and are filtered out).
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise SettingWithCopyWarning.
df = df[pd.to_numeric(df['BareNuc'], errors='coerce').notnull()].copy()
df['BareNuc'] = df['BareNuc'].astype('int64')

# Re-print the dtypes to confirm 'BareNuc' is now int64
print(cl(df.dtypes, attrs = ['bold']))

Feature Selection & Train Test Split

In [None]:
# Feature matrix: every column except the target 'Class'; target vector: 'Class' itself.
# NOTE(review): any identifier-like column still present in df would be used as a
# feature here — confirm against the CSV schema.
X_var = np.asarray(df.drop(columns = ['Class']))
y_var = np.asarray(df['Class'])

# Preview the first five entries of each array
for caption, preview in (('X_var samples : ', X_var[:5]), ('y_var samples : ', y_var[:5])):
    print(cl(caption, attrs = ['bold']), preview)

In [None]:
# Hold out 20% of the rows for testing; random_state is fixed at 4
split_parts = train_test_split(X_var, y_var, test_size = 0.2, random_state = 4)
X_train, X_test, y_train, y_test = split_parts

# Preview the first five entries of each partition
for caption, part in (
    ('X_train samples : ', X_train),
    ('X_test samples : ', X_test),
    ('y_train samples : ', y_train),
    ('y_test samples : ', y_test),
):
    print(cl(caption, attrs = ['bold']), part[:5])

Modeling & Prediction

In [None]:
# Build an RBF-kernel support vector classifier and train it on the training split
# (fit() returns the estimator itself, so the calls can be chained)
model = SVC(kernel = 'rbf').fit(X_train, y_train)

# Display the fitted estimator as the cell output
model

In [None]:
# Predict class labels for the held-out rows
yhat = model.predict(X_test)

# Show the first ten predictions
yhat_heading = cl('yhat samples : ', attrs = ['bold'])
print(yhat_heading, yhat[:10])

Evaluation

In [None]:
# Fraction of held-out rows whose predicted class equals the true class.
# The builtin round() is used instead of the .round() method: accuracy_score may
# return a plain Python float, which has no .round() method.
accuracy = round(accuracy_score(y_test, yhat), 3)
print(cl('Accuracy score of our model is {}'.format(accuracy), attrs = ['bold']))


In [None]:
import itertools

def plot_confusion_matrix(cm, classes,normalize = False, title = 'Confusion matrix', cmap = plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix. Rows are drawn on the 'True label' axis and
        columns on the 'Predicted label' axis.
    classes : sequence of str
        Tick labels, in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        If True, every row is divided by its row total and cells are printed
        with two decimals; otherwise cells are printed as integers. Rows whose
        total is zero are left as all zeros.
    title : str, default 'Confusion matrix'
        Title drawn above the plot.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap used for the heat map.

    Returns
    -------
    None
        The plot is drawn with pyplot; the caller saves or shows the figure.
    """
    if normalize:
        row_totals = cm.sum(axis = 1)[:, np.newaxis]
        # A class with no true samples has a zero row total; divide by 1 instead
        # so the row stays all zeros rather than turning into NaN.
        row_totals[row_totals == 0] = 1
        cm = cm.astype(float) / row_totals

    plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
    plt.title(title, fontsize = 22)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation = 45, fontsize = 13)
    plt.yticks(tick_marks, classes, fontsize = 13)

    fmt = '.2f' if normalize else 'd'
    # Cells above half the maximum get white text, the others black text
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment = 'center',
                 fontsize = 15,
                 color = 'white' if cm[i, j] > thresh else 'black')

    plt.ylabel('True label', fontsize = 16)
    plt.xlabel('Predicted label', fontsize = 16)
    # tight_layout runs last so the axis labels are taken into account when the
    # margins are computed
    plt.tight_layout()

# Limit printed numpy floats to two decimals
np.set_printoptions(precision = 2)

# Confusion matrix of true vs. predicted labels, with rows/columns ordered as [2, 4]
class_labels = [2, 4]
cnf_matrix = confusion_matrix(y_test, yhat, labels = class_labels)


# Draw the raw-count matrix on a fresh figure, save it to disk, then display it
plt.figure()
plot_confusion_matrix(
    cnf_matrix,
    ['Benign(2)','Malignant(4)'],
    normalize = False,
    title = 'Confusion matrix',
)
plt.savefig('confusion_matrix.png')
plt.show()

Final Thoughts!

In [None]:
# Importing packages

import pandas as pd # data processing
import numpy as np # working with arrays
import seaborn as sb # visualization
import matplotlib.pyplot as plt # visualization
from sklearn.svm import SVC # SVM model algorithm
from sklearn.metrics import accuracy_score # evalution metric
from sklearn.metrics import confusion_matrix # evalution metric
from sklearn.model_selection import train_test_split # splitting the data
from termcolor import colored as cl # text customization

# Plot defaults: seaborn 'whitegrid' theme and a 20 x 10 figure size
sb.set_style(style = 'whitegrid')
plt.rcParams.update({'figure.figsize': (20, 10)})

# Read the dataset and discard the 'Unnamed: 0' column
df = pd.read_csv('cancer_data.csv').drop(columns = ['Unnamed: 0'])

# Not the last expression of the cell, so under default notebook settings this
# preview produces no output
df.head()

# Take the first 50 rows of each class (Class == 2 -> benign, Class == 4 -> malignant)
benign_samples = df[df['Class'] == 2][0:50]
malignant_samples = df[df['Class'] == 4][0:50]

# x and y are passed as keywords: seaborn >= 0.12 only accepts `data` positionally
# and raises a TypeError when the two columns are given as positional arguments.
sb.scatterplot(data = benign_samples, x = 'Clump', y = 'UnifSize', s = 150, label = 'Benign')
sb.scatterplot(data = malignant_samples, x = 'Clump', y = 'UnifSize', s = 150, label = 'Malignant')
plt.legend(fontsize = 14)
plt.title('CLUMP / UNIFORMITY', fontsize = 16)
plt.xlabel('Clump Thickness', fontsize = 14)
plt.ylabel('Uniformity Size', fontsize = 14)
plt.xticks(fontsize = 12)
plt.yticks(fontsize = 12)

# Save before show(): show() may clear the current figure
plt.savefig('clump_unif.png')
plt.show()

# Column dtypes before cleaning
print(cl(df.dtypes, attrs = ['bold']))

# Keep only the rows whose 'BareNuc' value parses as a number (non-numeric entries
# become NaN under errors='coerce' and are filtered out).
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise SettingWithCopyWarning.
df = df[pd.to_numeric(df['BareNuc'], errors='coerce').notnull()].copy()
df['BareNuc'] = df['BareNuc'].astype('int64')

# Column dtypes after cleaning
print(cl(df.dtypes, attrs = ['bold']))

# Feature matrix: every column except the target 'Class'; target vector: 'Class' itself
X_var = np.asarray(df.drop(columns = ['Class']))
y_var = np.asarray(df['Class'])

for caption, preview in (('X_var samples : ', X_var[:5]), ('y_var samples : ', y_var[:5])):
    print(cl(caption, attrs = ['bold']), preview)

# Hold out 20% of the rows for testing; random_state is fixed at 4
split_parts = train_test_split(X_var, y_var, test_size = 0.2, random_state = 4)
X_train, X_test, y_train, y_test = split_parts

# Preview the first five entries of each partition
for caption, part in (
    ('X_train samples : ', X_train),
    ('X_test samples : ', X_test),
    ('y_train samples : ', y_train),
    ('y_test samples : ', y_test),
):
    print(cl(caption, attrs = ['bold']), part[:5])

# Train an RBF-kernel support vector classifier on the training split
model = SVC(kernel = 'rbf')
model.fit(X_train, y_train)

# Predict class labels for the held-out rows and show the first ten
yhat = model.predict(X_test)

print(cl('yhat samples : ', attrs = ['bold']), yhat[:10])

# The builtin round() is used instead of the .round() method: accuracy_score may
# return a plain Python float, which has no .round() method.
accuracy = round(accuracy_score(y_test, yhat), 3)
print(cl('Accuracy score of our model is {}'.format(accuracy), attrs = ['bold']))

import itertools
def plot_confusion_matrix(cm, classes,normalize = False, title = 'Confusion matrix', cmap = plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix. Rows are drawn on the 'True label' axis and
        columns on the 'Predicted label' axis.
    classes : sequence of str
        Tick labels, in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        If True, every row is divided by its row total and cells are printed
        with two decimals; otherwise cells are printed as integers. Rows whose
        total is zero are left as all zeros.
    title : str, default 'Confusion matrix'
        Title drawn above the plot.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap used for the heat map.

    Returns
    -------
    None
        The plot is drawn with pyplot; the caller saves or shows the figure.
    """
    if normalize:
        row_totals = cm.sum(axis = 1)[:, np.newaxis]
        # A class with no true samples has a zero row total; divide by 1 instead
        # so the row stays all zeros rather than turning into NaN.
        row_totals[row_totals == 0] = 1
        cm = cm.astype(float) / row_totals

    plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
    plt.title(title, fontsize = 22)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation = 45, fontsize = 13)
    plt.yticks(tick_marks, classes, fontsize = 13)

    fmt = '.2f' if normalize else 'd'
    # Cells above half the maximum get white text, the others black text
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment = 'center',
                 fontsize = 15,
                 color = 'white' if cm[i, j] > thresh else 'black')

    plt.ylabel('True label', fontsize = 16)
    plt.xlabel('Predicted label', fontsize = 16)
    # tight_layout runs last so the axis labels are taken into account when the
    # margins are computed
    plt.tight_layout()

# Limit printed numpy floats to two decimals
np.set_printoptions(precision = 2)

# Confusion matrix of true vs. predicted labels, with rows/columns ordered as [2, 4]
class_labels = [2, 4]
cnf_matrix = confusion_matrix(y_test, yhat, labels = class_labels)

# Draw the raw-count matrix on a fresh figure, save it to disk, then display it
plt.figure()
plot_confusion_matrix(
    cnf_matrix,
    ['Benign(2)','Malignant(4)'],
    normalize = False,
    title = 'Confusion matrix',
)
plt.savefig('confusion_matrix.png')
plt.show()