# Introduction

### 1. In this problem we use 30 feature columns to predict the breast cancer diagnosis: M (Malignant) or B (Benign)
### 2. Attribute Information:

- 1) ID number

- 2) Diagnosis (M = malignant, B = benign)

- 3-32.Ten real-valued features are computed for each cell nucleus:

    - a) radius (mean of distances from center to points on the perimeter)

    - b) texture (standard deviation of gray-scale values)

    - c) perimeter

    - d) area

    - e) smoothness (local variation in radius lengths)

    - f) compactness (perimeter^2 / area - 1.0)

    - g). concavity (severity of concave portions of the contour)

    - h). concave points (number of concave portions of the contour)

    - i). symmetry

    - j). fractal dimension ("coastline approximation" - 1)

### 3. Here columns 3-32 are divided into three parts: Mean (3-12), Standard Error (13-22) and Worst (23-32), and each part contains 10 parameters (radius, texture, perimeter, area, smoothness, compactness, concavity, concave points, symmetry and fractal dimension)

### 4. Here Mean is the mean over all cells, Standard Error is the standard error over all cells, and Worst is the value for the worst cell

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Read the raw CSV into a DataFrame and preview its first five rows.
bc = pd.read_csv(filepath_or_buffer="data/breast-cancer-winconsin.csv")
bc.head(n=5)

In [None]:
# (rows, columns) of the raw frame, before any column is dropped.
bc.shape

In [None]:
# Per-column dtype and non-null count of the raw frame.
bc.info()

In [None]:
# Drop the columns that are not used as features: "Unnamed: 32" and "id".
# Reassigning (rather than inplace=True) together with errors="ignore" keeps
# this cell idempotent: re-running it no longer raises KeyError because the
# columns are already gone.
bc = bc.drop(columns=["Unnamed: 32", "id"], errors="ignore")

In [None]:
# Replace value M, B -> 1, 0
# The identity entries (1 -> 1, 0 -> 0) make this cell safe to re-run:
# without them a second execution maps the already-encoded values to NaN.
bc['diagnosis'] = bc['diagnosis'].map({'M': 1, 'B': 0, 1: 1, 0: 0})

# Explore The Data

In [None]:
# Preview the frame after the column drop and the diagnosis encoding.
bc.head()

In [None]:
# (rows, columns) after the column drop.
bc.shape

In [None]:
# Remaining column labels; their positions are used for the feature slices below.
bc.columns

In [None]:
# Summary statistics for every numeric column.
bc.describe() 

In [None]:
# Class balance of the target (0 = benign, 1 = malignant after the encoding above).
# The column is passed by keyword: recent seaborn releases treat the first
# positional argument as `data`, so a positional Series is no longer read as `x`.
sns.countplot(x='diagnosis', data=bc, label="Count")

In [None]:
# Number of rows per diagnosis class.
bc.diagnosis.value_counts()

## Data Analysis and a Little Feature Selection

In [None]:
# Column 0 is `diagnosis`; the 30 features follow in three groups of ten.
features_mean = list(bc.columns[1:11])
# Fixed off-by-one: the slice previously stopped at 20 and therefore returned
# only nine standard-error columns, leaving out the one at index 20.
features_se = list(bc.columns[11:21])
features_worst = list(bc.columns[21:31])
print(features_mean)
print("-----------------------------------")
print(features_se)
print("------------------------------------")
print(features_worst)

In [None]:
# Pairwise correlation between the "mean" features, drawn as an annotated heatmap.
corr = bc[features_mean].corr()
plt.figure(figsize=(14, 14))
sns.heatmap(
    corr,
    cbar=True,
    square=True,
    annot=True,
    fmt='.2f',
    annot_kws={'size': 15},
    xticklabels=features_mean,
    yticklabels=features_mean,
    cmap='coolwarm',
)

- radius, perimeter and area are highly correlated, as expected from their relation, so we will use only one of them => perimeter
- texture
- smoothness
- compactness, concavity and concave points are highly correlated, so we will use only one of them => compactness
- symmetry
- fractal

# Modeling

In [None]:
# Build the feature matrix / target and split them 80/20 with a fixed seed.
from sklearn.model_selection import train_test_split

X = bc.loc[:, ['texture_mean', 'perimeter_mean', 'smoothness_mean', 'compactness_mean', 'symmetry_mean']]
y = bc['diagnosis']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Every model below is trained on the first two columns only
# (texture_mean, perimeter_mean), which also lets the SVM cells plot a 2-D surface.
X_train, X_test = X_train.iloc[:, :2], X_test.iloc[:, :2]

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Default-configured logistic regression: fit on the training split,
# predict the held-out split and print the accuracy.
logreg = LogisticRegression().fit(X_train, y_train)

y_pred = logreg.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix and per-class precision/recall/F1 for the current `y_pred`.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 7-nearest-neighbours classifier: fit, predict the held-out split, print accuracy.
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

y_pred = knn.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix and per-class precision/recall/F1 for the current `y_pred`.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
def make_meshgrid(x, y, h=.02):
    """Build a rectangular evaluation grid around two coordinate vectors.

    Parameters
    ----------
    x, y : array-like exposing ``.min()`` / ``.max()``
        Values whose ranges define the grid along each axis.
    h : float, optional
        Step between neighbouring grid points (default 0.02).

    Returns
    -------
    (xx, yy) : tuple of ndarray
        Coordinate matrices from ``np.meshgrid``; each axis spans
        ``[min - 1, max + 1)`` in steps of ``h``.
    """
    # Pad each range by one unit on both sides so the points sit inside the grid.
    x_axis = np.arange(x.min() - 1, x.max() + 1, h)
    y_axis = np.arange(y.min() - 1, y.max() + 1, h)
    xx, yy = np.meshgrid(x_axis, y_axis)
    return xx, yy

def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's decision regions together with the training points.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    clf : fitted classifier
        Must expose ``predict``; it is evaluated on every grid point.
    xx, yy : ndarray
        Coordinate matrices, e.g. from ``make_meshgrid``.
    **params
        Forwarded unchanged to ``ax.contourf``.

    Returns
    -------
    The object returned by ``ax.contourf``.

    Notes
    -----
    This function reads the notebook-level names ``X0``, ``X1``, ``y_train``
    and ``title``; they must be defined before it is called.
    """
    # Predict a class for every grid point, then restore the grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)

    points = ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
    ax.set_ylabel('Y')
    ax.set_xlabel('X')
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
    # A bare ax.legend() finds no labelled artists here, so it only emits a
    # warning and draws nothing. Build one entry per class from the scatter.
    ax.legend(*points.legend_elements())
    return out

### Linear Kernel, C=1 

In [None]:
# Linear-kernel SVC with C=1: fit on the training split and print test accuracy.
svm = SVC(kernel='linear', C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Refit the linear SVC (C=1), print test accuracy, then draw its decision surface.
svm = clf = SVC(kernel='linear', C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

# plot_contours reads `title`, `X0` and `X1` from the notebook namespace,
# so they are set here before the call.
title = 'Decision surface of linear SVC '
X0 = X_train.iloc[:, 0]
X1 = X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

fig, ax = plt.subplots()
plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### Gamma = 0.01

In [None]:
# RBF-kernel SVC (gamma=0.01, C=1): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=.01, C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=0.01, C=1): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=.01, C=1)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### Gamma = 1.0

In [None]:
# RBF-kernel SVC (gamma=1, C=1): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=1, C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=1, C=1): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=1, C=1)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### Gamma = 10

In [None]:
# RBF-kernel SVC (gamma=10, C=1): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=10, C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=10, C=1): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=10, C=1)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### Gamma = 100

In [None]:
# RBF-kernel SVC (gamma=100, C=1): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=100, C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=100, C=1): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=100, C=1)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### C = 1

In [None]:
# RBF-kernel SVC (gamma=0.01, C=1): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=.01, C=1, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=0.01, C=1): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=.01, C=1)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### C = 10

In [None]:
# RBF-kernel SVC (gamma=0.01, C=10): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=.01, C=10, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=0.01, C=10): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=.01, C=10)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### C = 1000

In [None]:
# RBF-kernel SVC (gamma=0.01, C=1000): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=.01, C=1000, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=0.01, C=1000): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=.01, C=1000)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### C = 10000

In [None]:
# RBF-kernel SVC (gamma=0.01, C=10000): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=.01, C=10000, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=0.01, C=10000): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=.01, C=10000)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

### C = 100000

In [None]:
# RBF-kernel SVC (gamma=0.01, C=100000): fit on the training split and print test accuracy.
svm = SVC(kernel='rbf', gamma=.01, C=100000, random_state=0).fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# RBF-kernel SVC (gamma=0.01, C=100000): refit, print test accuracy, draw the decision surface.
svm = SVC(kernel='rbf', random_state=0, gamma=.01, C=100000)
clf = svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

fig, ax = plt.subplots()
# Fixed: the title used to say "linear SVC" although this model uses the RBF
# kernel. It is now derived from the estimator so it cannot drift again.
# plot_contours reads `title`, `X0` and `X1` from the notebook namespace.
title = f'Decision surface of {svm.kernel.upper()} SVC (gamma={svm.gamma}, C={svm.C})'
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# `y_pred` is overwritten by every model cell, so this report describes
# whichever model was run most recently.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))