# Breast Cancer Diagnosis

### Load Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import linear_model, tree, ensemble
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Read the dataset from the CSV file located in the working directory.
df = pd.read_csv(filepath_or_buffer="data.csv")
# Preview the first five rows (the notebook renders the last expression).
df.head(n=5)

### Visualization

In [None]:
# Number of rows per diagnosis class.
df.loc[:, 'diagnosis'].value_counts()

In [None]:
# Bar chart of the class frequencies. The column is passed by keyword:
# seaborn deprecated positional data vectors in 0.11, and from 0.12 the only
# positional parameter is `data`, so a positional Series is no longer
# interpreted as `x`.
sns.countplot(x='diagnosis', data=df, label='Count')

In [None]:
# Pairwise scatter plots for the columns at positions 1-5, coloured by class.
# `hue` names a column, so 'diagnosis' must be inside the selected slice.
sns.pairplot(data=df.iloc[:, 1:6], hue='diagnosis')

In [None]:
plt.figure(figsize=(16, 12))
# Correlate numeric columns only. The slice includes `diagnosis`, which is not
# label-encoded until the preprocessing step (presumably still string-valued
# here); DataFrame.corr() raises on non-numeric columns in pandas >= 2.0,
# whereas older versions silently dropped them.
sns.heatmap(
    df.iloc[:, 1:12].select_dtypes(include='number').corr(),
    annot=True,
    fmt='.2f',
)

### Preprocess

In [None]:
# Count of missing values in each column.
df.isnull().sum()

In [None]:
# Remove the two columns that are not used as features.
df = df.drop(columns=['id', 'Unnamed: 32'])

In [None]:
# (rows, columns) of the frame after the column drop above.
df.shape

In [None]:
# Replace the diagnosis values with integer class codes.
label = LabelEncoder()
label.fit(df['diagnosis'])
df['diagnosis'] = label.transform(df['diagnosis'])

In [None]:
# Separate the target column from the feature matrix.
y = df['diagnosis']
X = df.drop(columns=['diagnosis'])
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

### Training

In [None]:
def models(X_train, y_train):
    """Fit three classifiers and print each one's training accuracy.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        A 3-tuple ``(logistic_regression, decision_tree, random_forest)`` of
        fitted estimators, in that order.
    """
    # (report prefix, estimator) pairs, in the order they are fitted,
    # reported and returned.
    candidates = (
        (
            "[0]Logistic Regression Training Accuracy: ",
            linear_model.LogisticRegression(random_state=0),
        ),
        (
            "[1]Decision Tree Classifier Training Accuracy: ",
            tree.DecisionTreeClassifier(criterion='entropy', random_state=0),
        ),
        (
            "[2]Random Forest Classifier Training Accuracy: ",
            ensemble.RandomForestClassifier(
                n_estimators=10, criterion='entropy', random_state=0
            ),
        ),
    )

    # Fit every estimator before reporting any score.
    for _, estimator in candidates:
        estimator.fit(X_train, y_train)

    # Accuracy is measured on the same data the estimators were fitted on.
    for prefix, estimator in candidates:
        print(prefix, estimator.score(X_train, y_train))

    return tuple(estimator for _, estimator in candidates)

In [None]:
# Fit the classifiers and keep the returned tuple of fitted estimators.
# NOTE: this rebinds the name `models` from the function to the tuple it
# returns, so running this cell again without first re-running the cell that
# defines the function raises TypeError ('tuple' object is not callable).
models = models(X_train, y_train)

### Evaluation

In [None]:
def eval_model(model):
    """Print the classification report and accuracy of *model* on the test split.

    Reads the notebook-level ``X_test`` and ``y_test``.

    Args:
        model: A fitted estimator exposing ``predict``.
    """
    # Predict once and reuse the result for both metrics.
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))
    print(accuracy_score(y_test, y_pred))


# Evaluate every fitted estimator held in `models`.
for m in models:
    eval_model(m)

In [None]:
'''
Inspiration
1. https://www.youtube.com/watch?v=NSSOyhJBmWY
'''