# Breast Cancer Detection Model Research by Melike Yesil

In [77]:
pip install numpy pandas matplotlib scikit-learn seaborn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [78]:
from sklearn.datasets import load_breast_cancer 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

import numpy as np
import random

In [79]:
data = load_breast_cancer()

## The features below are analyzed by our model to output a prediction

The features are measured and inputted into an array by a medical professional.

In [80]:
print(data['feature_names'])

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


## target_names refers to the two possible prediction results

In [81]:
print(data['target_names'])

['malignant' 'benign']


## Preparing and Splitting Data

In [82]:
X = data['data']
Y = data['target']

In [83]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

## Training Model

In [84]:
clf = KNeighborsClassifier()
clf.fit(x_train, y_train)

KNeighborsClassifier()

## Testing Model Accuracy

In [85]:
print(clf.score(x_test, y_test))

0.9473684210526315


## Predicting Example

In [86]:
print(data['feature_names'])

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [102]:
x_new = np.array(random.sample(range(0,55), len(data['feature_names'])))

In [88]:
print(x_new)

[11 49 51 48  4 38  0 42 35 13 43 26 29 37 20 47  1 36 34 44  9 16 41 31
 17  2 28  7 23 32]


In [103]:
print(data['target_names'][clf.predict([x_new])][0])

benign
