<a href="https://colab.research.google.com/github/Ramanji409/breast_cancer_prediction/blob/main/breast_cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
from sklearn.datasets import load_breast_cancer

In [15]:
import pandas as pd
# Load the breast-cancer dataset as a single DataFrame (features plus the
# 'target' column) and preview the first five rows.
data = load_breast_cancer(as_frame=True)['frame']
data.head(5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [16]:
# Count missing values per column (isna is the canonical name for isnull).
data.isna().sum()

Unnamed: 0,0
mean radius,0
mean texture,0
mean perimeter,0
mean area,0
mean smoothness,0
mean compactness,0
mean concavity,0
mean concave points,0
mean symmetry,0
mean fractal dimension,0


In [17]:
# Separate the label column from the feature columns.
y = data['target']
x = data.drop('target', axis=1)

In [18]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only. Fitting on the
# full feature matrix would let the test rows influence the mean/std applied
# to the training data (data leakage) and bias the reported test metrics.
#
# train_test_split chooses rows from the sample count and random_state alone,
# so splitting the row positions here with the same test_size/random_state as
# the train/test split cell that follows selects exactly the rows that end up
# in x_train. Keep the two calls in sync if either value changes.
train_rows = train_test_split(np.arange(len(x)), test_size=0.2, random_state=42)[0]
# x is a DataFrame on the first run; the fallback keeps the cell re-runnable
# after x has already been replaced by an array.
train_features = x.iloc[train_rows] if hasattr(x, 'iloc') else x[train_rows]

scaler = StandardScaler()
scaler.fit(train_features)
# Replace x with the scaled version of every row (train and test alike).
x = scaler.transform(x)

In [19]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the partition
# identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [21]:
# Baseline classifiers to compare, keyed by the label printed in the report.
# The tree-based estimators draw random numbers while fitting, so they are
# seeded to make the reported metrics reproducible across kernel restarts.
models = {
    'LogisticRegression': LogisticRegression(),
    'SVM': SVC(),
    'NaiveBayes': GaussianNB(),
    'KNN': KNeighborsClassifier(),
    'DecisionTree': DecisionTreeClassifier(random_state=42),
    'RandomForest': RandomForestClassifier(random_state=42),
}

In [22]:
# Fit each baseline on the training split and print its test-set metrics.
# y_pred, acc, confusion_mat and report_text are overwritten on every pass.
for name, model in models.items():
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = model.fit(x_train, y_train).predict(x_test)

    acc = accuracy_score(y_test, y_pred)
    confusion_mat = confusion_matrix(y_test, y_pred)
    report_text = classification_report(y_test, y_pred)

    # One print call, one item per line, followed by a separator rule.
    print(
        f'Model:{name}',
        f'Accuracy:{acc}',
        'Confusion_matrix:',
        confusion_mat,
        f'Classification_Report: {report_text}',
        '=' * 70,
        sep='\n',
    )

Model:LogisticRegression
Accuracy:0.9736842105263158
Confusion_matrix:
[[41  2]
 [ 1 70]]
Classification_Report:               precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Model:SVM
Accuracy:0.9736842105263158
Confusion_matrix:
[[41  2]
 [ 1 70]]
Classification_Report:               precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Model:NaiveBayes
Accuracy:0.9649122807017544
Confusion_matrix:
[[40  3]
 [ 1 70]]
Classification_Report:               precision    rec

In [23]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the logistic-regression search:
# 2 penalties x 5 C values x 2 solvers x 3 iteration caps = 60 candidates.
param_grid = dict(
    penalty=['l1', 'l2'],
    C=[0.01, 0.1, 1, 10, 100],
    solver=['liblinear', 'saga'],
    max_iter=[100, 200, 500],
)

In [24]:
# 5-fold cross-validated grid search over param_grid, scored by accuracy and
# run on all available cores. refit=True retrains the best configuration on
# the full data passed to fit().
# The base estimator is seeded because both solvers in the grid ('liblinear'
# and 'saga') use the random state; without it the selected model and its
# scores can differ from run to run.
grid = GridSearchCV(
    LogisticRegression(random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    refit=True,
    verbose=1,
)

In [25]:
# Run the search on the training split only; the test split stays untouched.
grid.fit(X=x_train, y=y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


In [26]:
# Score the tuned estimator (refit by the grid search) on the held-out split.
best_model = grid.best_estimator_
y_pred = best_model.predict(x_test)
print('Model:Logistic Regression')

acc = accuracy_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

# Emit the remaining report lines in one call, one item per line.
print(
    f'Accuracy:{acc}',
    'Confusion_matrix:',
    confusion_mat,
    f'Classification_Report: {report_text}',
    sep='\n',
)

Model:Logistic Regression
Accuracy:0.9912280701754386
Confusion_matrix:
[[42  1]
 [ 0 71]]
Classification_Report:               precision    recall  f1-score   support

           0       1.00      0.98      0.99        43
           1       0.99      1.00      0.99        71

    accuracy                           0.99       114
   macro avg       0.99      0.99      0.99       114
weighted avg       0.99      0.99      0.99       114

