In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [6]:
data_set=pd.read_csv('/content/BreastCancer.csv')
print(data_set.columns)

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')


In [7]:
data_set.isnull().any

<bound method NDFrame._add_numeric_operations.<locals>.any of         id  diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    False      False        False         False           False      False   
1    False      False        False         False           False      False   
2    False      False        False         False           False      False   
3    False      False        False         False           False      False   
4    False      False        False         False           False      False   
..     ...        ...          ...           ...             ...        ...   
564  False      False        False         False           False      False   
565  False      False        False         False           False      False   
566  False      False        False         False           False      False   
567  False      False        False         False           False      False   
568  False      False        False         False           False     

In [8]:
data=data_set.drop(columns=['id','Unnamed: 32'])

In [9]:
print(data.columns)
print(data.shape)

Index(['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')
(569, 31)


In [10]:
LE=preprocessing.LabelEncoder()
for col in data.columns:
  data[col]=LE.fit_transform(data[col])

In [11]:
X=data.drop(columns=['diagnosis'])
Y=data['diagnosis']
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.3)

In [12]:
models=[]
models.append(['LinearRegression',LinearRegression()])
models.append(['LogisticRegression',LogisticRegression()])
models.append(['RandomForest',RandomForestClassifier()])
models.append(['KNN',KNeighborsClassifier()])
models.append(['DecisionTree',DecisionTreeClassifier()])
models.append(['SVM',SVC(kernel='rbf')])

In [13]:
for name,model_clf in models:
  model = Pipeline([
        ('scaler', StandardScaler()),
        ('model', model_clf)
    ])
  model.fit(x_train,y_train)
  model_pred=model.predict(x_test)
  try:
    model_score=metrics.accuracy_score(y_test,model_pred)*100
  except Exception as e:
    model_score=(metrics.mean_squared_error(y_test,model_pred))
    model_score=(100-(model_score*100))
  print(f'Model Name : {name}\nModel Accuray : {model_score:.2f}% \n')

Model Name : LinearRegression
Model Accuray : 94.11% 

Model Name : LogisticRegression
Model Accuray : 97.08% 

Model Name : RandomForest
Model Accuray : 95.91% 

Model Name : KNN
Model Accuray : 96.49% 

Model Name : DecisionTree
Model Accuray : 95.32% 

Model Name : SVM
Model Accuray : 97.66% 

