In [2]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [3]:
from sklearn.datasets import load_breast_cancer

In [5]:
data=load_breast_cancer()

In [6]:
df=pd.DataFrame(data=data.data,columns=data.feature_names)

In [7]:
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [8]:
df['Target']=data.target

In [9]:
df['Target'].unique()

array([0, 1])

In [10]:
Corr_columns=['mean perimeter', 'mean area', 'mean concavity',
       'mean concave points', 'perimeter error', 'area error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst compactness', 'worst concavity', 'worst concave points']

In [11]:
df=df.drop(labels=Corr_columns,axis=1)

In [12]:
df.head()

Unnamed: 0,mean radius,mean texture,mean smoothness,mean compactness,mean symmetry,mean fractal dimension,radius error,texture error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst smoothness,worst symmetry,worst fractal dimension,Target
0,17.99,10.38,0.1184,0.2776,0.2419,0.07871,1.095,0.9053,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,0.1622,0.4601,0.1189,0
1,20.57,17.77,0.08474,0.07864,0.1812,0.05667,0.5435,0.7339,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,0.1238,0.275,0.08902,0
2,19.69,21.25,0.1096,0.1599,0.2069,0.05999,0.7456,0.7869,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,0.1444,0.3613,0.08758,0
3,11.42,20.38,0.1425,0.2839,0.2597,0.09744,0.4956,1.156,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,0.2098,0.6638,0.173,0
4,20.29,14.34,0.1003,0.1328,0.1809,0.05883,0.7572,0.7813,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,0.1374,0.2364,0.07678,0


In [13]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

In [15]:
X=df.drop(labels='Target',axis=1)
y=df.Target

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [18]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [23]:
numeric_pipeline=Pipeline(
        steps=[
            ('impute',SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]
)

In [24]:
numrical_col=X.columns.to_list()

In [25]:
numrical_col

['mean radius',
 'mean texture',
 'mean smoothness',
 'mean compactness',
 'mean symmetry',
 'mean fractal dimension',
 'radius error',
 'texture error',
 'smoothness error',
 'compactness error',
 'concavity error',
 'concave points error',
 'symmetry error',
 'fractal dimension error',
 'worst smoothness',
 'worst symmetry',
 'worst fractal dimension']

In [28]:
preprocessor=ColumnTransformer(
    [
    ('numeric_pipeline',numeric_pipeline,numrical_col)
    ]
)

In [29]:
X_train=preprocessor.fit_transform(X_train)
X_test=preprocessor.transform(X_test)

In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

In [36]:
models={
    'LogisticRegression':LogisticRegression(),
    'DecisionTreeClassifier':DecisionTreeClassifier(),
    'BernoulliNB':BernoulliNB(),
    'SVC':SVC(),
    'RandomForestClassifier':RandomForestClassifier(),
    'GradientBoostingClassifier':GradientBoostingClassifier(),
    'AdaBoostClassifier':AdaBoostClassifier()
}

In [37]:
from sklearn.metrics import r2_score

In [48]:
def evaluatemetric(X_train, X_test, y_train, y_test,models):
    report={}
    for i in range(len(models)):
        model=list(models.values())[i]
        model.fit(X_train,y_train)
        ##predict the values
        Y_predict=model.predict(X_test)
        score=r2_score(y_test,Y_predict)
        report[list(models.keys())[i]]=score
    return report             

In [49]:
evaluatemetric(X_train, X_test, y_train, y_test,models)

{'LogisticRegression': 0.8608609843345257,
 'DecisionTreeClassifier': 0.6521524608363143,
 'BernoulliNB': 0.35068459356112003,
 'SVC': 0.7681016405575429,
 'RandomForestClassifier': 0.83767114839028,
 'GradientBoostingClassifier': 0.8144813124460343,
 'AdaBoostClassifier': 0.8144813124460343}

In [51]:
report=evaluatemetric(X_train, X_test, y_train, y_test,models)

In [52]:
report

{'LogisticRegression': 0.8608609843345257,
 'DecisionTreeClassifier': 0.5362032811150858,
 'BernoulliNB': 0.35068459356112003,
 'SVC': 0.7681016405575429,
 'RandomForestClassifier': 0.7449118046132972,
 'GradientBoostingClassifier': 0.8144813124460343,
 'AdaBoostClassifier': 0.8144813124460343}

In [54]:
best_model_score=max(report.values())
best_model_score

0.8608609843345257

In [59]:
bestmoodel=list(report.keys())[
list(report.values()).index(best_model_score)]

In [60]:
bestmoodel

'LogisticRegression'

In [62]:
list(report.keys())[0]

'LogisticRegression'

In [65]:
list(report.values()).index(best_model_score)

0

In [68]:
bestmodel_obj=models[bestmoodel]

In [69]:
bestmodel_obj