In [41]:
import pandas as pd

In [42]:
df = pd.read_csv('cancer.csv')
df.head()

Unnamed: 0,Age,Gender,BMI,Smoking,GeneticRisk,PhysicalActivity,AlcoholIntake,CancerHistory,Diagnosis
0,58,1,16.085313,0,1,8.146251,4.148219,1,1
1,71,0,30.828784,0,1,9.36163,3.519683,0,0
2,48,1,38.785084,0,2,5.135179,4.728368,0,1
3,34,0,30.040296,0,0,9.502792,2.044636,0,0
4,62,1,35.479721,0,0,5.35689,3.309849,0,1


In [43]:
X = df.drop('Diagnosis', axis = 1)
y = df['Diagnosis']
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)



from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV

model_param = {
    'logistic' : {
        'model' : LogisticRegression(),
        'params' : {
            'C' : [1,5,10]
        }
    },
    
    'decision' : {
        'model' : DecisionTreeClassifier(),
        'params' : {
            'min_samples_leaf' : [1, 5, 8, 10],
            'min_samples_split' : [5, 10, 15, 20]
        }
    },
    'random' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators' : [5, 10, 15, 20],
            'min_samples_leaf' : [1, 5, 8, 10],
            'min_samples_split' : [5, 10, 15, 20]
        }
    },
    'SVM' : {
        'model' : SVC(gamma = 'auto'),
        'params' : {
            'C' : [1, 10, 20],
            'kernel' : ['rbf', 'linear']
        }
},
    'knn' : {
        'model' : KNeighborsClassifier(),
        'params' : {
            'n_neighbors' : [5, 10, 15, 20]
        }
    }
}

In [44]:
result = []

for model_nam, mp in model_param.items():
    clf = GridSearchCV(mp['model'],mp['params'], cv = 5, return_train_score = False, n_jobs = -1)
    clf.fit(X_scaled, y)
    result.append({
        'Model' : model_nam,
        'Best score' : clf.best_score_,
        'Best Param' : clf.best_params_
    })
    

In [45]:
result = pd.DataFrame(result)
result

Unnamed: 0,Model,Best score,Best Param
0,logistic,0.848,{'C': 1}
1,decision,0.879333,"{'min_samples_leaf': 5, 'min_samples_split': 10}"
2,random,0.914667,"{'min_samples_leaf': 1, 'min_samples_split': 5..."
3,SVM,0.891333,"{'C': 10, 'kernel': 'rbf'}"
4,knn,0.881333,{'n_neighbors': 5}
