In [1]:
import numpy as np 
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer , f1_score

# Download the headerless WDBC table; with header=None the columns are
# labelled 0, 1, 2, ... so positional and label-based indexing coincide.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    header=None,
)

# Columns 2 onward are used as the feature matrix; column 1 is the target.
X = df.iloc[:, 2:].values

# Map the raw target labels onto consecutive integers (0, 1, ...).
le = LabelEncoder()
y = le.fit_transform(df[1].values)

# Number of samples per encoded class.
np.bincount(y)

array([357, 212], dtype=int64)

In [2]:
# Hold out 25% of the samples for testing. stratify=y keeps the class
# proportions of y in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=1
)

In [3]:
# Candidate values reused for both C and gamma, plus the polynomial degrees to try.
param_range = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
degree = list(range(2, 8))

# Standardize the features, then fit a support vector classifier.
# make_pipeline names the steps 'standardscaler' and 'svc', hence the 'svc__' prefixes below.
pipe_svm = make_pipeline(StandardScaler(), SVC(random_state=1))

# One sub-grid per kernel so that each kernel is only combined with the
# hyperparameters listed for it here.
param_grid = [
    {'svc__C': param_range, 'svc__kernel': ['linear']},
    {'svc__C': param_range, 'svc__gamma': param_range, 'svc__kernel': ['rbf']},
    {'svc__C': param_range, 'svc__degree': degree, 'svc__kernel': ['poly']},
]

# Exhaustive search scored by weighted F1 with cv=10, using all available cores;
# refit=True retrains the best configuration once the search is done.
# fit() returns the fitted search object itself, so the call can be chained.
gs = GridSearchCV(
    estimator=pipe_svm,
    param_grid=param_grid,
    scoring='f1_weighted',
    n_jobs=-1,
    refit=True,
    cv=10,
).fit(X_train, y_train)

print(gs.best_score_)
print(gs.best_params_)

0.9859260968145737
{'svc__C': 1.0, 'svc__kernel': 'linear'}


In [4]:
# Forest sizes and maximum tree depths to evaluate.
n_estimators = [10, 50, 100]
max_depth = [10, 100, 200]

# Single-step pipeline; make_pipeline names the step 'randomforestclassifier',
# which is the prefix used for the grid keys below.
pipe_forest = make_pipeline(RandomForestClassifier(random_state=1))

# One sub-grid per split criterion ('gini' first, then 'entropy'), each crossed
# with every forest size and depth.
param_grid_tree = [
    {
        'randomforestclassifier__n_estimators': n_estimators,
        'randomforestclassifier__criterion': [criterion],
        'randomforestclassifier__max_depth': max_depth,
    }
    for criterion in ('gini', 'entropy')
]

# Same search settings as a weighted-F1, cv=10 grid search on all cores;
# fit() returns the fitted search object itself, so the call can be chained.
gs_t = GridSearchCV(
    estimator=pipe_forest,
    param_grid=param_grid_tree,
    scoring='f1_weighted',
    n_jobs=-1,
    refit=True,
    cv=10,
).fit(X_train, y_train)

print(gs_t.best_score_)
print(gs_t.best_params_)

0.9646921449181212
{'randomforestclassifier__criterion': 'entropy', 'randomforestclassifier__max_depth': 10, 'randomforestclassifier__n_estimators': 100}
