In [23]:
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, roc_auc_score, log_loss
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

In [24]:
# Load the breast-cancer table; the first CSV column is used as the row index.
cancer = pd.read_csv(
    'BreastCancer.csv',
    index_col=0,
)
cancer

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,Benign
63375,9,1,2,6,4,10,7,7,2,Malignant
76389,10,4,7,2,2,8,6,1,1,Malignant
95719,6,10,10,10,8,10,7,10,7,Malignant
128059,1,1,1,1,2,5,5,1,1,Benign
...,...,...,...,...,...,...,...,...,...,...
1369821,10,10,10,10,5,10,10,10,7,Malignant
1371026,5,10,10,10,4,10,5,6,3,Malignant
1371920,5,1,1,1,2,1,3,2,1,Benign
8233704,4,1,1,1,1,1,2,1,1,Benign


In [25]:
# Target labels are the 'Class' column; every other column is a feature.
y = cancer['Class']
X = cancer.drop(columns=['Class'])

In [26]:

# Level-0 (base) learners; every estimator that accepts a seed uses 24.
dtc = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True, random_state=24)  # probability=True enables predict_proba
knn = KNeighborsClassifier()
nb = GaussianNB()

# Level-1 (meta) learner that combines the base learners' outputs.
rf = RandomForestClassifier(random_state=24)

# Shuffled, stratified 5-fold splitter.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

base_learners = [('dtc', dtc), ('svm', svm), ('knn', knn), ('nb', nb)]
stack = StackingClassifier(estimators=base_learners, final_estimator=rf)
stack.get_params()

{'cv': None,
 'estimators': [('dtc', DecisionTreeClassifier(random_state=24)),
  ('svm', SVC(probability=True, random_state=24)),
  ('knn', KNeighborsClassifier()),
  ('nb', GaussianNB())],
 'final_estimator__bootstrap': True,
 'final_estimator__ccp_alpha': 0.0,
 'final_estimator__class_weight': None,
 'final_estimator__criterion': 'gini',
 'final_estimator__max_depth': None,
 'final_estimator__max_features': 'sqrt',
 'final_estimator__max_leaf_nodes': None,
 'final_estimator__max_samples': None,
 'final_estimator__min_impurity_decrease': 0.0,
 'final_estimator__min_samples_leaf': 1,
 'final_estimator__min_samples_split': 2,
 'final_estimator__min_weight_fraction_leaf': 0.0,
 'final_estimator__monotonic_cst': None,
 'final_estimator__n_estimators': 100,
 'final_estimator__n_jobs': None,
 'final_estimator__oob_score': False,
 'final_estimator__random_state': 24,
 'final_estimator__verbose': 0,
 'final_estimator__warm_start': False,
 'final_estimator': RandomForestClassifier(random_state

In [27]:
# Hyper-parameter grid for the stack: 1 * 1 * 2 * 5 * 2 = 20 candidates.
params = dict(
    dtc__max_depth=[None],
    final_estimator__max_depth=[None],
    passthrough=[True, False],
    svm__C=np.linspace(0.001, 3, 5),
    knn__n_neighbors=[5, 10],
)

# Exhaustive search over the grid, scored by negative log loss on the
# stratified folds.
gcv = GridSearchCV(
    estimator=stack,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
    verbose=3,
)
gcv.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END dtc__max_depth=None, final_estimator__max_depth=None, knn__n_neighbors=5, passthrough=True, svm__C=0.001;, score=-0.050 total time=   0.5s
[CV 2/5] END dtc__max_depth=None, final_estimator__max_depth=None, knn__n_neighbors=5, passthrough=True, svm__C=0.001;, score=-0.325 total time=   0.5s
[CV 3/5] END dtc__max_depth=None, final_estimator__max_depth=None, knn__n_neighbors=5, passthrough=True, svm__C=0.001;, score=-0.079 total time=   0.5s
[CV 4/5] END dtc__max_depth=None, final_estimator__max_depth=None, knn__n_neighbors=5, passthrough=True, svm__C=0.001;, score=-0.361 total time=   0.5s
[CV 5/5] END dtc__max_depth=None, final_estimator__max_depth=None, knn__n_neighbors=5, passthrough=True, svm__C=0.001;, score=-0.111 total time=   0.4s
[CV 1/5] END dtc__max_depth=None, final_estimator__max_depth=None, knn__n_neighbors=5, passthrough=True, svm__C=0.75075;, score=-0.058 total time=   0.3s
[CV 2/5] END dtc__max_de

In [28]:
# best_params_ is the winning grid point; best_score_ is its mean
# cross-validated neg_log_loss (closer to 0 is better).
print("Best params: ", gcv.best_params_)
print("Best Score: ", gcv.best_score_)

Best score:  {'dtc__max_depth': None, 'final_estimator__max_depth': None, 'knn__n_neighbors': 10, 'passthrough': True, 'svm__C': 1.5005}
Best Score:  -0.09394181561687927


In [29]:
# Keep the winning stack from the grid search. GridSearchCV was created without
# a `refit` argument, so (with its documented default refit=True) this is the
# estimator refit using the best parameter combination.
best_stack = gcv.best_estimator_

In [30]:
import pickle

# Persist the tuned stack to disk. The context manager closes the file, which
# flushes buffered bytes, so the pickle is complete before anything reads it.
with open('stack_cancer.pkl', 'wb') as pkfile:
    pickle.dump(best_stack, pkfile)

In [31]:
# Read the persisted model back; the context manager closes the handle as soon
# as loading finishes.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('stack_cancer.pkl', 'rb') as infile:
    objLoad = pickle.load(infile)
objLoad

In [32]:
# tst_sat = pd.read_csv('tst_satellite.csv')
# tst_sat

In [33]:
# tst_sat['pred_type'] = objLoad.predict(tst_sat)

In [34]:
# tst_sat

NameError: name 'tst_sat' is not defined

# Gradio

In [37]:
import gradio as gr
import pickle
#import numpy as np 
import pandas as pd 
import os 
# Switch the working directory so the relative file names used after this line
# (e.g. "stack_cancer.pkl" inside predict) resolve against this folder.
# NOTE(review): this is a hard-coded absolute path on one specific machine (and
# it contains a directory whose name is a single space); os.chdir raises if the
# folder does not exist, so this cell is not portable -- confirm whether it is
# still needed or should come from a configurable location.
os.chdir('C:/Users/DAI.STUDENTSDC/Desktop/ /Machine Learning/Day 11')

def predict(Clump, UniCell_Size, Uni_CellShape, MargAdh, SEpith, BareN, BChromatin, NoemN, Mitoses):
    """Classify a single observation with the pickled model.

    The nine arguments are the feature values of one sample. They are placed
    in a one-row DataFrame whose columns are named after the parameters, in
    the same order as the feature columns of the training data.

    Returns:
        The first (and only) element of the loaded model's ``predict`` output
        for that one-row frame.

    Raises:
        FileNotFoundError: if ``stack_cancer.pkl`` is not in the current
            working directory.
    """
    feature_names = ['Clump', 'UniCell_Size', 'Uni_CellShape', 'MargAdh', 'SEpith',
                     'BareN', 'BChromatin', 'NoemN', 'Mitoses']
    tst = pd.DataFrame(
        [[Clump, UniCell_Size, Uni_CellShape, MargAdh, SEpith, BareN, BChromatin, NoemN, Mitoses]],
        columns=feature_names,
    )

    # Close the file deterministically instead of leaking one handle per call.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open("stack_cancer.pkl", "rb") as filehandler:
        bm_loaded = pickle.load(filehandler)

    # Echo the request row to the notebook output (kept from the original).
    print(tst)
    return bm_loaded.predict(tst)[0]
      

# demo = gr.Interface(
#     fn=predict,
#     inputs=["number"] * 9,
#     outputs=["text"]
# )

# Gradio UI: nine numeric inputs (three per row), one text output, and a button
# that sends the inputs to `predict`. Each input variable is named after the
# feature it collects; the `inputs` list must stay in the positional order of
# predict's parameters.
with gr.Blocks() as demo:
    with gr.Row():
        clump_in = gr.Number(label='Clump')
        unicell_size_in = gr.Number(label='UniCell_Size')
        uni_cellshape_in = gr.Number(label='Uni_CellShape')
    with gr.Row():
        margadh_in = gr.Number(label='MargAdh')
        sepith_in = gr.Number(label='SEpith')
        baren_in = gr.Number(label='BareN')
    with gr.Row():
        bchromatin_in = gr.Number(label='BChromatin')
        noemn_in = gr.Number(label='NoemN')
        mitoses_in = gr.Number(label='Mitoses')
    with gr.Row():
        Type = gr.Text(label='Cancer Type')
    with gr.Row():
        button = gr.Button(value="Cancer Type?")
        button.click(
            predict,
            inputs=[
                clump_in, unicell_size_in, uni_cellshape_in,
                margadh_in, sepith_in, baren_in,
                bchromatin_in, noemn_in, mitoses_in,
            ],
            outputs=[Type],
        )
demo.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




   Clump  UniCell_Size  Uni_CellShape  MargAdh  SEpith  BareN  BChromatin  \
0      5             4              3        1       2      2           2   

   NoemN  Mitoses  
0      3        1  
