In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Location of the Kyphosis dataset. The default is the original machine-specific
# path; set the KYPHOSIS_CSV environment variable to point at the file on any
# other machine instead of editing this cell.
KYPHOSIS_CSV = os.environ.get(
    "KYPHOSIS_CSV",
    r"C:\Users\Administrator.DAI-PC2\Downloads\Shubham\Practical Machine Learning\ClassWork\Cases\Kyphosis\Kyphosis.csv",
)
kyp = pd.read_csv(KYPHOSIS_CSV)

# Encode the "Kyphosis" label column as integers for use as the target.
le = LabelEncoder()
y = le.fit_transform(kyp["Kyphosis"])

# Every remaining column is used as a feature.
X = kyp.drop(columns='Kyphosis')

In [3]:
# Hold out 30% of the rows for testing; stratify on y so both parts keep the
# same class proportions, and fix the seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
    stratify=y,
)

In [None]:
## Train/test split: stacking without passthrough (the final estimator sees only the base models' predictions, not the original features)

In [4]:
# Base learners whose predictions feed the final estimator.
lr = LogisticRegression()
svm = SVC(kernel='linear', probability=True, random_state=24)
dtc = DecisionTreeClassifier(random_state=24)

# Final (meta) estimator; passthrough is left at its default (off), so it is
# trained on the base learners' predictions only.
rf = RandomForestClassifier(random_state=24)
stack = StackingClassifier(
    estimators=[
        ('LR', lr),
        ('SVM', svm),
        ('TREE', dtc),
    ],
    final_estimator=rf,
)

In [5]:
# Fit the stacked model on the training part of the split.
stack.fit(X_train, y_train)

# Hard class predictions on the held-out rows -> accuracy.
y_pred = stack.predict(X_test)
print("accuracy = ", accuracy_score(y_test, y_pred))

# Predicted class probabilities on the held-out rows -> log loss.
y_pred_prob = stack.predict_proba(X_test)
print("log_loss = ", log_loss(y_test, y_pred_prob))

accuracy =  0.72
log_loss =  2.001191391953967


In [None]:
## Train/test split: stacking with passthrough=True (the final estimator sees the base models' predictions plus the original features)

In [6]:
# Base learners whose predictions feed the final estimator.
lr = LogisticRegression()
svm = SVC(kernel='linear', probability=True, random_state=24)
dtc = DecisionTreeClassifier(random_state=24)

# Final (meta) estimator; passthrough=True additionally hands it the original
# feature columns alongside the base learners' predictions.
rf = RandomForestClassifier(random_state=24)
stack = StackingClassifier(
    estimators=[
        ('LR', lr),
        ('SVM', svm),
        ('TREE', dtc),
    ],
    final_estimator=rf,
    passthrough=True,
)

In [7]:
# Fit the passthrough stacked model on the training part of the split.
stack.fit(X_train, y_train)

# Hard class predictions on the held-out rows -> accuracy.
y_pred = stack.predict(X_test)
print("accuracy = ", accuracy_score(y_test, y_pred))

# Predicted class probabilities on the held-out rows -> log loss.
y_pred_prob = stack.predict_proba(X_test)
print("log_loss = ", log_loss(y_test, y_pred_prob))

accuracy =  0.76
log_loss =  1.8078240247569928


In [None]:
### Grid search for the best stacking hyperparameters (base learners, final estimator and passthrough)

In [10]:
# Rebuild the stacked model that the grid search below will tune.
lr = LogisticRegression()
svm = SVC(kernel='linear', probability=True, random_state=24)
dtc = DecisionTreeClassifier(random_state=24)
rf = RandomForestClassifier(random_state=24)
stack = StackingClassifier(
    estimators=[
        ('LR', lr),
        ('SVM', svm),
        ('TREE', dtc),
    ],
    final_estimator=rf,
    passthrough=True,
)

# Display the tunable parameter names; nested parameters use the
# "<estimator name>__<parameter>" form needed for the search grid.
stack.get_params()

{'cv': None,
 'estimators': [('LR', LogisticRegression()),
  ('SVM', SVC(kernel='linear', probability=True, random_state=24)),
  ('TREE', DecisionTreeClassifier(random_state=24))],
 'final_estimator__bootstrap': True,
 'final_estimator__ccp_alpha': 0.0,
 'final_estimator__class_weight': None,
 'final_estimator__criterion': 'gini',
 'final_estimator__max_depth': None,
 'final_estimator__max_features': 'sqrt',
 'final_estimator__max_leaf_nodes': None,
 'final_estimator__max_samples': None,
 'final_estimator__min_impurity_decrease': 0.0,
 'final_estimator__min_samples_leaf': 1,
 'final_estimator__min_samples_split': 2,
 'final_estimator__min_weight_fraction_leaf': 0.0,
 'final_estimator__monotonic_cst': None,
 'final_estimator__n_estimators': 100,
 'final_estimator__n_jobs': None,
 'final_estimator__oob_score': False,
 'final_estimator__random_state': 24,
 'final_estimator__verbose': 0,
 'final_estimator__warm_start': False,
 'final_estimator': RandomForestClassifier(random_state=24),
 'n

In [11]:
# Shuffled, seeded 5-fold stratified CV used to score every candidate.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Search space: 5 * 5 * 4 * 2 * 2 = 400 parameter combinations.
params = {
    "LR__C": np.linspace(0.01, 3, 5),
    "SVM__C": np.linspace(0.01, 3, 5),
    "TREE__max_depth": [None, 2, 3, 4],
    "final_estimator__max_features": [2, 3],
    "passthrough": [False, True],
}

# Scored by negative log loss (closer to zero is better); the search is run on
# the full X, y rather than on the earlier train/test split.
gcv = GridSearchCV(
    stack,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
    n_jobs=-1,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'LR__C': 1.5050000000000001, 'SVM__C': 0.7575000000000001, 'TREE__max_depth': 4, 'final_estimator__max_features': 3, 'passthrough': True}
-0.41213495226387675
