In [2]:
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, roc_auc_score, log_loss
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

In [4]:
satellite = pd.read_csv('Satellite.csv',sep=';')
satellite

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,grey soil
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,grey soil
2,84,102,102,83,80,102,102,79,84,94,...,87,84,99,104,79,84,99,104,79,grey soil
3,80,102,102,79,84,94,102,79,80,94,...,79,84,99,104,79,84,103,104,79,grey soil
4,84,94,102,79,80,94,98,76,80,102,...,79,84,103,104,79,79,107,109,87,grey soil
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6430,60,83,96,85,64,87,100,88,64,83,...,92,66,87,108,89,63,83,104,85,red soil
6431,64,79,100,85,56,71,96,85,56,68,...,85,66,83,100,85,63,83,100,81,red soil
6432,56,68,91,81,56,64,91,81,53,64,...,81,59,87,96,81,63,83,92,74,vegetation stubble
6433,56,68,87,74,60,71,91,81,60,64,...,74,59,83,92,74,59,83,92,70,vegetation stubble


In [8]:
X = satellite.drop(columns='classes')
y = satellite.classes

In [9]:

knn = KNeighborsClassifier()
dtc = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True, random_state=24)
nb = GaussianNB()
rf = RandomForestClassifier(random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True,random_state=24)

stack = StackingClassifier(estimators=[('dtc',dtc),('svm',svm),('knn',knn),('nb',nb)],final_estimator=rf)
stack.get_params()

{'cv': None,
 'estimators': [('dtc', DecisionTreeClassifier(random_state=24)),
  ('svm', SVC(probability=True, random_state=24)),
  ('knn', KNeighborsClassifier()),
  ('nb', GaussianNB())],
 'final_estimator__bootstrap': True,
 'final_estimator__ccp_alpha': 0.0,
 'final_estimator__class_weight': None,
 'final_estimator__criterion': 'gini',
 'final_estimator__max_depth': None,
 'final_estimator__max_features': 'sqrt',
 'final_estimator__max_leaf_nodes': None,
 'final_estimator__max_samples': None,
 'final_estimator__min_impurity_decrease': 0.0,
 'final_estimator__min_samples_leaf': 1,
 'final_estimator__min_samples_split': 2,
 'final_estimator__min_weight_fraction_leaf': 0.0,
 'final_estimator__monotonic_cst': None,
 'final_estimator__n_estimators': 100,
 'final_estimator__n_jobs': None,
 'final_estimator__oob_score': False,
 'final_estimator__random_state': 24,
 'final_estimator__verbose': 0,
 'final_estimator__warm_start': False,
 'final_estimator': RandomForestClassifier(random_state

In [12]:
params = {'dtc__max_depth':[None],
          'final_estimator__max_depth': [None],
          'passthrough':[True,False],
          'svm__C': np.linspace(0.001,3,5)}
gcv = GridSearchCV(stack, param_grid=params, cv=kfold, scoring='neg_log_loss', verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.001;, score=-0.312 total time= 1.3min
[CV 2/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.001;, score=-0.229 total time= 1.2min
[CV 3/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.001;, score=-0.267 total time= 1.3min
[CV 4/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.001;, score=-0.275 total time= 1.3min
[CV 5/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.001;, score=-0.262 total time= 1.3min
[CV 1/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.75075;, score=-0.306 total time=  16.0s
[CV 2/5] END dtc__max_depth=None, final_estimator__max_depth=None, passthrough=True, svm__C=0.75075;, score=-0.224 total time=  16.5s
[CV 3/5] EN

In [13]:
print("Best score: ",gcv.best_params_)
print("Best Score: ", gcv.best_score_)

Best score:  {'dtc__max_depth': None, 'final_estimator__max_depth': None, 'passthrough': True, 'svm__C': 2.25025}
Best Score:  -0.23901615939827425


In [14]:
best_stack = gcv.best_estimator_

In [15]:
import pickle
pkfile = open('stack_sat.pkl','wb')
pickle.dump(best_stack,pkfile)

In [16]:
infile = open('stack_gls.pkl','rb')
objLoad = pickle.load(infile)

In [17]:
tst_sat = pd.read_csv('tst_satellite.csv')
tst_sat

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.27,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36
0,104,97,106,79,94,91,85,87,106,92,...,100,80,110,102,87,105,89,81,100,75
1,99,105,99,95,101,110,91,101,96,83,...,80,84,75,107,85,94,100,96,79,110
2,98,78,91,104,105,103,84,91,106,82,...,95,81,76,99,97,95,88,78,103,75
3,75,98,98,104,89,90,100,81,88,88,...,108,86,88,86,106,89,76,79,79,91
4,92,108,89,89,92,108,78,94,84,88,...,91,106,84,106,96,81,91,76,84,106
5,108,100,96,90,90,94,95,87,82,82,...,92,104,84,87,89,80,96,97,106,102
6,82,91,89,96,102,81,77,108,76,83,...,108,95,96,90,80,85,86,92,104,101
7,93,108,81,75,86,82,77,106,91,89,...,91,75,88,105,99,91,105,109,104,84
8,101,78,82,103,77,87,97,75,91,99,...,83,83,90,98,98,82,95,87,88,87
9,88,107,90,81,97,105,79,109,75,103,...,105,87,96,91,110,89,106,106,104,107


In [18]:
tst_sat['pred_type'] = objLoad.predict(tst_sat)

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- x.1
- x.10
- x.11
- x.12
- x.13
- ...
Feature names seen at fit time, yet now missing:
- Al
- Ba
- Ca
- Fe
- K
- ...


In [None]:
tst_sat