In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import precision_score, recall_score, precision_recall_curve,f1_score, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from mlxtend.classifier import StackingClassifier 
import xgboost as xgb

### First Let's load our data.
Starting with the smallest dataset for simplicity

In [2]:
x = np.memmap('data/gray_data_360', mode='r', shape = (150000,360))
# x = np.load('data/gray_data_20')
y = np.memmap('data/image_target.npy', mode='r', shape = (150000,))
x_test = np.memmap('data/test_gray_data_360.npy', mode='r', shape = (50000,360))
y_test = np.memmap('data/test_target.npy', mode='r', shape = (50000))
print(x.shape)
print(y.shape)
x = pd.DataFrame(x)
y = pd.DataFrame(y)
# y = y == 2
# y = y.astype(int)
# y_test = y_test == 2
# y_test = y_test.astype(int)
x = x/255
x_test = x_test/255

(150000, 360)
(150000,)


"target_data": {
"Inco": 2, 
"Teac": 1, 
"Cons": 0, 
"Publ": 4, 
"Econ": 3}}

In [3]:
X_train, X_val, y_train, y_val = train_test_split(x, y[0], test_size=0.2, stratify=y)
print(X_train.shape)
print(y_val.shape)

(120000, 360)
(30000,)


## KNN
K nearest neighbors

~~~
Accuracy:  0.218
Precision:	0.21878291972161956
Recall:	0.21799999999999997
F1:	0.2158672632176471
array([[1658, 1364, 1110,  993,  875],
       [1541, 1545, 1138,  965,  811],
       [1557, 1340, 1286,  961,  856],
       [1567, 1341, 1192, 1059,  841],
       [1590, 1370, 1090,  958,  992]])
~~~


In [35]:
knn = KNeighborsClassifier(n_neighbors=40, n_jobs=-1)
knn.fit(X_train, y_train)
y_pred = knn.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.24358
Precision:	0.24554351463873764
Recall:	0.24358
F1:	0.24218200464796627


array([[2987, 2054, 2038, 1411, 1510],
       [2528, 2634, 1934, 1425, 1479],
       [2489, 2058, 2550, 1395, 1508],
       [2430, 2079, 2004, 2016, 1471],
       [2529, 2059, 1985, 1435, 1992]])

## Logistic Regression

~~~
Accuracy:	0.20534
Precision:	0.20556761017314962
Recall:	0.20534
F1:	0.20329858294811992
array([[1883, 2415, 2303, 2030, 1369],
       [1716, 2481, 2369, 2067, 1367],
       [1721, 2372, 2430, 2124, 1353],
       [1661, 2522, 2348, 2085, 1384],
       [1718, 2400, 2401, 2093, 1388]])
~~~


In [31]:
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.20122
Precision:	0.20127568026703954
Recall:	0.20122
F1:	0.201213063006084


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([[2045, 2050, 2029, 1976, 1900],
       [2085, 2051, 1946, 2007, 1911],
       [2006, 2108, 2013, 2075, 1798],
       [1994, 2006, 1950, 1995, 2055],
       [2016, 2188, 1924, 1915, 1957]])

In [36]:
grid={"C":np.logspace(-3,3,7), "penalty":["l2"], "solver":['liblinear','newton-cg']}
lr=LogisticRegression()
lr_cv=GridSearchCV(lr,grid,cv=3,scoring='f1_macro')
lr_cv.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=LogisticRegression(),
             param_grid={'C': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03]),
                         'penalty': ['l2'],
                         'solver': ['liblinear', 'newton-cg']},
             scoring='f1_macro')

In [37]:
res = pd.DataFrame(lr_cv.cv_results_)
res.sort_values(by='rank_test_score', ascending=True).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_penalty,param_solver,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
4,17.900504,0.3581,0.035245,0.000225,0.1,l2,liblinear,"{'C': 0.1, 'penalty': 'l2', 'solver': 'libline...",0.225605,0.226902,0.227548,0.226685,0.000808,1
10,20.817865,0.284753,0.036785,0.002518,100.0,l2,liblinear,"{'C': 100.0, 'penalty': 'l2', 'solver': 'libli...",0.225301,0.22677,0.227442,0.226504,0.000894,2
12,20.882647,0.177206,0.035058,0.000802,1000.0,l2,liblinear,"{'C': 1000.0, 'penalty': 'l2', 'solver': 'libl...",0.2253,0.226746,0.227417,0.226488,0.000883,3
8,20.669345,0.030453,0.035962,0.000987,10.0,l2,liblinear,"{'C': 10.0, 'penalty': 'l2', 'solver': 'liblin...",0.22525,0.226771,0.227417,0.226479,0.000909,4
2,13.738656,0.031851,0.034326,0.000109,0.01,l2,liblinear,"{'C': 0.01, 'penalty': 'l2', 'solver': 'liblin...",0.22531,0.227052,0.227073,0.226478,0.000826,5


In [41]:
lr = LogisticRegression(C=1,penalty='l2',solver='newton-cg')
lr.fit(X_train, y_train)
y_pred = lr.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.22542
Precision:	0.2254166168175497
Recall:	0.22542
F1:	0.2253968045432373


array([[2162, 2033, 1902, 1918, 1985],
       [1840, 2305, 1941, 1966, 1948],
       [1940, 2027, 2243, 1851, 1939],
       [1917, 1954, 1948, 2247, 1934],
       [1923, 1994, 1928, 1841, 2314]])

## Decision Tree

~~~
Accuracy:	0.20046
Precision:	0.20043461791314954
Recall:	0.20046
F1:	0.20025358642806665
array([[2236, 2066, 2003, 1891, 1804],
       [2183, 2051, 1982, 1969, 1815],
       [2319, 1968, 1953, 1921, 1839],
       [2110, 2065, 2062, 1937, 1826],
       [2149, 2070, 2005, 1930, 1846]])
~~~


In [40]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred = dt.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.20424
Precision:	0.2042618378400461
Recall:	0.20423999999999998
F1:	0.2042340751277937


array([[2058, 1995, 1970, 1964, 2013],
       [1953, 2067, 1935, 1938, 2107],
       [2017, 2027, 1978, 1943, 2035],
       [1973, 1974, 1914, 2046, 2093],
       [1950, 1971, 1946, 2070, 2063]])

In [74]:
grid={"criterion":["gini",'entropy'],'min_samples_split':[2,6,14,32,64], 'random_state':[42, None] }
dt=DecisionTreeClassifier()
dt_cv=GridSearchCV(dt,grid,cv=3,scoring='f1_macro')
dt_cv.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'min_samples_split': [2, 6, 14, 32, 64],
                         'random_state': [42, None]},
             scoring='f1_macro')

In [75]:
res = pd.DataFrame(dt_cv.cv_results_)
res.sort_values(by='rank_test_score', ascending=True).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_min_samples_split,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,2.550681,0.081907,0.029533,0.002559,gini,2,42.0,"{'criterion': 'gini', 'min_samples_split': 2, ...",0.214716,0.216214,0.217456,0.216128,0.00112,1
1,2.721425,0.208265,0.032089,0.003546,gini,2,,"{'criterion': 'gini', 'min_samples_split': 2, ...",0.217047,0.21396,0.217119,0.216042,0.001472,2
12,3.708585,0.046757,0.031218,0.002523,entropy,6,42.0,"{'criterion': 'entropy', 'min_samples_split': ...",0.217093,0.212089,0.218641,0.215941,0.002796,3
15,3.489973,0.109457,0.030265,0.000409,entropy,14,,"{'criterion': 'entropy', 'min_samples_split': ...",0.214798,0.213864,0.21896,0.215874,0.002215,4
14,3.453665,0.101807,0.029545,0.002062,entropy,14,42.0,"{'criterion': 'entropy', 'min_samples_split': ...",0.215472,0.213849,0.218251,0.215857,0.001817,5


In [83]:
dt = DecisionTreeClassifier(criterion='entropy', min_samples_split=64, random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.20046
Precision:	0.20043461791314954
Recall:	0.20046
F1:	0.20025358642806665


array([[2236, 2066, 2003, 1891, 1804],
       [2183, 2051, 1982, 1969, 1815],
       [2319, 1968, 1953, 1921, 1839],
       [2110, 2065, 2062, 1937, 1826],
       [2149, 2070, 2005, 1930, 1846]])

## Random Forest

~~~
Accuracy:	0.20516
Precision:	0.2051212463423299
Recall:	0.20515999999999995
F1:	0.20510088356551712
array([[2072, 2056, 2049, 1907, 1916],
       [2066, 2011, 2039, 1894, 1990],
       [2003, 1998, 2069, 1880, 2050],
       [1978, 2028, 2048, 1920, 2026],
       [2046, 1988, 1930, 1850, 2186]])
~~~


In [39]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred = rf.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.22382
Precision:	0.22412490535754753
Recall:	0.22382
F1:	0.2236522203554892


array([[2360, 2040, 1979, 1786, 1835],
       [2174, 2392, 1893, 1830, 1711],
       [2225, 2040, 2280, 1734, 1721],
       [2160, 2034, 1938, 2112, 1756],
       [2175, 2003, 1992, 1783, 2047]])

In [94]:
rf = RandomForestClassifier(criterion='entropy', min_samples_split=64, random_state=42, max_features=4, n_jobs=-1)
rf.fit(X_train, y_train)
y_pred = rf.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.2027
Precision:	0.20268895335050835
Recall:	0.20270000000000002
F1:	0.202622174130087


array([[2083, 2024, 2019, 1873, 2001],
       [2058, 2068, 1977, 1889, 2008],
       [2100, 2035, 1959, 1889, 2017],
       [2013, 2153, 1972, 1890, 1972],
       [2099, 2041, 1928, 1797, 2135]])

In [96]:
rf = RandomForestClassifier(min_samples_split=32, max_features=4, n_jobs=-1)
rf.fit(X_train, y_train)
y_pred = rf.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.20516
Precision:	0.2051212463423299
Recall:	0.20515999999999995
F1:	0.20510088356551712


array([[2072, 2056, 2049, 1907, 1916],
       [2066, 2011, 2039, 1894, 1990],
       [2003, 1998, 2069, 1880, 2050],
       [1978, 2028, 2048, 1920, 2026],
       [2046, 1988, 1930, 1850, 2186]])

## XGBoost

~~~
Accuracy:	0.2025
Precision:	0.2025738072573832
Recall:	0.20249999999999999
F1:	0.20229736964345638
array([[2208, 2007, 1772, 2025, 1988],
       [2150, 2057, 1745, 2094, 1954],
       [2183, 1989, 1815, 2045, 1968],
       [2207, 2052, 1764, 2063, 1914],
       [2219, 1989, 1775, 2035, 1982]])
~~~


In [27]:
gbm = xgb.XGBClassifier( 
                        n_estimators=30000,
                        max_depth=4,
                        objective='multi:softmax', #new objective
                        learning_rate=.05, 
                        subsample=.8,
                        min_child_weight=3,
                        colsample_bytree=.8,
                        use_label_encoder=False
                       )

eval_set=[(X_train,y_train),(X_val,y_val)]
fit_model = gbm.fit( 
                    X_train, y_train, 
                    eval_set=eval_set,
                    eval_metric='merror',
                    early_stopping_rounds=50,
                    verbose=False
                   )

y_pred = gbm.predict(x_test)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


Accuracy:	0.23016
Precision:	0.23081969522554133
Recall:	0.23015999999999998
F1:	0.22896671987642522


array([[2708, 1889, 1967, 1373, 2063],
       [2247, 2255, 2075, 1400, 2023],
       [2353, 1917, 2411, 1357, 1962],
       [2310, 1993, 1986, 1728, 1983],
       [2292, 1921, 2042, 1339, 2406]])

In [121]:
# gbm = xgb.XGBClassifier(use_label_encoder=False)
# # grid={'max_depth': [2, 6, 12],'n_estimators': [50, 100, 200], 'learning_rate':[0.3,.05],
# #         'objective':['multi:softmax', 'multi:softprob'],'colsample_bytree':[0.2,0.5,0.8]}
# grid={'max_depth': [2, 12],'n_estimators': [50, 200],
#         'objective':['multi:softmax'],'colsample_bytree':[0.8]}
# gbm_cv=GridSearchCV(gbm,grid,cv=3,scoring='f1_macro', n_jobs=-1)
# gbm_cv.fit(X_train, y_train)

In [None]:
# res = pd.DataFrame(gbm_cv.cv_results_)
# res.sort_values(by='rank_test_score', ascending=True).head(10)

In [122]:
gbm = xgb.XGBClassifier( 
                        n_estimators=200,
                        max_depth=12,
                        objective='multi:softmax', #new objective
                        learning_rate=.03, 
                        min_child_weight=3,
                        colsample_bytree=.4,
                        use_label_encoder=False
                       )

eval_set=[(X_train,y_train),(X_val,y_val)]
fit_model = gbm.fit( 
                    X_train, y_train, 
                    eval_set=eval_set,
                    eval_metric='merror',
                    early_stopping_rounds=50,
                    verbose=False
                   )

y_pred = gbm.predict(x_test)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


Accuracy:	0.20294
Precision:	0.20297739832653744
Recall:	0.20293999999999998
F1:	0.20288853644668597


array([[2018, 2166, 1941, 1913, 1962],
       [2039, 2060, 1925, 1933, 2043],
       [2146, 2073, 1923, 1900, 1958],
       [2052, 2059, 1884, 1959, 2046],
       [2040, 2044, 1828, 1901, 2187]])

## Naive Bayes

~~~
Accuracy:	0.2025
Precision:	0.2025738072573832
Recall:	0.20249999999999999
F1:	0.20229736964345638
array([[2208, 2007, 1772, 2025, 1988],
       [2150, 2057, 1745, 2094, 1954],
       [2183, 1989, 1815, 2045, 1968],
       [2207, 2052, 1764, 2063, 1914],
       [2219, 1989, 1775, 2035, 1982]])
~~~

In [26]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.22816
Precision:	0.22821810148540583
Recall:	0.22816
F1:	0.22813622601864608


array([[2231, 2018, 1889, 2031, 1831],
       [1898, 2312, 1925, 2022, 1843],
       [1963, 1956, 2280, 1984, 1817],
       [1897, 1970, 1936, 2367, 1830],
       [1941, 1944, 1864, 2033, 2218]])

## SVM

~~~
Accuracy:	0.20268
Precision:	0.20283658864486426
Recall:	0.20268000000000003
F1:	0.200649114635904
array([[1715, 2331, 2302, 2231, 1421],
       [1595, 2355, 2343, 2270, 1437],
       [1571, 2266, 2376, 2342, 1445],
       [1530, 2449, 2272, 2286, 1463],
       [1601, 2318, 2356, 2323, 1402]])
~~~

In [7]:
svm = LinearSVC()
svm.fit(X_train, y_train)
y_pred = svm.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

Accuracy:	0.22568
Precision:	0.2266351415659172
Recall:	0.22568000000000002
F1:	0.2244179949029156




array([[1999, 2014, 2597, 1769, 1621],
       [1703, 2310, 2639, 1734, 1614],
       [1733, 2032, 2931, 1707, 1597],
       [1732, 1967, 2658, 2083, 1560],
       [1689, 2020, 2618, 1712, 1961]])

In [9]:
svm = SVC()
svm.fit(X_train, y_train)
y_pred = svm.predict(x_test,)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

## Ensemble of Models

~~~
Accuracy:	0.2025
Precision:	0.2025738072573832
Recall:	0.20249999999999999
F1:	0.20229736964345638
array([[2208, 2007, 1772, 2025, 1988],
       [2150, 2057, 1745, 2094, 1954],
       [2183, 1989, 1815, 2045, 1968],
       [2207, 2052, 1764, 2063, 1914],
       [2219, 1989, 1775, 2035, 1982]])
~~~

In [43]:
# create voting classifier
model_list = [
    ('knn', knn),
    ('lr', lr),
    ('dt', dt),
    ('rf', rf),
    ('nb', gnb)
]
voting_classifer = VotingClassifier(estimators=model_list,
                                    voting='hard', #<-- sklearn calls this hard voting
                                    n_jobs=-1)
voting_classifer.fit(X_train, y_train)
y_pred = voting_classifer.predict(x_test)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)


Accuracy:	0.23224
Precision:	0.23481330943705955
Recall:	0.23224
F1:	0.22896118897966494


array([[3172, 2208, 1839, 1510, 1271],
       [2823, 2693, 1810, 1452, 1222],
       [2825, 2251, 2257, 1457, 1210],
       [2810, 2253, 1829, 1898, 1210],
       [2851, 2219, 1844, 1494, 1592]])

In [44]:

voting_classifer = VotingClassifier(estimators=model_list,
                                    voting='soft', #<-- sklearn calls this hard voting
                                    n_jobs=-1)
voting_classifer.fit(X_train, y_train)
y_pred = voting_classifer.predict(x_test)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)


Accuracy:	0.22386
Precision:	0.22386667901343751
Recall:	0.22386
F1:	0.2238526081838068


array([[2181, 1963, 1947, 1952, 1957],
       [1875, 2268, 1936, 1969, 1952],
       [1909, 1985, 2248, 1919, 1939],
       [1897, 2022, 1868, 2228, 1985],
       [1931, 1926, 1888, 1987, 2268]])

In [45]:
model_list = [knn,lr,dt,rf]
stacked = StackingClassifier(
    classifiers=model_list, meta_classifier=LogisticRegression(n_jobs=-1), use_probas=False)
stacked.fit(X_train, y_train)
y_pred = stacked.predict(x_test)
print(f'Accuracy:\t{accuracy_score(y_test, y_pred)}')
print(f'Precision:\t{precision_score(y_test, y_pred, average="macro")}')
print(f'Recall:\t{recall_score(y_test, y_pred, average="macro")}')
print(f'F1:\t{f1_score(y_test, y_pred, average="macro")}')
confusion_matrix(y_test, y_pred)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy:	0.20942
Precision:	0.21746255034331302
Recall:	0.20942
F1:	0.19530824327728674


array([[1030, 2684, 3488, 2215,  583],
       [ 950, 2696, 3513, 2222,  619],
       [ 868, 2793, 3491, 2244,  604],
       [ 853, 2554, 3457, 2460,  676],
       [ 826, 2532, 3486, 2362,  794]])

### More Evaluation


In [47]:
y_pred = voting_classifer.predict_proba(x_test)

In [48]:
y_pred

array([[0.15937302, 0.12098713, 0.17833713, 0.40606061, 0.13524211],
       [0.1367587 , 0.15435234, 0.14926824, 0.38481209, 0.17480863],
       [0.15269628, 0.14886068, 0.18828382, 0.12502046, 0.38513876],
       ...,
       [0.14162417, 0.39984444, 0.14350232, 0.18189237, 0.1331367 ],
       [0.16447874, 0.20700267, 0.15188083, 0.12690068, 0.34973708],
       [0.5871458 , 0.05535225, 0.25366229, 0.04987202, 0.05396765]])