In [1]:
import time
import random
from math import *
import operator
import pandas as pd
import numpy as np
pd.set_option("display.max_columns", 10000)
import string
from pprint import pprint
from scipy import stats
import itertools

# import plotting libraries
import matplotlib
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from matplotlib import style
%matplotlib inline 

from mlxtend.plotting import plot_decision_regions
import matplotlib.gridspec as gridspec

import seaborn as sns
sns.set(style="white", color_codes=True)
sns.set(font_scale=1.5)

# load make_blobs to simulate data
from sklearn.datasets import make_blobs
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression

# import the ML algorithm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from statsmodels.tools.eval_measures import rmse
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from sklearn.linear_model import Ridge
from sklearn.svm import SVR

from mlxtend.classifier import StackingClassifier
from mlxtend.regressor import StackingRegressor
from mlxtend.regressor import StackingCVRegressor

# pre-processing
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing.data import QuantileTransformer
from sklearn.preprocessing import Imputer
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV

# import libraries for model validation
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut 

# import libraries for metrics and reporting
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import adjusted_rand_score

In [2]:
# Loading dataset and view a few records.
df = pd.read_csv('E:\\MYLEARN\\2-ANALYTICS-DataScience\\datasets\\glass.csv')

In [3]:
df.head()

Unnamed: 0,Id,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [4]:
df.shape

(214, 11)

In [5]:
# Train test split
attributes = list(df.columns[:8])

X = df[attributes].values
y = df['RI'].values    

# Split into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state =0)

In [7]:
# base estimators
lr      = LinearRegression()
svr_lin = SVR(kernel='linear')
ridge   = Ridge(random_state=1)

# meta estimator
svr_rbf = SVR(kernel='rbf')

In [8]:
# instantiate the stacking model
stregr = StackingRegressor(regressors=[svr_lin, lr, ridge], 
                           meta_regressor=svr_rbf)

In [9]:
# Training the stacking classifier
stregr.fit(X, y)
stregr.predict(X)



array([1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254, 1.52254,
       1.52254, 1.52

In [10]:
# Evaluate and visualize the fit

print("Mean Squared Error: %.4f"
      % np.mean((stregr.predict(X) - y) ** 2))

print('Variance Score: %.4f' % stregr.score(X, y))


Mean Squared Error: 0.0000
Variance Score: -1.8985


In [11]:
for reg, label in zip([svr_lin, lr, ridge, svr_rbf], 
                      ['SVR LIN', 
                       'LIN REG', 
                       'RIDGE',
                       'SVR RBF']):
    
    # Mean squared error regression loss
    scores = cross_val_score(reg, 
                             X, 
                             y, 
                             cv=3, 
                             scoring='neg_mean_squared_error')
    
    
    #print(scores)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" 
          % (scores.mean(), 
             scores.std(), 
             label))

Accuracy: -0.00 (+/- 0.00) [SVR LIN]
Accuracy: -0.00 (+/- 0.00) [LIN REG]
Accuracy: -0.00 (+/- 0.00) [RIDGE]




Accuracy: -0.00 (+/- 0.00) [SVR RBF]


In [12]:
# with plt.style.context(('seaborn-whitegrid')):
#     plt.scatter(X, y, c='lightgray')
#     plt.plot(X, stregr.predict(X), c='darkgreen', lw=2)

# plt.show()