In [1]:
import pandas as pd 

# Load the abalone file (read with header=None, so the first row is data)
# and label its nine columns.
ABALONE_COLUMNS = [
    'sex', 'length', 'diameter', 'height', 'whole_weight',
    'shucked_weight', 'viscera_weight', "shell_weight", "rings",
]
dataframe = pd.read_csv("abalone.data", header=None)
dataframe.columns = ABALONE_COLUMNS

# Convert the sex labels to numerical categories
def replace_sex(sex):
    """Translate an abalone sex code into its integer category.

    'M' maps to 0, 'F' to 1 and 'I' to 2. Any other value raises KeyError.
    """
    sex_to_category = dict(M=0, F=1, I=2)
    return sex_to_category[sex]

# Encode the 'sex' column element by element with replace_sex.
# Re-running this on already-encoded values raises KeyError inside replace_sex.
dataframe["sex"] = dataframe["sex"].map(replace_sex)

In [2]:
from sklearn.model_selection import train_test_split
# Features are every column except 'sex'; 'sex' is the target to predict.
X = dataframe.drop(columns=['sex'])
y = dataframe['sex']
# A fixed seed makes the 80/20 split identical on every run. Without it each
# re-run scores the models on different rows and the numbers are not comparable.
# NOTE(review): the pickled models loaded below were fitted elsewhere. Confirm
# they never saw these test rows, otherwise their accuracy is inflated.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Import created models 
import pickle 
# Auto-sklearn
# NOTE: pickle.load can execute arbitrary code, so only load model files that
# come from a trusted source.
# The `with` blocks close each file handle as soon as its model is loaded.
with open('autosklearn_abalone.sav', 'rb') as model_file:
    clf_autosklearn = pickle.load(model_file)
sklearn_pred = clf_autosklearn.predict(X_test)
# OBOE
with open('oboe_abalone.sav', 'rb') as model_file:
    clf_oboe = pickle.load(model_file)
# Only element [0] of what Oboe's predict() returns is kept as the predictions.
oboe_pred = clf_oboe.predict(X_test)[0]
# TPOT 
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures


# Pipeline exported by TPOT.
# Average CV score on the training set was: 0.5674825237417542
polynomial_step = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
pca_step = PCA(iterated_power=10, svd_solver="randomized")
forest_step = RandomForestClassifier(
    bootstrap=False,
    criterion="entropy",
    max_features=0.5,
    min_samples_leaf=17,
    min_samples_split=15,
    n_estimators=100,
)
exported_pipeline = make_pipeline(polynomial_step, pca_step, forest_step)

# fit() returns the fitted pipeline, so predict can be chained on it directly.
tpot_pred = exported_pipeline.fit(X_train, y_train).predict(X_test)

In [5]:
from sklearn.metrics import accuracy_score

# Accuracy on the test split, printed in the order: auto-sklearn, Oboe, TPOT.
for model_pred in (sklearn_pred, oboe_pred, tpot_pred):
    print(accuracy_score(y_test, model_pred))

# Results obtained the first time (the results below differ because the data was split differently)
# Open question: which of these is the actual accuracy?
# tpot score:		  0.5729665071770335
# autosklearn score:	  0.5729665071770335
# oboe score:		  0.5633971291866029

0.7416267942583732
0.8528708133971292
0.5311004784688995


In [6]:
## Ensemble voting  
from sklearn.metrics import accuracy_score

# Build a frame with one row per test sample and one column per tool's predicted class.
vote_rows = list(zip(sklearn_pred, oboe_pred, tpot_pred))
voting = pd.DataFrame(
    vote_rows,
    columns=["AutoSklearn_Class", "Oboe_Class", "TPOT_Class"],
)

# Choose the class with the most votes; if all three disagree, fall back to Oboe's prediction
def function(x) :
    """Combine the three classifiers' predictions for one row by majority vote.

    Parameters
    ----------
    x : mapping-like row (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide the keys 'AutoSklearn_Class', 'Oboe_Class' and 'TPOT_Class'.

    Returns
    -------
    The label predicted by at least two of the three classifiers. If all three
    disagree, the value of 'Oboe_Class' is returned as the tie-breaker.

    Works for any hashable label values, not only the classes 0, 1 and 2.
    """
    # Imported here so the function does not depend on another cell's imports.
    from collections import Counter

    votes = [x['AutoSklearn_Class'], x['Oboe_Class'], x['TPOT_Class']]
    # With three votes, at most one label can reach a count of two.
    label, count = Counter(votes).most_common(1)[0]
    if count >= 2 :
        return label
    return x['Oboe_Class']

# Reduce each row's three votes to one ensemble prediction, then score it.
voting['ensemble'] = voting.apply(function, axis='columns')

ensemble_accuracy = accuracy_score(y_test, voting['ensemble'])
print(ensemble_accuracy)

0.7523923444976076


In [14]:
from sklearn.model_selection import train_test_split
# Split the whole frame 80/20; the 'sex' target stays in as a column of both parts.
# A fixed seed keeps the split, and every score derived from it, reproducible.
X_train, X_test = train_test_split(dataframe, test_size=0.2, random_state=42)

In [15]:
# Display the training partition; here it still contains the 'sex' target column.
X_train

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
2731,2,0.410,0.315,0.100,0.3000,0.1240,0.0575,0.100,8
4127,2,0.550,0.445,0.110,0.7935,0.3780,0.1420,0.260,10
2934,1,0.620,0.475,0.150,0.9545,0.4550,0.1865,0.277,9
540,1,0.500,0.375,0.140,0.6040,0.2420,0.1415,0.179,15
1568,2,0.470,0.355,0.125,0.4990,0.2100,0.0985,0.155,8
...,...,...,...,...,...,...,...,...,...
1565,2,0.460,0.355,0.110,0.4255,0.2015,0.0810,0.130,7
1881,2,0.550,0.450,0.130,0.8040,0.3375,0.1405,0.230,6
1797,2,0.585,0.450,0.150,0.8915,0.3975,0.2035,0.253,8
1257,2,0.430,0.340,0.000,0.4280,0.2065,0.0860,0.115,8


In [18]:
## Bagging 
from sklearn.utils import resample

# Three bootstrap samples of 2230 rows each, one per AutoML tool.
# Each draw has its own fixed seed: the samples are reproducible across runs
# but still differ from one another. A shared seed would give three identical samples.
X1 = resample(X_train, n_samples=2230, random_state=1)
X2 = resample(X_train, n_samples=2230, random_state=2)
X3 = resample(X_train, n_samples=2230, random_state=3)

# TODO Train a different model on each bootstrap sample, then combine the results


In [22]:
# TPOT 
from tpot import TPOTClassifier
# Fit TPOT on the first bootstrap sample, with a 30-minute search budget.
tpot_features = X1.drop(columns=['sex'])
tpot_labels = X1['sex']
clf_tpot = TPOTClassifier(max_time_mins=30, verbosity=2)
clf_tpot.fit(tpot_features, tpot_labels)


Generation 1 - Current best internal CV score: 0.7426008968609865

Generation 2 - Current best internal CV score: 0.7461883408071749

Generation 3 - Current best internal CV score: 0.7461883408071749

Generation 4 - Current best internal CV score: 0.7538116591928252

Generation 5 - Current best internal CV score: 0.7547085201793722

Generation 6 - Current best internal CV score: 0.7560538116591928

Generation 7 - Current best internal CV score: 0.7560538116591928

Generation 8 - Current best internal CV score: 0.7645739910313901

30.02 minutes have elapsed. TPOT will close down.
TPOT closed during evaluation in one generation.


TPOT closed prematurely. Will use the current best pipeline.

Best pipeline: KNeighborsClassifier(Normalizer(RandomForestClassifier(input_matrix, bootstrap=True, criterion=gini, max_features=0.55, min_samples_leaf=10, min_samples_split=9, n_estimators=100), norm=l2), n_neighbors=67, p=2, weights=distance)


TPOTClassifier(max_time_mins=30, verbosity=2)

In [23]:
# AutoSklearn 
import autosklearn.classification
# Fit auto-sklearn on the second bootstrap sample.
autosklearn_features = X2.drop(columns=['sex'])
autosklearn_labels = X2['sex']
clf_autosklearn = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=1800,
)
clf_autosklearn.fit(autosklearn_features, autosklearn_labels)


AutoSklearnClassifier(per_run_time_limit=180, time_left_for_this_task=1800)

In [24]:
# OBOE
from oboe import AutoLearner, error
import numpy as np 
# Fit Oboe on the third bootstrap sample, passed in as NumPy arrays.
problem_type = 'classification'
method = 'Oboe' # 'Oboe' or 'TensorOboe'
oboe_features = X3.drop(columns=['sex'])
xx_train = np.array(oboe_features)
yy_train = np.array(X3['sex'])
# NOTE(review): runtime_limit=30 looks far smaller than the budgets given to
# TPOT and auto-sklearn. Confirm the unit and whether this is intended.
clf_oboe = AutoLearner(
    p_type=problem_type,
    runtime_limit=30,
    method=method,
    verbose=False,
)
clf_oboe.fit(xx_train, yy_train)

{'ranks': [8, 9, 10, 11],
 'runtime_limits': [1, 2, 4, 8],
 'validation_loss': [0.5,
  0.24684581681261256,
  0.24684581681261256,
  0.22066931106574347,
  0.22066931106574347],
 'filled_new_row': [array([[ 4.65508219e-01,  4.37534194e-01,  3.16603378e-01,
           2.32668323e-01,  2.19617026e-01,  4.52981875e-01,
           4.14712518e-01,  2.95584051e-01,  1.99927356e-01,
           1.90023758e-01,  2.41022148e-01,  2.52296879e-01,
          -1.79353031e-02,  2.70273758e-02,  1.22719713e-01,
           2.55730369e-01,  4.87324753e-01,  7.01555883e-01,
           3.54105567e-01,  3.59533522e-01, -1.39227131e-02,
           2.32297809e-01,  2.35501176e-01,  2.23017741e-01,
          -3.15578018e-02, -3.11468487e-02,  2.00526068e-01,
          -2.75288837e-02,  1.54908522e-02,  7.17983587e-03,
           7.83461135e-02,  7.55096939e-02,  1.90415846e-01,
           1.87543043e-01,  3.28753334e-01,  3.24813372e-01,
           4.91519328e-01,  4.94423966e-01,  6.05009665e-01,
           

In [25]:
# All three models predict on the same test features (the 'sex' target column removed).
test_features = X_test.drop(columns=['sex'])
sklearn_pred = clf_autosklearn.predict(test_features)
# Only element [0] of what Oboe's predict() returns is kept as the predictions.
oboe_pred = clf_oboe.predict(test_features)[0]
tpot_pred = clf_tpot.predict(test_features)

In [26]:
from sklearn.metrics import accuracy_score

# Accuracy on the test split, printed in the order: auto-sklearn, Oboe, TPOT.
for model_pred in (sklearn_pred, oboe_pred, tpot_pred):
    print(accuracy_score(X_test['sex'], model_pred))

0.5430622009569378
0.5490430622009569
0.5358851674641149


In [27]:
# Build a frame with one row per test sample and one column per tool's predicted class.
bagging_rows = list(zip(sklearn_pred, oboe_pred, tpot_pred))
bagging = pd.DataFrame(
    bagging_rows,
    columns=["AutoSklearn_Class", "Oboe_Class", "TPOT_Class"],
)

# Choose the class with most votes or the most accurate one (oboe)
def function(x) :
    """Combine the three classifiers' predictions for one row by majority vote.

    Parameters
    ----------
    x : mapping-like row (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide the keys 'AutoSklearn_Class', 'Oboe_Class' and 'TPOT_Class'.

    Returns
    -------
    The label predicted by at least two of the three classifiers. If all three
    disagree, the value of 'Oboe_Class' is returned as the tie-breaker.

    Works for any hashable label values, not only the classes 0, 1 and 2.
    """
    # Imported here so the function does not depend on another cell's imports.
    from collections import Counter

    votes = [x['AutoSklearn_Class'], x['Oboe_Class'], x['TPOT_Class']]
    # With three votes, at most one label can reach a count of two.
    label, count = Counter(votes).most_common(1)[0]
    if count >= 2 :
        return label
    return x['Oboe_Class']

# Reduce each row's three votes to one ensemble prediction, then score it.
bagging['ensemble'] = bagging.apply(function, axis='columns')

ensemble_accuracy = accuracy_score(X_test['sex'], bagging['ensemble'])
print(ensemble_accuracy)

# We obtained a better result with the ensemble than with any single tool above
# Previously I trained each tool on the entire training data --> that gave better results, even without ensemble learning
# Here I trained each tool on a different bootstrap of the training data (each about 2/3 the size of the train data) and tested it on the test data
# the train data == 80% of the entire dataset and the test data == 20%

0.562200956937799


In [None]:
# Stacking ensemble learning 
# Using a linear metalearner 
# Using neural network (?)
