In [1]:
import numpy as np
import pandas as pd

from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split

In [2]:
# ---------------------------------------------------------------
# Load the three pre-processed datasets from the local
# 'datasets/' directory, one DataFrame per dataset.
# ---------------------------------------------------------------

breastcancer, seoulbike, concrete = map(
    pd.read_csv,
    (
        'datasets/breast-cancer-diagnostic.shuf.lrn_processed.csv',
        'datasets/SeoulBikeData_processed.csv',
        'datasets/concrete_data_processed.csv',
    ),
)


In [3]:
##############################
## TPOT search space: the operators TPOT may use and, for each one,
## the candidate values of every hyper-parameter it may pick from.
##############################

tpot_config = {
    'sklearn.linear_model.ElasticNet': {
        # NOTE(review): these iteration caps are very small for coordinate
        # descent; expect convergence warnings -- confirm this is intended.
        "max_iter": [1, 5, 10],
        "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
        # Full grid 0.00, 0.01, ..., 1.00. The previous value `(0, 1, 0.01)`
        # was a plain tuple, so only the three values 0, 1 and 0.01 were ever
        # tried instead of a range from 0 to 1 in steps of 0.01.
        # linspace + round gives exactly 101 clean values (no float drift).
        "l1_ratio": np.linspace(0.0, 1.0, 101).round(2).tolist(),
    },

    'sklearn.tree.DecisionTreeRegressor': {
        'criterion': ["squared_error", "friedman_mse", "absolute_error", "poisson"],
        'splitter': ["best", "random"],
        'min_samples_split': [2, 8, 16, 24, 32, 42, 64],
        'min_samples_leaf': [1, 4, 8, 16, 32, 64, 128],
    },

    'sklearn.neural_network.MLPRegressor': {
        # Three hidden layers of equal width in every candidate architecture.
        'hidden_layer_sizes': [(50, 50, 50), (100, 100, 100), (150, 150, 150)],
        'alpha': [0.0001, 0.05],
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
    },
}

Selecting the features (X) and the target (Y)

In [10]:
# Breast cancer: features (X) and the 'target' label (Y).
#
# The loaded frame contains 'Unnamed: 0' / 'Unnamed: 0.1' columns that merely
# repeat the row index (presumably left over from saving the CSV with its
# index). They carry no signal, so they are removed from the feature matrix
# instead of being fed to the model as features.
#
# NOTE: every dataset cell rebinds X and Y; the train/test split uses whichever
# dataset cell was executed last.

X = (
    breastcancer
    .drop(columns='target')
    .loc[:, lambda frame: ~frame.columns.astype(str).str.startswith('Unnamed')]
)
Y = breastcancer['target']

In [9]:
# Seoul bike sharing: the target is 'Rented Bike Count', every other column is a feature.
# Rebinds X and Y (as the other dataset cells do), so the train/test split
# uses whichever dataset cell was executed last.

Y = seoulbike['Rented Bike Count']
X = seoulbike.drop(columns='Rented Bike Count')

Unnamed: 0.1,Unnamed: 0,target,0,1,2,3,4
0,0,0,-0.481087,-2.020527,-0.985558,0.671805,-0.425078
1,1,0,-1.240146,-0.983626,-0.153250,-0.318409,-0.950779
2,2,1,1.007390,-1.383571,-0.156056,0.087486,0.438858
3,3,0,-1.701572,3.228727,6.179880,-2.636937,2.736508
4,4,0,3.551157,5.727049,0.423446,4.128171,-2.054695
...,...,...,...,...,...,...,...
280,280,1,11.017219,-2.653681,-3.008669,0.358757,0.834171
281,281,1,2.430814,1.556917,-1.632908,-0.561531,0.748955
282,282,0,-1.253447,-1.515366,-0.455662,0.770502,-0.888184
283,283,1,4.550997,-2.151633,0.215106,0.462087,-0.022248


In [4]:
# Concrete: the target is 'Strength', every other column is a feature.
# Rebinds X and Y (as the other dataset cells do), so the train/test split
# uses whichever dataset cell was executed last.

Y = concrete['Strength']
X = concrete.drop(columns='Strength')

Creating the train/test split

In [5]:
# ---------------------------------------------------------------
# Hold out 25% of the rows for testing and train on the other 75%.
# The fixed random_state makes the partition repeatable.
# ---------------------------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.75,
    test_size=0.25,
    random_state=1122,
)

Initializing and running TPOT AutoML

In [6]:
##############################
## TPOTRegressor: configure, run the search, score on the hold-out set
##############################

# The search is restricted to the operators/hyper-parameters in tpot_config.
# With population_size=100 and generations=5 the progress bar counts 600
# pipeline evaluations.
# random_state pins the (otherwise random) evolutionary search so that a
# re-run reproduces the same pipeline; the same seed as the train/test split
# is reused for consistency.
tpot = TPOTRegressor(
    generations=5,
    population_size=100,
    verbosity=2,
    config_dict=tpot_config,
    random_state=1122,
)
tpot.fit(X_train, y_train)

# Score of the selected pipeline on the held-out test data.
# NOTE(review): the metric is TPOT's configured scoring function (no
# `scoring` argument is passed, so the library default applies) -- confirm
# which metric that is before interpreting the number.
print(tpot.score(X_test, y_test))


Optimization Progress:   0%|          | 0/600 [00:00<?, ?pipeline/s]

In [31]:
# ---------------------------------------------------------------
# Export the fitted TPOT pipeline as Python code to
# 'tpot_exported_pipeline.py' in the working directory.
# ---------------------------------------------------------------

tpot.export(output_file_name='tpot_exported_pipeline.py')