In [None]:
import numpy as np
import pandas as pd

from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split

In [5]:
####################################
## Reading data
####################################

# Load the three CSV datasets used in this notebook into DataFrames.
# All paths are relative, so they resolve against the kernel's working directory.
breastcancer = pd.read_csv('./breastcancer/breast-cancer-diagnostic.shuf.lrn.csv')
seoulbike = pd.read_csv('seoulbike/SeoulBikeData.csv')
concrete = pd.read_csv('concrete/concrete_data.csv')

In [1]:
##############################
## Creating Config file for tpot operators and parameters
##############################

# Custom TPOT search space. Each key is the import path of an operator and each
# value maps a hyperparameter name to the collection of candidate values that
# the search may choose from. The dict is handed to TPOTRegressor through its
# `config_dict` argument.
tpot_config = {
    'sklearn.linear_model.ElasticNet': {
        # NOTE(review): these iteration caps are very small for a coordinate
        # descent solver and will likely stop before convergence - confirm
        # that this is intentional.
        "max_iter": [1, 5, 10],
        "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
        # Grid 0.00, 0.01, ..., 1.00. The previous value was the tuple
        # (0, 1, 0.01), which reads like (start, stop, step) but is consumed as
        # three literal candidates (0, 1 and 0.01). linspace pins both
        # endpoints exactly; rounding removes float noise such as
        # 0.07000000000000001.
        "l1_ratio": np.round(np.linspace(0.0, 1.0, 101), 2),
    },

    'sklearn.tree.DecisionTreeRegressor': {
        'criterion': ["squared_error", "friedman_mse", "absolute_error", "poisson"],
        'splitter': ["best", "random"],
        'min_samples_split': [2, 8, 16, 24, 32, 42, 64],
        'min_samples_leaf': [1, 4, 8, 16, 32, 64, 128],
    },

    'sklearn.neural_network.MLPRegressor': {
        # Disabled search dimension, kept for reference:
        ##'hidden_layer_sizes': [1, 4, 8, 16, 32, 64, 128],
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        # NOTE(review): per the scikit-learn docs `learning_rate` only takes
        # effect with solver='sgd'; no solver is configured here - confirm
        # whether 'solver' should be added to this search space.
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
    },
}

Separating the features (X) and the target (Y)

In [None]:
# Breast cancer dataset: every column except 'class' is a feature; 'class' is the target.
# NOTE: X and Y are reassigned by the SeoulBikeData and Concrete cells below,
# so run only the cell for the dataset under study before splitting.
# NOTE(review): 'class' looks like a categorical label while the notebook fits
# a TPOTRegressor - confirm that treating it as a regression target is intended.
X = breastcancer.drop(columns='class')
Y = breastcancer['class']

In [None]:
# Seoul bike dataset: 'Rented Bike Count' is the target; all remaining columns are features.
# NOTE: this overwrites any X and Y set by the breast cancer cell above and is
# itself overwritten if the Concrete cell below is run afterwards.
X = seoulbike.drop(columns='Rented Bike Count')
Y = seoulbike['Rented Bike Count']

In [None]:
# Concrete dataset: 'Strength' is the target; all remaining columns are features.
# NOTE: this overwrites any X and Y set by the dataset cells above.
X = concrete.drop('Strength', axis=1)
# Keep the target as a 1-D Series, matching the other dataset cells; wrapping it
# in a DataFrame produced a 2-D single-column target instead.
Y = concrete['Strength']

Creating the train/test split

In [23]:
##############################
## Splitting into train and test sets
##############################

# Keep 75% of the rows for training and hold out 25% for evaluation.
# The fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

Initializing TPOT AutoML

In [None]:
##############################
## TPOTRegressor configuration, fitting and scoring
##############################

# Restrict the search to the operators and hyperparameter values declared in
# tpot_config; random_state fixes the seed passed to TPOT.
tpot = TPOTRegressor(
    generations=10,
    population_size=50,
    verbosity=2,
    random_state=42,
    config_dict=tpot_config,
)
tpot.fit(X_train, y_train)

# Print the fitted model's score on the held-out split.
# NOTE(review): the scoring metric is TPOT's default because none is passed - confirm which one applies.
print(tpot.score(X_test, y_test))


In [None]:
##########################################
## Writing the results to a file
#########################################

# Call export() on the fitted TPOT object, targeting 'tpot_exported_pipeline.py'
# (a relative path, so it resolves against the kernel's working directory).
# NOTE(review): the file name is fixed regardless of which dataset was used -
# presumably an earlier export is overwritten; confirm and rename per dataset if needed.
tpot.export('tpot_exported_pipeline.py')