# AutoML Generalized Framework

In [None]:
import numpy as np
import pandas as pd
from sklearn import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from tpot import TPOTClassifier, TPOTRegressor
from datacleaner import autoclean
from collections import defaultdict

In [None]:
class AutoMLEstimator(object):
    """Small wrapper that cleans a DataFrame, splits it and runs a TPOT search.

    Typical use is ``run_automl(data, target_column)`` followed by
    ``evaluate_model()``. After ``run_automl`` the instance exposes ``X``,
    ``y``, ``X_train``, ``X_test``, ``y_train`` and ``y_test``; the TPOT
    estimator itself is available as ``tpot_model``.
    """

    def __init__(self, **kwargs):
        """Configure the estimator.

        Keyword Args:
            task: ``'classification'`` selects ``TPOTClassifier``; any other
                value selects ``TPOTRegressor``.
            speed: Integer passed to TPOT as ``generations``; the population
                size is ``speed * 10``.
            test_size: Forwarded unchanged to ``train_test_split``.

        Raises:
            KeyError: If ``task``, ``speed`` or ``test_size`` is not supplied.
        """
        self.task = kwargs['task']
        self.speed = kwargs['speed']
        self.test_size = kwargs['test_size']

        # Both TPOT estimators take the same search settings; only the class differs.
        if self.task == 'classification':
            estimator_cls = TPOTClassifier
        else:
            estimator_cls = TPOTRegressor
        self.tpot_model = estimator_cls(
            generations=self.speed,
            population_size=self.speed * 10,
            verbosity=2,
            n_jobs=-1,
        )

    def preprocess_data(self, data, target_column):
        """Clean ``data`` and separate features from the target.

        Args:
            data: DataFrame containing the features and the target column.
            target_column: Name of the column to predict.

        Returns:
            Tuple ``(X, y)``: the cleaned frame without ``target_column`` and
            the cleaned ``target_column`` itself.
        """
        # The cleaning itself is delegated entirely to datacleaner.autoclean;
        # NOTE(review): confirm against the datacleaner docs what it changes.
        clean_data = autoclean(data)
        X = clean_data.drop(target_column, axis=1)
        y = clean_data[target_column]

        return X, y

    def split_data(self, X, y):
        """Split ``X``/``y`` into train and test parts stored on the instance.

        No ``random_state`` is passed, so the split is not fixed by this class.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=self.test_size
        )

    def fit_model(self):
        """Run the TPOT search on the training split."""
        self.tpot_model.fit(self.X_train, self.y_train)

    def run_automl(self, data, target_column):
        """Preprocess ``data``, split it and fit the TPOT model in one call.

        Args:
            data: DataFrame containing the features and the target column.
            target_column: Name of the column to predict.
        """
        self.X, self.y = self.preprocess_data(data, target_column)
        self.split_data(self.X, self.y)
        self.fit_model()

    def evaluate_model(self):
        """Score the fitted pipeline on the held-out test split.

        Must be called after ``run_automl`` (or ``split_data`` + ``fit_model``).

        Returns:
            Mapping of metric name to value: ``accuracy``, ``precision`` and
            ``recall`` for classification; ``r2_score``,
            ``mean_absolute_error`` and ``mean_squared_error`` otherwise.
        """
        # No default factory is given, so this behaves like a plain dict; the
        # defaultdict type is kept so callers see the same return type.
        metrics = defaultdict()
        pred = self.tpot_model.fitted_pipeline_.predict(self.X_test)

        # scikit-learn metrics take (y_true, y_pred) in that order. Passing
        # them reversed swaps precision with recall and yields a wrong R^2,
        # so the ground truth always goes first here.
        if self.task == 'classification':
            metrics['accuracy'] = accuracy_score(self.y_test, pred)
            metrics['precision'] = precision_score(self.y_test, pred)
            metrics['recall'] = recall_score(self.y_test, pred)
        else:
            metrics['r2_score'] = r2_score(self.y_test, pred)
            metrics['mean_absolute_error'] = mean_absolute_error(self.y_test, pred)
            metrics['mean_squared_error'] = mean_squared_error(self.y_test, pred)

        return metrics

In [None]:
# Load scikit-learn's breast-cancer dataset as the classification example.
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

In [None]:
# List the fields available on the loaded dataset object.
data.keys()

In [None]:
# Dimensions of the feature matrix.
data['data'].shape

In [None]:
# Number of feature names (used below as the DataFrame column labels).
data['feature_names'].shape

In [None]:
# Number of target entries.
data['target'].shape

In [None]:
# Names of the target classes.
data['target_names']

In [None]:
# Put the feature matrix into a DataFrame with one column per feature name,
# then append the labels as a 'target' column.
df = pd.DataFrame(data['data'], columns=data['feature_names']).assign(
    target=data['target']
)

In [None]:
# Classification run: speed=2 gives TPOT 2 generations with a population of 20
# (speed * 10); test_size=0.3 is forwarded to train_test_split.
auto_ml_model = AutoMLEstimator(task='classification', speed=2, test_size=0.3)
# Cleans `df`, splits it and runs the TPOT search on the training split.
auto_ml_model.run_automl(df, target_column='target')

In [None]:
# Show the first entry TPOT recorded in `evaluated_individuals_`
# (presumably per-pipeline evaluation details -- see the TPOT docs).
list(auto_ml_model.tpot_model.evaluated_individuals_.values())[0]

In [None]:
# Regression example data. `load_boston` was deprecated in scikit-learn 1.0 and
# removed in 1.2, where importing it raises ImportError. Fall back to the
# bundled diabetes regression dataset so this cell still runs on current
# releases; older installations keep using the Boston data as before.
try:
    from sklearn.datasets import load_boston
    data = load_boston()
except ImportError:
    from sklearn.datasets import load_diabetes
    data = load_diabetes()

# One column per feature name, plus the regression target as 'target'.
df = pd.DataFrame(data['data'], columns=data['feature_names']).assign(
    target=data['target']
)

In [None]:
# Regression run: speed=5 gives TPOT 5 generations with a population of 50
# (speed * 10). This rebinds `auto_ml_model`, replacing the classification
# estimator created earlier.
auto_ml_model = AutoMLEstimator(task='regression', speed=5, test_size=0.3)
# Cleans `df`, splits it and runs the TPOT search on the training split.
auto_ml_model.run_automl(df, target_column='target')

In [None]:
# Score the regression model on its held-out test split; returns r2_score,
# mean_absolute_error and mean_squared_error.
auto_ml_model.evaluate_model()