In [None]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import seaborn as sns
from IPython.display import display
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split, KFold, RepeatedStratifiedKFold
from tpot import TPOTClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mutual_info_score, roc_curve, auc, roc_auc_score

from autosklearn.classification import AutoSklearnClassifier

from tqdm.auto import tqdm
import warnings
# Suppress every Python warning for the rest of the session. This keeps the
# notebook output short, but it also hides deprecation and convergence
# warnings emitted by the libraries imported above.
warnings.filterwarnings('ignore')

In [None]:
# Load the churn CSV and normalise the column names (spaces -> underscores,
# lower-case) so they can be used as attribute-style identifiers below.
path = './data/Telco-Customer-Churn.csv'
data = pd.read_csv(path)
data.columns = data.columns.str.replace(' ', '_').str.lower()

# Columns that are modelled as continuous features.
numerical_col = ['tenure', 'totalcharges', 'monthlycharges']

# Coerce the declared numeric columns to real numbers BEFORE the text columns
# are detected. If one of them was read as `object`, it would otherwise be
# string-normalised, end up in `categorical_col` as well as `numerical_col`,
# and be selected twice by `categorical_col + numerical_col` downstream.
# NOTE(review): in the public Telco churn CSV `TotalCharges` contains blank
# strings and is read as text - presumably true for this file too; confirm.
# Unparseable / missing entries become 0 (assumed to mean "nothing charged
# yet"); verify that this is the desired imputation.
for col in numerical_col:
    data[col] = pd.to_numeric(data[col], errors='coerce').fillna(0)

# Normalise the values of every remaining text column the same way as the
# column names. This includes `churn`, which is still 'Yes'/'No' text here.
text_col = data.dtypes[data.dtypes == 'object'].index.tolist()

for col in text_col:
    data[col] = data[col].str.replace(' ', '_').str.lower()

# Binary target: 1 when the normalised churn value is 'yes', otherwise 0.
data['churn'] = (data.churn == 'yes').astype(int)

# Whatever is still text after the conversions above is a categorical
# feature, except the row identifier, which carries no predictive signal.
categorical_col = data.dtypes[data.dtypes == 'object'].index.tolist()
categorical_col.remove('customerid')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across kernel restarts.
train_data, test_data = train_test_split(data, random_state=0, test_size=0.2)

# Feature frames: fresh copies of each partition without the label column.
train_x = train_data.drop(columns=['churn'])
test_x = test_data.drop(columns=['churn'])

# Label series. `pop` also removes `churn` from train_data / test_data in
# place, so neither partition carries the target column after this cell.
train_y = train_data.pop('churn')
test_y = test_data.pop('churn')

In [None]:
# Encode the selected feature columns with a DictVectorizer: string-valued
# entries are one-hot encoded, numeric entries are passed through unchanged.
dv = DictVectorizer(sparse = False)

# Build the record dicts for both partitions up front, while train_x / test_x
# are still DataFrames (both names are rebound to dense arrays below).
feature_cols = categorical_col + numerical_col
train_records = train_x[feature_cols].to_dict(orient = 'records')
test_records = test_x[feature_cols].to_dict(orient = 'records')

# Fit on the training rows only so the encoding does not depend on test data.
dv.fit(train_records)
feature_names = dv.feature_names_

train_x = dv.transform(train_records)
# Fix: the transformed test matrix was previously assigned to `train_x`,
# which overwrote the training features and left `test_x` un-encoded.
test_x = dv.transform(test_records)

# DataFrame views of the encoded matrices with readable column labels.
train_df = pd.DataFrame(train_x, columns=feature_names)
test = pd.DataFrame(test_x, columns=feature_names)

In [None]:
# Search settings for auto-sklearn, collected in one place so they are easy
# to tune: a 5-minute overall budget, at most 30 seconds per candidate run,
# and an ensemble size of 1.
# NOTE(review): the tmp folder is an absolute path directly under the
# filesystem root ('/temp/...') - confirm this is intended rather than a
# location under '/tmp'.
automl_settings = dict(
    time_left_for_this_task=5*60,
    per_run_time_limit=30,
    ensemble_size=1,
    tmp_folder='/temp/autosklearn_classification_example_tmp',
)
automl = AutoSklearnClassifier(**automl_settings)

# Run the search on the encoded training matrix and its labels.
automl.fit(train_x, train_y)

# Text summary of the search, followed by the leaderboard of evaluated models.
search_summary = automl.sprint_statistics()
print(search_summary)

print(automl.leaderboard())