In [None]:
from copy import copy
from datetime import datetime

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tpot import TPOTClassifier

In [None]:
# Read the raw dataset from the parent directory and preview the first rows.
dataset_path = "../dataset.csv"
df = pd.read_csv(dataset_path)
df.head()

In [None]:
# Print pandas' summary of the freshly loaded frame as a quick sanity check
# before any column is renamed or added.
df.info()

In [None]:
# Shuffle the rows once. The seed is fixed so that the row order -- and with it
# the content of the exported processed_data.csv -- is the same after every
# "Restart Kernel -> Run All" instead of changing from run to run.
df = shuffle(df, random_state=42)

In [None]:
# Name of the dataset column used as the classification label; a later cell
# renames this column to 'target'.
#
# Other label columns that were tried (presumably all present in dataset.csv --
# confirm). To switch, change the assignment at the bottom to one of these:
# target_name = "acceptability_80"
# target_name = "acceptability_90"
# target_name = "tmp_cmf"
# target_name = "tmp_cmf_80_low"
# target_name = "tmp_cmf_80_up"
# target_name = "tmp_cmf_90_up"
# target_name = "tmp_cmf_90_low"
target_name = "acceptability_90"

In [None]:
# Expose the selected label column under the fixed name 'target'.
df.rename(columns={target_name: 'target'}, inplace=True)

# Parse the epoch-millisecond timestamps once, store them back, and derive the
# time-of-day parts from the parsed series.
timestamps = pd.to_datetime(df['datetime'], unit='ms')
df['datetime'] = timestamps

for part in ('hour', 'minute', 'second'):
    df[part] = getattr(timestamps.dt, part)

In [None]:
# Show the column names now present in df (after the label rename and the
# added hour/minute/second columns).
df.columns

In [None]:
# Columns fed to the model as inputs.
#
# The label column 'target' must NOT be listed here: X is built as df[features],
# so including it would hand the answer to the classifier (label leakage) and
# make the test score meaningless. The export cell appends 'target' itself, so
# listing it here would also select that column twice.
features = [
    'temperature',
    'mean_temp_day',
    'heatindex',
    'relative_humidity',
    'light_sensor_one_wavelength',
    'light_sensor_two_wavelength',
    'number_occupants',
    'activity_occupants',
    'door_state',
    # Time-of-day parts derived from the 'datetime' column.
    'hour',
    'minute',
    'second',
]

In [None]:
# Separate the label vector (y) from the model input matrix (X).
y, X = df['target'], df[features]

In [None]:
# Columns written to disk for the pipeline script that TPOT exports later:
# every model input followed by exactly one 'target' column. Any 'target'
# entry in `features` is filtered out first so the label cannot be selected
# twice (a duplicated column would be read back as an extra 'target.1' column).
export_features = [column for column in features if column != 'target']
export_features.append('target')

# index=False: without it pandas also writes the (shuffled) row index as an
# unnamed first column, which a script that treats every non-'target' column
# as a feature would pick up as a bogus input.
df[export_features].to_csv("processed_data.csv", index=False)

In [None]:
# Hold out 33% of the rows for evaluation, keeping the label distribution the
# same in both parts (stratify). The seed is fixed so the split, and therefore
# the reported score, can be reproduced on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y.values,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Configure the TPOT pipeline search. TPOT's search is stochastic, so a fixed
# random_state is passed to make the discovered pipeline and its score
# repeatable between runs.
pipeline_optimizer = TPOTClassifier(
    generations=5,
    n_jobs=-1,
    early_stop=3,
    verbosity=3,
    random_state=42,
)

In [None]:
# Run the pipeline search on the training split only; the test split is kept
# aside for the scoring cell that follows. This is the long-running step.
pipeline_optimizer.fit(X_train, y_train)

In [None]:
# Score the fitted optimizer on the held-out test split and display the value.
# NOTE(review): the metric is whatever TPOTClassifier's default scoring is
# (presumably accuracy) -- confirm before comparing runs.
score = pipeline_optimizer.score(X_test, y_test)
score

In [None]:
# Write the best pipeline out as a standalone script. The file name carries the
# test score and a second-resolution timestamp so successive runs do not
# overwrite each other; the script is pointed at the CSV saved earlier.
timestamp = datetime.today().strftime('%Y%m%d%H%M%S')
export_path = f"{score}_{timestamp}_tpot_exported_pipeline.py"
pipeline_optimizer.export(export_path, data_file_path="processed_data.csv")