In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from tsfresh.transformers import RelevantFeatureAugmenter

In [2]:
# Load the inputs.
# X: the time-series container, with "date" parsed to datetimes so it can be
#    used for ordering (presumably one row per id/timestamp with one column
#    per sensor -- verify against occupancy.csv).
X = pd.read_csv("occupancy.csv", parse_dates=["date"])

# y: the "occupancy" label as a Series indexed by sample id.
y = pd.read_csv("occupancy_target.csv", index_col="id")["occupancy"]

In [3]:
# The design matrix starts out as an empty frame that carries only the sample
# ids; the sensor readings themselves are handed to the augmenter separately
# (as its time-series container) further down the notebook.
tmp = pd.DataFrame(index=y.index)

# Split the ids (and their labels) into train and test sets.
# The seed is fixed so that the partition -- and therefore the reported
# metrics -- are identical on every Restart & Run All. It uses the same value
# as the classifier's random_state.
X_train, X_test, y_train, y_test = train_test_split(tmp, y, random_state=10)

In [4]:
# Feature-calculator settings, keyed by time-series kind ("light", "co2",
# "temperature"). Each inner dict maps a calculator name to either None or a
# list of parameter dicts (per the tsfresh settings format, None means the
# calculator takes no parameters and a list requests one feature per entry).
# Every list is built fresh so no two entries share the same list object.
kind_to_fc_parameters = {
    "light": {
        "c3": [{"lag": lag} for lag in (3, 2, 1)],
        "abs_energy": None,
        "sum_values": None,
        "fft_coefficient": [{"attr": attr, "coeff": 0} for attr in ("real", "abs")],
        "spkt_welch_density": [{"coeff": coeff} for coeff in (2, 5, 8)],
        "agg_linear_trend": [
            {"attr": attr, "chunk_len": 50, "f_agg": "var"}
            for attr in ("intercept", "slope")
        ],
        "change_quantiles": [
            {"f_agg": "var", "isabs": isabs, "qh": 1.0, "ql": 0.8}
            for isabs in (False, True)
        ],
    },
    "co2": {
        "fft_coefficient": [{"attr": attr, "coeff": 0} for attr in ("real", "abs")],
        "c3": [{"lag": lag} for lag in (3, 2, 1)],
        "sum_values": None,
        "abs_energy": None,
        "sum_of_reoccurring_data_points": None,
        "sum_of_reoccurring_values": None,
    },
    "temperature": {
        # Lags are listed in ascending order here, unlike the other kinds.
        "c3": [{"lag": lag} for lag in (1, 2, 3)],
        "abs_energy": None,
    },
}

In [5]:
# Configure the tsfresh augmenter: series are grouped by the "id" column,
# ordered by "date", and only the calculators listed per kind above are used.
# The time-series container is deliberately not passed here; it is attached
# to the pipeline step later via set_params.
augmenter_options = {
    "column_id": "id",
    "column_sort": "date",
    "kind_to_fc_parameters": kind_to_fc_parameters,
}
augmenter = RelevantFeatureAugmenter(**augmenter_options)

In [6]:
# Two-step pipeline: the augmenter step feeds its output frame to a
# logistic-regression classifier (C=0.01, fixed seed).
classifier = LogisticRegression(C=0.01, random_state=10)

pipe = Pipeline(
    steps=[
        ("augmenter", augmenter),
        ("classifier", classifier),
    ]
)

In [7]:
# Hand the full time-series frame X to the "augmenter" step, using sklearn's
# `<step>__<param>` naming to reach the step's `timeseries_container` parameter.
# X is the complete frame loaded above, not a train-only subset; presumably the
# augmenter picks the relevant series by the ids in the frame passed to
# fit/predict -- confirm against the tsfresh docs.
pipe.set_params(augmenter__timeseries_container=X)

In [8]:
# Fit both pipeline steps on the training ids and their labels. X_train is the
# id-indexed frame without columns produced by the split; the sensor readings
# come from the time-series container set on the augmenter step.
pipe.fit(X_train, y_train)

Feature Extraction: 100%|█████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.72it/s]


In [9]:
# Predict labels for the held-out ids, then print per-class precision, recall
# and F1 against the true test labels.
y_pred = pipe.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

Feature Extraction: 100%|█████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.84it/s]

              precision    recall  f1-score   support

           0       0.96      1.00      0.98        27
           1       1.00      0.86      0.92         7

    accuracy                           0.97        34
   macro avg       0.98      0.93      0.95        34
weighted avg       0.97      0.97      0.97        34




