In [2]:
import pandas as pd
import numpy as np


def extract_features(df: pd.DataFrame) -> np.ndarray:
    """Summarise how ``value`` differs between period 0 and period 1.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a ``"period"`` column (rows with 0 form the "before"
        segment, rows with 1 the "after" segment) and a numeric ``"value"``
        column.

    Returns
    -------
    np.ndarray
        1-D float64 array with ten entries, in this order: mean_diff,
        std_diff, median_diff, iqr_diff, mean_ratio, std_ratio, skew_diff,
        kurtosis_diff, min_diff, max_diff. Differences are "after minus
        before"; ratios are "after over before".

    Notes
    -----
    All statistics use pandas reductions, which skip NaN values. A segment
    that is empty (or too short for a given statistic) yields NaN for the
    affected features instead of raising.
    """
    values = df["value"]
    before = values[df["period"] == 0]
    after = values[df["period"] == 1]

    def _iqr(segment: pd.Series) -> float:
        # Series.quantile uses the same linear interpolation as the default
        # np.percentile, but ignores NaN like the other statistics below and
        # returns NaN (rather than failing) for an empty segment.
        return segment.quantile(0.75) - segment.quantile(0.25)

    features = {
        "mean_diff": after.mean() - before.mean(),
        "std_diff": after.std() - before.std(),
        "median_diff": after.median() - before.median(),
        "iqr_diff": _iqr(after) - _iqr(before),
        # The 1e-8 offset keeps the ratios finite when the "before"
        # statistic is exactly zero.
        "mean_ratio": after.mean() / (before.mean() + 1e-8),
        "std_ratio": after.std() / (before.std() + 1e-8),
        "skew_diff": after.skew() - before.skew(),
        "kurtosis_diff": after.kurtosis() - before.kurtosis(),
        "min_diff": after.min() - before.min(),
        "max_diff": after.max() - before.max(),
    }

    # Dict insertion order fixes the feature order of the returned vector.
    return pd.Series(features).to_numpy(dtype=np.float64)


In [3]:
def train(
    X_train: pd.DataFrame,
    y_train: pd.Series,
    model_directory_path: str,
):
    """Placeholder training step: fits nothing and writes nothing.

    All three arguments are accepted but unused, and the function returns
    ``None`` without touching ``model_directory_path``.

    NOTE(review): ``infer`` in this notebook loads ``scaler.joblib`` and
    ``xgb.json`` from ``model_directory_path``. This function does not
    create them, so they have to be supplied by some other means.
    """
    # Outline for turning this into a real training step:
    #   1. extract features from the before/after segments of each series;
    #   2. fit a classifier on those features against the y_train labels;
    #   3. persist the fitted artifacts under model_directory_path.
    return None



In [None]:
import os
import typing 
import joblib
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

def infer(
    X_test: typing.Iterable[pd.DataFrame],
    model_directory_path: str,
):
    """Yield one positive-class probability per test dataset.

    Generator protocol: after the scaler and classifier are loaded, a bare
    ``yield`` is emitted once; afterwards one prediction is yielded for each
    DataFrame in ``X_test``, in iteration order.

    Parameters
    ----------
    X_test : Iterable[pd.DataFrame]
        Datasets to score; each is passed to ``extract_features``.
    model_directory_path : str
        Directory containing ``scaler.joblib`` and ``xgb.json``.

    Yields
    ------
    None
        Once, right after the model artifacts are loaded.
    np.ndarray
        Length-1 float64 array holding the second column of
        ``predict_proba`` for the current dataset.
    """
    # NOTE: joblib.load unpickles the file, so only point this at a trusted
    # model directory.
    scaler = joblib.load(os.path.join(model_directory_path, "scaler.joblib"))
    model = XGBClassifier()
    model.load_model(os.path.join(model_directory_path, "xgb.json"))

    # Presumably a "ready" signal for the calling harness -- TODO confirm.
    yield

    for dataset in X_test:
        # Reshape the feature vector (not the raw DataFrame, which has no
        # .reshape) into a single-row 2-D array for the scaler and model.
        features = extract_features(dataset).reshape(1, -1)
        scaled = scaler.transform(features)
        prediction = model.predict_proba(scaled)[:, 1]
        yield prediction.astype(np.float64)