In [3]:
import pandas as pd
import numpy as np

# --- Feature engineering ---
def extract_features(df: pd.DataFrame) -> np.ndarray:
    """Summarise how the distribution of ``value`` changes across the boundary.

    The rows of ``df`` are split by the ``period`` column (0 = before the
    boundary, 1 = after it) and ten "after vs. before" statistics are computed.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a numeric ``value`` column and a ``period`` column whose
        entries are 0 or 1.

    Returns
    -------
    np.ndarray
        1-D array of 10 features, in this order: ``mean_diff``, ``std_diff``,
        ``median_diff``, ``iqr_diff``, ``mean_ratio``, ``std_ratio``,
        ``skew_diff``, ``kurtosis_diff``, ``min_diff``, ``max_diff``.
    """
    # Separate the time series into 'before' and 'after' the boundary
    before = df.loc[df["period"] == 0, "value"]
    after = df.loc[df["period"] == 1, "value"]

    def _iqr(series: pd.Series) -> float:
        # Series.quantile skips NaN exactly like mean/std/median below, so a
        # single missing value no longer turns the IQR feature into NaN
        # (np.percentile propagates NaN). Default interpolation is linear in
        # both, so results are unchanged for NaN-free data.
        return series.quantile(0.75) - series.quantile(0.25)

    features = {
        "mean_diff": after.mean() - before.mean(),
        "std_diff": after.std() - before.std(),
        "median_diff": after.median() - before.median(),
        "iqr_diff": _iqr(after) - _iqr(before),
        # The small epsilon guards the ratios against a zero denominator.
        "mean_ratio": after.mean() / (before.mean() + 1e-8),
        "std_ratio": after.std() / (before.std() + 1e-8),
        "skew_diff": after.skew() - before.skew(),
        "kurtosis_diff": after.kurtosis() - before.kurtosis(),
        "min_diff": after.min() - before.min(),
        "max_diff": after.max() - before.max(),
    }

    # Going through a Series keeps the dict insertion order as feature order.
    return pd.Series(features).to_numpy()


In [4]:
import os

# Dataset directory. Set the BREAK_DS_DIR environment variable to run this
# notebook on another machine; without it the original location is used.
path = os.environ.get("BREAK_DS_DIR", "/Users/arbaaz/Downloads/break/ds")
os.listdir(path)

['y_train.parquet',
 'X_train.parquet',
 '.gitignore',
 'X_test.reduced.parquet',
 'y_test.reduced.parquet',
 '.cache']

In [9]:
# This cell loads the reduced *test* split, so the path is named accordingly.
X_test_path = os.path.join(path, "X_test.reduced.parquet")
# Backward-compatible alias: this cell originally exposed the path under the
# (misleading) name `X_train_path`.
X_train_path = X_test_path
df = pd.read_parquet(X_test_path)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,value,period
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1
10001,0,0.010753,0
10001,1,-0.031915,0
10001,2,-0.010989,0
10001,3,-0.011111,0
10001,4,0.011236,0
...,...,...,...
10101,2002,0.002466,1
10101,2003,0.010499,1
10101,2004,-0.026688,1
10101,2005,-0.019126,1


In [None]:
import os
import typing 
import torch
from sklearn.preprocessing import StandardScaler

def _iter_datasets(X_test):
    """Yield one DataFrame per series, whichever container ``X_test`` is."""
    if isinstance(X_test, pd.DataFrame):
        # A single stacked frame: split it on "id" (column or index level).
        for _, group in X_test.groupby("id"):
            yield group
    else:
        yield from X_test


def _load_fitted_scaler(model_directory_path):
    """Load the scaler that was fitted on the training features."""
    # Function-scope import keeps this cell self-contained.
    import pickle

    # NOTE(review): assumes the training step pickled its fitted StandardScaler
    # to <model_directory_path>/scaler.pkl -- adjust the file name to match
    # whatever the training code actually writes.
    scaler_path = os.path.join(model_directory_path, "scaler.pkl")
    with open(scaler_path, "rb") as handle:
        # pickle executes code on load: only use a model directory you produced.
        return pickle.load(handle)


def infer(
    X_test: typing.Union[pd.DataFrame, typing.Iterable[pd.DataFrame]],
    model_directory_path: str,
):
    """Yield one probability array per series in ``X_test``.

    Parameters
    ----------
    X_test
        Either a single DataFrame that is split with ``groupby("id")``, or an
        iterable that yields one DataFrame per series. Each frame is passed to
        ``extract_features``.
    model_directory_path
        Directory containing ``model.pt`` (a state dict for ``Model``) and the
        fitted feature scaler (see ``_load_fitted_scaler``).

    Yields
    ------
    np.ndarray
        ``sigmoid(model(features))`` as a float64 array, one per series.

    Raises
    ------
    FileNotFoundError
        If ``model.pt`` or the scaler file is missing from the directory.

    Notes
    -----
    ``Model`` and ``extract_features`` are not defined in this cell; they must
    already exist in the notebook namespace.
    """
    # Inference must reuse the statistics learned during training; there is no
    # training data here to fit a new scaler on.
    scaler = _load_fitted_scaler(model_directory_path)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    model_path = os.path.join(model_directory_path, 'model.pt')
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    state_dict = torch.load(model_path, map_location=device)

    model = Model()
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    for dataset in _iter_datasets(X_test):
        # The scaler expects a 2-D (n_samples, n_features) array, so the flat
        # feature vector becomes a single-row batch.
        features = np.asarray(extract_features(dataset), dtype=np.float64).reshape(1, -1)
        scaled = scaler.transform(features)
        # float32 so the input dtype does not depend on autocast being active.
        inputs = torch.tensor(scaled, dtype=torch.float32)
        inputs = inputs.to(device, non_blocking=True)

        with torch.inference_mode():
            # Half-precision autocast is only switched on for CUDA; on CPU the
            # model runs in full precision.
            with torch.autocast(
                device_type=device,
                dtype=torch.float16,
                enabled=(device == "cuda"),
                cache_enabled=True,
            ):
                logits = model(inputs)
            prediction = logits.sigmoid()

        # Cast on the tensor (ndarrays have no .double()), then move to NumPy.
        prediction = prediction.double().cpu().numpy()
        yield prediction  # send the prediction for the current dataset