In [1]:
import pandas as pd
import numpy as np
from statistics import median, mode
import joblib
import os

In [2]:
def extract_features(row: pd.Series) -> pd.Series:
    """Summarise the numeric sequence stored in ``row['values']`` as a feature vector.

    Parameters
    ----------
    row : pd.Series
        A record exposing a ``'values'`` entry that ``np.array`` can convert
        into a numeric array. Only that entry is read.

    Returns
    -------
    pd.Series
        Eleven entries, always in this order:
        ``feature_median``, ``feature_mode``, ``feature_q_5``, ``feature_q_25``,
        ``feature_q_75``, ``feature_q_95``, ``feature_std``, ``1/feature_q_5``,
        ``feature_q_75**3``, ``feature_median**2``, ``feature_mode**3``.

    Raises
    ------
    statistics.StatisticsError
        If ``row['values']`` is empty (raised by ``statistics.median``).

    Notes
    -----
    * ``statistics.mode`` resolves ties by returning the first value
      encountered on Python 3.8+; older interpreters raise ``StatisticsError``
      when there is no unique mode.
    * ``np.std`` is called with its default ``ddof=0`` (population std).
    * A 5% quantile of exactly zero makes ``1/feature_q_5`` infinite (NumPy
      emits a RuntimeWarning). This is deliberately left untouched so that the
      produced values stay identical to what this function has always emitted.
    """
    values = np.array(row['values'])

    # Compute every base statistic exactly once; the derived features below
    # reuse these results instead of re-sorting the data for each entry.
    center = median(values)
    most_common = mode(values)
    # One vectorised call yields all four quantiles from a single pass.
    q_5, q_25, q_75, q_95 = np.quantile(values, [0.05, 0.25, 0.75, 0.95], axis=0)

    # Insertion order of this dict defines the order of the output entries.
    features = {
        'feature_median': center,
        'feature_mode': most_common,
        'feature_q_5': q_5,
        'feature_q_25': q_25,
        'feature_q_75': q_75,
        'feature_q_95': q_95,
        'feature_std': np.std(values),
        '1/feature_q_5': 1 / q_5,
        'feature_q_75**3': q_75 ** 3,
        'feature_median**2': center ** 2,
        'feature_mode**3': most_common ** 3,
    }
    return pd.Series(features)

In [10]:
# Deserialize the previously saved model object from the working directory.
# NOTE(review): joblib.load is pickle-based and can run arbitrary code while
# unpickling; only load 'fitted_model.pkl' when it comes from a trusted source.
with open('fitted_model.pkl', mode='rb') as model_file:
    model = joblib.load(model_file)

In [4]:
# Read the raw test set; later cells use its 'values' and 'id' columns.
data_test = pd.read_parquet(path='test.parquet')

In [5]:
# Row-wise application: every row of data_test is handed to extract_features,
# and the Series it returns becomes the matching row of df_test.
df_test = data_test.apply(extract_features, axis='columns')

In [16]:
# Score every feature row, then keep only column index 1 of the result
# (presumably the positive class of a binary classifier; confirm against
# model.classes_).
class_probabilities = model.predict_proba(df_test)
pred_proba = class_probabilities[:, 1]

In [18]:
# Store the predictions in a 'score' column of data_test. The frame is
# modified in place, so re-running this cell overwrites the column.
# NOTE(review): presumably pred_proba is a NumPy array, making the assignment
# positional (row i gets pred_proba[i]); confirm it is not an indexed Series.
data_test['score'] = pred_proba

In [19]:
# Export only the identifier and score columns as a comma-separated file;
# index=False keeps the DataFrame index out of the output.
submission_columns = ['id', 'score']
data_test[submission_columns].to_csv('submission.csv', sep=',', index=False)