In [None]:
# Dependency: CatBoost. On a fresh environment, uncomment the next line to install it.
# %pip install catboost

In [2]:
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from catboost import CatBoostClassifier

In [5]:
# Location of the test split (Parquet). Kept as a named constant so the path
# can be changed in one place when the notebook runs in another environment.
TEST_PARQUET_PATH = '/content/test.parquet'

# Read the file as an Arrow table, then convert it to a pandas DataFrame.
test_pq = pq.read_table(TEST_PARQUET_PATH)
test_pd = test_pq.to_pandas()

In [6]:
def _value_stats(x):
    """Summarise one numeric sequence as a flat dict of scalar statistics."""
    series = pd.Series(x)  # built once, reused for the pandas-only statistics
    lowest, highest = np.min(x), np.max(x)
    first_derivative = np.gradient(x)
    return {
        'mean': np.mean(x),
        'median': np.median(x),
        'std': np.std(x),
        'min': lowest,
        'max': highest,
        'range': highest - lowest,
        'last_value': x[-1],
        'first_value': x[0],
        # Slope of the least-squares line fitted against positions 0..n-1.
        'trend': np.polyfit(np.arange(len(x)), x, 1)[0],
        'first_derivative_mean': np.mean(first_derivative),
        'second_derivative_mean': np.mean(np.gradient(first_derivative)),
        'autocorr_1': series.autocorr(lag=1),
        'autocorr_2': series.autocorr(lag=2),
        'energy': np.sum(np.power(x, 2)),
        'skewness': series.skew(),
        'kurtosis': series.kurtosis(),
    }


def _date_stats(dates):
    """Summarise one sequence of dates as calendar-coverage counts."""
    stamps = pd.to_datetime(dates)
    return {
        # Number of calendar years spanned, counting both end years.
        'num_years': stamps.max().year - stamps.min().year + 1,
        # Number of distinct month-of-year values (1..12) that occur.
        'num_months': len(np.unique(stamps.month)),
    }


def generate_features(df):
    """Build a per-row feature table from the 'values' and 'dates' columns.

    Each cell of ``df['values']`` is treated as one numeric sequence and each
    cell of ``df['dates']`` as the matching sequence of dates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'values' and 'dates'. The frame is only
        read; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df``. Contains the statistical columns (cast to float)
        followed by 'num_years' and 'num_months'.
    """
    # Statistical features derived from 'values'. Collecting plain dicts and
    # building the frame once avoids constructing a pandas Series per row.
    agg_features = pd.DataFrame(
        [_value_stats(x) for x in df['values']],
        index=df.index,
    ).astype(float)

    # Dates are parsed per row into a local object, so the caller's 'dates'
    # column is left exactly as it was passed in.
    time_features = pd.DataFrame(
        [_date_stats(d) for d in df['dates']],
        index=df.index,
    )

    return pd.concat([agg_features, time_features], axis=1)

In [None]:
# Derive the feature table for the test set (one row per row of test_pd).
test_features = generate_features(df=test_pd)

In [9]:
# Create an empty classifier and load the saved model file into it.
model = CatBoostClassifier()
model.load_model(fname='catboost_model.cbm')

# Per-class probabilities for every row of the test feature table.
predictions = model.predict_proba(test_features)

In [18]:
# Pair every test id with the probability in column 1 of the prediction
# matrix, then write the two-column table to disk without the index.
submission = pd.DataFrame({'id': test_pd['id']}).assign(score=predictions[:, 1])

submission.to_csv('submission.csv', index=False)