In [19]:
from os import XATTR_REPLACE
import pandas as pd
from joblib import dump, load
import numpy as np
import statsmodels.tsa.seasonal

def read_parquet_file(name):
  """Load the Parquet file at ``name`` and return it as a pandas DataFrame."""
  parquet_frame = pd.read_parquet(name)
  return parquet_frame

def listToTimeLine(dates, values):
  """Build a two-column frame from the first element of each argument.

  Args:
    dates: indexable container; only ``dates[0]`` is used and becomes the
      ``'dates'`` column.
    values: indexable container; only ``values[0]`` is used and becomes the
      ``'values'`` column.

  Returns:
    A DataFrame with the columns ``'dates'`` and ``'values'``, in that order.
  """
  first_dates = dates[0]
  first_values = values[0]
  columns = {'dates': first_dates, 'values': first_values}
  return pd.DataFrame(columns)

def iteratingTimeSeries(i, df):
  """Turn the ``i``-th row of ``df`` into a dates/values frame.

  The frame is converted to a NumPy array, row ``i`` is selected by position,
  and that row is re-wrapped as a one-row frame so that its first two cells
  can be handed to ``listToTimeLine``.

  Args:
    i: positional row number.
    df: frame whose first cell per row holds the dates and whose second cell
      holds the values (presumably array-likes of equal length -- verify
      against the input file).

  Returns:
    The DataFrame produced by ``listToTimeLine`` for that row.
  """
  row_cells = np.array(df)[i]
  # Transposing makes the row's cells addressable as columns 0, 1, ...
  row_frame = pd.DataFrame(row_cells).T
  dates_cell, values_cell = row_frame[0], row_frame[1]
  return listToTimeLine(dates_cell, values_cell)

def getDfComposed(df):
  """Run statsmodels' ``seasonal_decompose`` on ``df`` with fixed settings.

  The decomposition is additive, uses no custom filter, leaves the period
  unspecified (``period=None``), applies a two-sided filter and passes
  ``extrapolate_trend=10``.

  Returns:
    Whatever ``statsmodels.tsa.seasonal.seasonal_decompose`` returns.
  """
  decompose_options = {
    'model': 'additive',
    'filt': None,
    'period': None,
    'two_sided': True,
    'extrapolate_trend': 10,
  }
  return statsmodels.tsa.seasonal.seasonal_decompose(df, **decompose_options)

def prepare_data(df):
  """Summarise every time series in ``df`` as one row of decomposition means.

  For each row the dates/values pair is extracted with
  ``iteratingTimeSeries``, turned into a frame indexed by the parsed dates
  (missing values replaced by 0), decomposed with ``getDfComposed``, and
  reduced to the mean of each decomposition component.

  Args:
    df: input frame; must contain an ``'id'`` column, which is dropped before
      processing. The input frame itself is not modified.

  Returns:
    A DataFrame with one row per input row and the columns ``'observed'``,
    ``'seasonal'``, ``'trend'``, ``'resid'`` and ``'weights'``.
  """
  feature_names = ['observed', 'seasonal', 'trend', 'resid', 'weights']
  df = df.drop('id', axis=1)

  # Collect plain rows and build the frame once at the end; appending to a
  # DataFrame with ``.loc`` inside the loop reallocates it on every iteration.
  feature_rows = []
  for i in range(len(df)):
    df_i = iteratingTimeSeries(i, df)

    df_transformed = pd.DataFrame(
      data=np.array(df_i['values']),
      index=pd.to_datetime(np.array(df_i['dates'])),
      columns=['values'],
    ).fillna(0)
    df_composed = getDfComposed(df_transformed)
    feature_rows.append([getattr(df_composed, name).mean() for name in feature_names])

  return pd.DataFrame(feature_rows, columns=feature_names)

def featureEngineering(X):
  """Return a copy of ``X`` with the engineered model features.

  Two changes are made relative to the input:

  * ``'rolling_std'`` is added: the standard deviation of ``'observed'`` over
    a window of 5 consecutive rows; the leading rows for which the window is
    incomplete are filled with the mean of the computed values.
  * ``'trend'`` is replaced by ``sign(trend) * log(trend ** 2)``, where the
    sign is computed as ``trend / |trend|`` (so a trend of exactly 0 yields
    NaN).

  The transformation is applied to a copy, so the caller's frame is left
  untouched and calling the function repeatedly on the same input cannot
  log-transform ``'trend'`` more than once.

  Args:
    X: frame with at least the columns ``'observed'`` and ``'trend'``.

  Returns:
    A new DataFrame with the transformed ``'trend'`` and the added
    ``'rolling_std'`` column.
  """
  X = X.copy()

  trend_abs = np.abs(X['trend'])
  trend_sign = np.array(X['trend']) / np.array(trend_abs)
  X['trend'] = trend_sign * np.log(trend_abs ** 2)

  rolling_std = X['observed'].rolling(window=5).std()
  X['rolling_std'] = rolling_std.fillna(rolling_std.mean())
  return X

Пусть тестовый файл — test.parquet (его можно заменить любым другим файлом того же формата).

In [15]:
# Load the raw test set and keep its 'id' column for the submission
# (note: the name `id` shadows the Python builtin; later cells read it).
df = read_parquet_file('test.parquet')
id = df['id']
# Per-series decomposition features -> drop 'weights' -> engineered features.
df = featureEngineering(prepare_data(df).drop('weights', axis=1))

In [None]:
!pip install catboost
from catboost import CatBoostClassifier
model = load('model.joblib')

In [23]:
# Column 1 of the predict_proba output is used as the score
# (presumably the positive-class probability -- confirm against the model).
final_pred = model.predict_proba(df)[:, 1]
# Pair every id with its score; column order is 'id', 'score'.
submission = pd.DataFrame({'id': id}).assign(score=final_pred)

In [25]:
# Write the submission to disk without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)