In [9]:
import pandas as pd
import numpy as np
from pathlib import Path

from preprocessing.pipeline_io import load_pipeline

from train_catboost_optuna import make_submission

In [10]:
# --- Inputs -----------------------------------------------------------------
# JSON-lines file holding the rows to score.
TEST_DATA_PATH = "data/test_data.jsonlines"

# Location handed to load_pipeline() to restore the preprocessing pipeline.
PIPELINE_PATH = "artifacts/prep_v1"

# --- Column handling --------------------------------------------------------
# Columns removed from the raw frame right after loading.
COLS_TO_DROP_EARLY = [
    "subtitle",
    "differential_pricing",
    "international_delivery_mode",
    "listing_source",
    "site_id",
    "coverage_areas",
]
# Columns passed to pandas as `convert_dates` when reading the JSON.
DATE_COLS = ["date_created", "last_updated"]
# Name of the prediction column in the submission file.
TARGET = "condition"

# --- Output -----------------------------------------------------------------
# Where the submission CSV is written (and later re-read for label decoding).
SUBMISSION_PATH = Path("submission_5fold_catboost_optuna.csv")

In [11]:
# Read the JSON-lines test set (one record per line), letting pandas convert
# the DATE_COLS columns to dates, then discard the columns listed in
# COLS_TO_DROP_EARLY in the same chain.
df_test = (
    pd.read_json(TEST_DATA_PATH, lines=True, convert_dates=DATE_COLS)
    .drop(columns=COLS_TO_DROP_EARLY)
    .copy()
)

In [12]:
# Restore the preprocessing pipeline and apply it to the test frame.
#
# Only the load step is guarded: a missing pipeline is re-raised with an
# actionable message so the notebook stops here, instead of printing a hint
# and failing in a later cell because `X` was never defined.
try:
    # load_pipeline returns a pair; the second element is not needed here.
    pipe_loaded, _ = load_pipeline(PIPELINE_PATH)
except FileNotFoundError as exc:
    raise FileNotFoundError(
        f"Pipeline not found at {PIPELINE_PATH!r}. "
        "Please run the training pipeline first."
    ) from exc

# Kept outside the try block so that errors raised while transforming are not
# misreported as a missing pipeline.
X = pipe_loaded.transform(df_test)

In [None]:
# Give every transformed row a sequential 1-based identifier; this column is
# passed to make_submission as the id column.
X['ID'] = range(1, 1 + len(X))

# Produce the submission file from the models stored under `artifacts`.
# The returned object supports .resolve(), which is used for the log line below.
out = make_submission(
    submission_path=SUBMISSION_PATH,
    model_dir=Path('artifacts'),
    id_col='ID',
    test_df=X,
)
print(f"Submission saved to: {out.resolve()}")

In [None]:
# Rewrite the submission file so the TARGET column holds string labels:
# a value of 1 becomes 'new', anything else becomes 'used'.
subm = pd.read_csv(SUBMISSION_PATH)

# Re-run guard: once the file has been decoded no value equals 1 any more, so
# applying the mapping a second time would turn every row into 'used'.
# Skip the rewrite when the column already contains only the final labels.
already_decoded = subm[TARGET].isin(['new', 'used']).all()
if not already_decoded:
    subm[TARGET] = np.where(subm[TARGET] == 1, 'new', 'used')
    subm.to_csv(SUBMISSION_PATH, index=False)