# Time Series Modeling for Strawberry Price Prediction

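This notebook trains several time series models on strawberry price data that has been preprocessed with different interpolation methods, then saves each fitted model, its predictions, and the matching test series for later evaluation.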

In [None]:
import pandas as pd
import numpy as np
import joblib

from src.fct_feature_eng import *
from src.fct_model import *
from src.parameter import get_dict_params

## 1. Load Data

In [None]:
# Read the raw CSV and run the date column through pd.to_datetime so that
# downstream code receives real datetimes for 'start_date'.
RAW_DATA_PATH = 'data/raw/senior_ds_test.csv'
data = pd.read_csv(RAW_DATA_PATH).assign(
    start_date=lambda frame: pd.to_datetime(frame['start_date'])
)

# Produce the training and testing subsets; the split rule itself lives in
# split_train_test (a project helper, presumably from the star imports above).
train_data, test_data = split_train_test(data)

## 2. Process Data with Different Interpolation Methods

In [None]:
# Experiment configuration: the interpolation methods to compare and the
# model types to train on each of them.
dict_params = get_dict_params()
interpolation_methods = dict_params['interpolation_methods']
model_types = dict_params['model_types']

# Maps interpolation method -> (training price series, testing price series)
processed_data = {}

for method in interpolation_methods:
    # Run the project preprocessing separately on each split, telling it
    # which split it is handling and which interpolation method to apply.
    train_processed = preprocessing(
        train_data, is_training=True, interpolation_method=method
    )
    test_processed = preprocessing(
        test_data, is_training=False, interpolation_method=method
    )

    # Reduce each processed frame to the target: price indexed by start_date.
    train_ts = train_processed.set_index('start_date')['price']
    test_ts = test_processed.set_index('start_date')['price']

    # Stop immediately if either series still contains a missing value
    # after preprocessing.
    if any(series.isna().any() for series in (train_ts, test_ts)):
        raise ValueError(f"Missing values detected in {method} processed data")

    # Keep both series so the training cell can look them up by method.
    processed_data[method] = (train_ts, test_ts)

    print(
        f"\nProcessed with {method} interpolation:",
        f"Training set shape: {train_ts.shape}",
        f"Testing set shape: {test_ts.shape}",
        sep="\n",
    )

## 3. Train and Save Models for Each Interpolation Method

In [None]:
def _naive_without_model(train, test):
    """Wrap naive_forecast so it returns a (predictions, model) pair like the other fitters.

    The naive forecast has no fitted model object, so the model slot is None.
    """
    return naive_forecast(train, test), None


# Dispatch table: model type name -> callable(train, test) returning
# (predictions, model).
model_functions = {
    'naive': _naive_without_model,
    'arima': fit_arima_model,
    'xgboost': fit_xgboost_model,
}

for method in interpolation_methods:
    print(f"\nTraining models with {method} interpolation:")
    train_ts, test_ts = processed_data[method]

    for model_type in model_types:
        # Look up the fitter for this model type and run it on the series
        # prepared with the current interpolation method.
        fit_func = model_functions[model_type]
        pred, model = fit_func(train_ts, test_ts)

        # Artifacts are named "<model type>_<interpolation method>".
        artifact_name = f'{model_type}_{method}'
        save_model_and_predictions(model, pred, artifact_name)

    print(f"Completed training models with {method} interpolation")

# Persist each method's test series next to the models so that evaluation
# can load the exact data the predictions correspond to.
for method in interpolation_methods:
    _, test_ts = processed_data[method]
    test_data_path = f'models/test_data_{method}.joblib'
    joblib.dump(test_ts, test_data_path)

print("\nAll models have been trained and saved.")