In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from google.colab import files

# Data Preprocessing

In [2]:
# Path of the raw spot-price CSV export that this notebook reads.
file_path = '/content/Lithium Carbonate 99%Min China Spot Historical Data (2).csv'

# Load the file with pandas' default parsing options; dates and the
# percentage column are converted in the preprocessing step.
LTCB_99 = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Build the cleaned frame in a single chain:
#   1. parse 'Date' into datetimes,
#   2. order the rows chronologically,
#   3. use the date as the index,
#   4. rename 'Change %' to 'Obs_Return',
#   5. turn the percentage strings (e.g. '0.95%') into decimal fractions.
LTCB_99 = (
    LTCB_99
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .set_index('Date')
    .rename(columns={'Change %': 'Obs_Return'})
    .assign(
        Obs_Return=lambda frame: (
            frame['Obs_Return'].str.replace('%', '').astype(float) / 100
        )
    )
)

In [4]:
# Count observations with a missing return; when there are any, report the
# count and keep only the rows that have a return.
return_na_count = LTCB_99['Obs_Return'].isnull().sum()
if return_na_count:
    print("Number of missing values:", return_na_count)
    LTCB_99 = LTCB_99[LTCB_99['Obs_Return'].notna()]

In [6]:
# Show the five most recent rows as a sanity check of the cleaned frame.
print(LTCB_99.tail(n=5))

                 Price        Open        High         Low  Vol.  Obs_Return
Date                                                                        
2024-04-03  106,500.00  106,500.00  108,000.00  105,000.00   NaN      0.0095
2024-04-08  109,500.00  109,500.00  111,000.00  108,000.00   NaN      0.0282
2024-04-09  109,500.00  109,500.00  111,000.00  108,000.00   NaN      0.0000
2024-04-10  110,500.00  110,500.00  112,000.00  109,000.00   NaN      0.0091
2024-04-11  109,500.00  109,500.00  111,000.00  108,000.00   NaN     -0.0090


In [7]:
def unsmooth_returns_1_step(observed_returns, max_lags):
    """
    Unsmooth observed returns using a 1-step MA Unsmoothing Method.

    An MA(q) model, fitted as ARIMA(0, 0, q), is estimated for every q in
    0..max_lags. The fit with the lowest AIC is kept, and its residuals plus
    its estimated constant are returned as the unsmoothed series.

    Parameters:
    - observed_returns: observed (smoothed) returns.
    - max_lags: Maximum number of lags (H) to consider for the MA model.

    Returns:
    - true_returns: estimated true (unsmoothed) returns. If no candidate
      model could be fitted, a copy of observed_returns is returned and a
      RuntimeWarning is emitted.
    """
    # Local import: the notebook's import cell does not bring in `warnings`.
    import warnings

    best_aic = np.inf
    best_model = None

    # Candidate orders start at 0, i.e. the constant-only model competes too.
    for lag in range(max_lags + 1):
        try:
            model = sm.tsa.ARIMA(observed_returns, order=(0, 0, lag)).fit()
            aic = model.aic
        except Exception:
            # Best effort: an order whose fit fails is skipped. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate, so the loop can still be interrupted.
            continue
        if aic < best_aic:
            best_aic = aic
            best_model = model

    if best_model is None:
        # Make the fallback visible: the caller gets the smoothed input back.
        warnings.warn(
            "No MA model could be fitted; returning the observed returns unchanged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return observed_returns.copy()

    # Look up the estimated constant. Label-based `.get` is used when the
    # parameters support it (e.g. a pandas Series); otherwise the position of
    # 'const' is taken from the result's `param_names`. Defaults to 0 when the
    # model has no constant.
    params = best_model.params
    if hasattr(params, 'get'):
        const = params.get('const', 0)
    else:
        names = list(getattr(best_model, 'param_names', ()))
        const = params[names.index('const')] if 'const' in names else 0

    # NOTE(review): the residuals are used unscaled. If the smoothing weights
    # are meant to be normalised to sum to one, the innovations would need to
    # be rescaled by (1 + sum of the MA coefficients) - confirm against the
    # intended method.
    true_returns = best_model.resid + const

    return true_returns

In [8]:
# Lowest-AIC model among the MA(0)..MA(5) candidates (the search starts at lag 0)
one_step_true_MA = unsmooth_returns_1_step(LTCB_99['Obs_Return'], 5)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [9]:
# Write the unsmoothed series (index and header included) to a CSV in the
# working directory, then hand that file to the Colab download helper.
one_step_csv_path = 'LTCB99_1Step_trueMA.csv'
one_step_true_MA.to_csv(one_step_csv_path, header=True, index=True)
files.download(one_step_csv_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>