In [2]:
import sys
sys.path.append("..")
import Data as dt
import ChevalParesseux_lib as lib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# **I. Import Data**

In [3]:
# ======= I. Load Data =======
START_DATE = '2020-01-01'  # exclusive lower bound applied to the 'date' column


def load_prices(ticker: str, start: str = START_DATE) -> pd.DataFrame:
    """
    Loads one ticker, keeps the rows dated strictly after `start`, and indexes them by date.

    Parameters:
        - ticker (str): Ticker symbol forwarded to dt.load_data.
        - start (str): Exclusive lower bound compared against the 'date' column.

    Returns:
        - prices (pd.DataFrame): Filtered data indexed by pd.to_datetime of its 'date' column.
    ________
    N.B: Assumes dt.load_data returns a DataFrame exposing a 'date' column -- TODO confirm.
    """
    raw = dt.load_data(ticker=ticker)

    # .copy() so the index assignment below acts on an independent frame, not on a slice of `raw`.
    prices = raw[raw['date'] > start].copy()
    prices.index = pd.to_datetime(prices['date'])

    return prices


data = load_prices(ticker='A')
alt_data = load_prices(ticker='AAPL')

# ======= II. Plot Data to ensure its integrity =======
# plt.figure(figsize=(17, 4))
# plt.plot(data['close'], label='Close Price')
# plt.title('A Close Price')
# plt.xlabel('Date')
# plt.ylabel('Price')
# plt.legend()
# plt.show()

In [9]:
from joblib import Parallel, delayed
from typing import Self

class linear_tempReg_feature():
    """
    Rolling Linear Temporal Regression Feature

    This class computes a rolling linear regression statistics over a time series. 
    For each window, it fits a linear model and extracts the slope, t-statistic, p-value, and R-squared.
    It inherits from the Feature base class and implements methods to:
        - set_params : define parameter grids.
        - process_data : optionally performs preprocessing on the input series.
        - get_feature : compute the moving linear temporal regression feature over a rolling window
    """
    def __init__(
        self, 
        name: str = "linear_tempreg" , 
        n_jobs: int = 1
    ) -> None:
        """
        Initializes the linear_tempReg_feature object with the input time series.

        Parameters:
            - data (pd.Series): The raw input time series.
            - name (str): Feature name.
            - n_jobs (int): Number of parallel jobs to use.
        """
        self.name=name
        self.n_jobs=n_jobs
    
    #?____________________________________________________________________________________ #
    def set_params(
        self,
        window: list = [5, 10, 30, 60],
        smoothing_method: list = [None, "ewma", "average"],
        window_smooth: list = [5, 10],
        lambda_smooth: list = [0.1, 0.2, 0.5],
    ) -> Self:
        """
        Sets the parameter grid for the rolling linear regression feature extraction.

        Parameters:
            - window (list): Rolling window sizes for regression.
            - smoothing_method (list): Type of smoothing to apply before regression.
            - window_smooth (list): Smoothing window sizes.
            - lambda_smooth (list): Decay factors for EWMA smoothing.
        """
        self.params = {
            "window": window,
            "smoothing_method": smoothing_method,
            "window_smooth": window_smooth,
            "lambda_smooth": lambda_smooth,
        }

        return self

    #?____________________________________________________________________________________ #
    def process_data(
        self, 
        data: pd.Series,
    ) -> pd.Series:
        """
        Applies preprocessing to the input data before feature extraction.
        
        Parameters:
            - data (pd.Series): The input data to be processed.
        
        Returns:
            - processed_data (pd.Series): The smoothed series, or raw series if no smoothing is applied.
        ________
        N.B: The feature does not require preprocessing, but this method is kept for consistency.
        """
        return data
    
    #?____________________________________________________________________________________ #
    def get_feature(
        self,
        data: pd.Series,
        window: int,
        smoothing_method: str,
        window_smooth: int,
        lambda_smooth: float,
    ) -> pd.DataFrame:
        """
        Computes rolling linear regression statistics (slope, t-stat, p-value, R-squared)
        on the smoothed series over the specified window.

        Parameters:
            - data (pd.Series): The input data to be processed.
            - window (int): Rolling window size for regression.
            - smoothing_method (str): Smoothing method to apply before regression.
            - window_smooth (int): Smoothing window size.
            - lambda_smooth (float): Decay factor for EWMA smoothing.

        Returns:
            - features_df (pd.DataFrame): DataFrame containing regression statistics.
        """
        # ======= 0. Intermediate functions =======
        def compute_regression(
            series: pd.Series, 
            start_idx: int, 
            window: int
        ) -> tuple:
            
            current_window = series.iloc[start_idx - window + 1: start_idx + 1]
            intercept, coefficients, statistics, residuals = lib.get_simple_TempReg(series=current_window)
            
            return start_idx, intercept, coefficients, statistics, residuals
            

        # ======= I. Smooth the Data & Preprocess =======
        # smoothed_series = self.smooth_data(
        #     data=data, 
        #     smoothing_method=smoothing_method, 
        #     window_smooth=window_smooth, 
        #     lambda_smooth=lambda_smooth
        # )
        
        processed_series = self.process_data(data=data).dropna()

        # ======= II. Compute the rolling regression statistics =======
        results = Parallel(n_jobs=self.n_jobs)(
            delayed(compute_regression)(processed_series, i, window)
            for i in range(window - 1, len(processed_series))
        )

        # ======= III. Convert to pd.Series =======
        rolling_slope = pd.Series({i: coeffs[0] for i, _, coeffs, _, _ in results}) 
        rolling_tstat = pd.Series({i: stats['t_stats'][0] for i, _, _, stats, _ in results})
        rolling_pvalue = pd.Series({i: stats['p_values'][0] for i, _, _, stats, _ in results})
        rolling_r2 = pd.Series({i: stats['r2'] for i, _, _, stats, _ in results})
        
        # ======= IV. Rearrange the index =======
        rolling_slope.index = processed_series.index[window - 1:]
        rolling_tstat.index = processed_series.index[window - 1:]
        rolling_pvalue.index = processed_series.index[window - 1:]
        rolling_r2.index = processed_series.index[window - 1:]
        
        # ======= V. Center =======
        rolling_slope = rolling_slope / (processed_series + 1e-8)
        
        # ======= IV. Change Name =======
        features_df = pd.DataFrame({
            f"{self.name}_slope_{window}_{smoothing_method}_{window_smooth}_{lambda_smooth}": rolling_slope,
            f"{self.name}_tstat_{window}_{smoothing_method}_{window_smooth}_{lambda_smooth}": rolling_tstat,
            f"{self.name}_pvalue_{window}_{smoothing_method}_{window_smooth}_{lambda_smooth}": rolling_pvalue,
            f"{self.name}_r2_{window}_{smoothing_method}_{window_smooth}_{lambda_smooth}": rolling_r2,
        })
        
        return features_df


# **II. Prepare feature class**

In [None]:
# ======= I. User Defining Parameters =======
# Feature class under test, plus the name and parallelism handed to its constructor.
feature_model = lib.kalmanOU_feature
feature_name = 'cointegration'
n_jobs = 10

# Parameter grid: every entry is a one-element list, i.e. a single combination.
feature_params = dict(
    window=[100],
    smoothing_method=[None],
    window_smooth=[5],
    lambda_smooth=[0.2],
)

In [None]:
# ======= II. Initialize & III. Set the Parameters =======
# Built and configured in one expression: `feature` is bound to the value returned by
# set_params, as it was when the two steps were written separately.
feature = feature_model(name=feature_name, n_jobs=n_jobs).set_params(**feature_params)

# **III. Checking Smoothing/Preprocessing Method**

In [None]:
# smoothed_data_ewma = feature.smooth_data(data=data['close'], smoothing_method='ewma', window_smooth=10, lambda_smooth=0.2)
# smoothed_data_ma = feature.smooth_data(data=data['close'], smoothing_method='average', window_smooth=10, lambda_smooth=0.2)

# processed_data = feature.process_data(data=smoothed_data_ewma)

# plt.figure(figsize=(17, 4))
# plt.plot(data['close'], label='Close Price')
# plt.plot(smoothed_data_ewma, label='Smoothed Close Price using EWMA', color='orange')
# plt.plot(smoothed_data_ma, label='Smoothed Close Price using MA', color='green')
# plt.plot(processed_data, label='Processed Data', color='red')
# plt.title('A Close Price')
# plt.xlabel('Date')
# plt.ylabel('Price')
# plt.legend()
# plt.show()

# **IV. Checking Extraction**

In [None]:
# Log close prices of the two loaded frames, in order: (data, alt_data).
tuple_data = tuple(np.log(frame['close']) for frame in (data, alt_data))

In [None]:
# Extract the feature from the pair of log-price series.
features_df = feature.extract(data=tuple_data)
# alt_features_df = feature.extract(data=alt_data['close'])

# Draw on an explicit Axes rather than through the implicit pyplot state.
fig, ax = plt.subplots(figsize=(17, 4))
ax.plot(features_df)
plt.show()

# plt.figure(figsize=(17, 4))
# plt.plot(alt_features_df)
# plt.show()