In [1]:
from typing import Union, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2

from sklearn.linear_model import Ridge, lasso_path, RidgeCV

In [2]:
# tbg = pd.read_csv("./trends_by_income_groups.csv", index_col=0, parse_dates=["date"])

In [13]:
class LinearFeatureSelector(object):
    def __init__(self, targets, default_status: str = "Auto"):
        self.targets: np.ndarray = np.asarray(targets).reshape((-1, 1))
        self.targets_mean: np.float64 = np.mean(self.targets, dtype=np.float64)
        self.targets_std: np.float64 = np.std(self.targets, dtype=np.float64)
        self.targets = (self.targets-self.targets_mean) / self.targets_std
        self.obs_count: int = self.targets.shape[0]
        
        self.features = list()  # The json, where the features are stored
        self.feature_index = dict()  # A dict for easy searching for a named feature class
        self.unnamed_group_count = 0  # A counter required to destinguish the unnamed features in the model
        
        if default_status in {"Auto", "On", "Off"}:
            self.default_status = default_status  # The default status for new features
        else:
            raise ValueError(
                f"Unsupported default status {default_status}. The supported "
                + "values are \"On\", \"Off\", and \"Auto\"."
            )
        
        self.log = "Initialized the model\n"  # TODO: add some proper logging


    def __check_names_integrity(self):
        for feature_name, index in self.feature_index.items():
            assert self.features[index]["name"] == feature_name, "The feature index is inconsistent"
        for index, feature_cls in enumerate(self.features):
            assert self.feature_index[feature_cls["name"]] == index, "The feature index is inconsistent"
        for feature_cls in self.features:
            assert (
                len(feature_cls["features"])
                == len({feature["name"] for feature in feature_cls["features"]})
            ), "Feature class {0} has duplicate feature names".format(feature_cls.get("name", ""))

            
    def _check_integrity(self):
        self.__check_names_integrity()
        
    def _prepare_feature_cls_dict(self, name: str, **kwargs) -> dict:
        out = {
            "name": name,
        }
        out.update(kwargs)
        return out
    
    def _prepare_feature_dict(self, name: str, **kwargs) -> dict:
        out = {
            "name": name,
            "status": self.default_status,
        }
        out.update(kwargs)
        return out

    def _add_array_like_feature(self, feature: Union[np.ndarray, pd.Series, pd.DataFrame], cls_name=None):
        if cls_name is None:
            cls_name = f"external_feature_{self.unnamed_group_count}"
            self.unnamed_group_count += 1
        else:
            cls_name = str(cls_name)
            if cls_name in self.feature_index:
                raise ValueError(f"Feature class name {cls_name} is already taken.")
        
        class_core = self._prepare_feature_cls_dict(cls_name)
        # Checking feature's shape compatibility
        if len(feature.shape) > 2:
            raise ValueError(f"Input dimention mismatch. Got {len(feature.shape)} dimentions.")
        if feature.shape[0] != self.obs_count:
            raise ValueError(
                f"Input shape mismatch. Got {feature.shape} input "
                + f"for data with {self.obs_count} observations"
            )

        if isinstance(feature, np.ndarray):
            feature = feature.reshape((self.obs_count, -1))
            class_core["features"] = [
                self._prepare_feature_dict(
                    str(feature_num),
                    value=feature[:, feature_num].reshape((-1, 1)),
                )
                for feature_num in range(feature.shape[1])
            ]
        elif isinstance(feature, pd.Series):
            class_core["features"] = [
                self._prepare_feature_dict(
                    str(feature.name) if feature.name is not None else "0",
                    value=feature.values.reshape((-1, 1)),
                )
            ]
        elif isinstance(feature, pd.DataFrame):
            class_core["features"] = [
                self._prepare_feature_dict(
                    str(name) if name is not None else str(i),
                    value=feature.values[:, i].reshape((-1, 1)),
                )
                for i, name in enumerate(feature.columns)
            ]
        else:
            raise ValueError(
                f"Input type ({type(feature)}) is not recognized. "
                + "Expected one of [np.ndarray, pd.Series, pd.DataFrame]"
            )

        self.feature_index[cls_name] = len(self.features)
        self.features.append(class_core)        
        self._check_integrity()  # Debug run
        self.log += f"Added external feature {cls_name}\n"

    def add_features(self, *args, **kwargs):
        for arg in args:
            if isinstance(arg, (np.ndarray, pd.Series, pd.DataFrame)):
                self._add_array_like_feature(arg)
            elif isinstance(arg, (list, tuple, set)):
                for feature in arg:
                    if isinstance(feature, (np.ndarray, pd.Series, pd.DataFrame)):
                        self._add_array_like_feature(feature)
                    else:
                        raise ValueError(f"Input type is not recognized. Got <{type(arg)}>[{type(feature)}]")
            elif isinstance(arg, dict):
                for key, feature in arg.items():
                    if isinstance(feature, (np.ndarray, pd.Series, pd.DataFrame)) and isinstance(key, str):
                        self._add_array_like_feature(feature, cls_name=key)
                    else:
                        raise ValueError(f"Input type is not recognized. Got <dict>[{type(key)}: {type(feature)}]")
            else:
                raise ValueError(f"Input type is not recognized. Got {type(arg)}")
        for key, feature in kwargs.items():
            if isinstance(feature, (np.ndarray, pd.Series, pd.DataFrame)):
                self._add_array_like_feature(feature, cls_name=key)
            else:
                raise ValueError(f"Keyword argument type is not recognized. Got {type(feature)}")


    def _get_feature_stats(self, feature: dict, cls_meta: dict) -> dict:
        mean: np.float64 = np.mean(feature["value"], dtype=np.float64)
        std: np.float64 = np.std(feature["value"], dtype=np.float64)
        corr: np.float64 = np.mean(feature["value"]*self.targets, dtype=np.float64) / std
        m2: np.float64 = np.mean(np.square(feature["value"]), dtype=np.float64)
        m3: np.float64 = np.mean(np.power(feature["value"], 3), dtype=np.float64)
        m4: np.float64 = np.mean(np.power(feature["value"], 4), dtype=np.float64)
        test_err_matr = np.array([[m2 - (mean*mean), m3 - (mean*m2)], [m3 - (mean*m2), m4 - (m2*m2)]])
        test_vec = np.array([[mean], [m2-1]])
        test_stat = self.obs_count * test_vec.T.dot(np.linalg.inv(test_err_matr).dot(test_vec))[0, 0]
        test_pval = chi2.sf(test_stat, 2)
        return {
            "status": feature["status"],
            "corr.w.target": corr,
            "norm. test p-val": test_pval,
        }

    def display_features(self):
        index, data = list(), list()
        for feature_cls in self.features:
            cls_meta = {k: v for k, v in feature_cls.items() if k != "features"}
            for feature in feature_cls["features"]:
                index.append((feature_cls["name"], feature["name"]))
                data.append(self._get_feature_stats(feature, cls_meta))
        return pd.DataFrame(data, index=pd.MultiIndex.from_tuples(index, names=["class", "feature"]))


    def _update_single_status(self, location, status: str) -> None:
        if status not in {"Auto", "On", "Off"}:
            raise ValueError(
                f"Unexpected status {status}. The expected values are \"On\", \"Off\", and \"Auto\"."
            )
        if isinstance(location, int):
            for feature in self.features[location]["features"]:
                feature["status"] = status
        elif isinstance(location, str):
            if location not in self.feature_index:
                raise ValueError(f"Feature class name {location} not found.")
            else:
                for feature in self.features[self.feature_index[location]]["features"]:
                    feature["status"] = status
        elif isinstance(location, (tuple, list)) and (len(location) == 2):
            cls_id, f_id = location
            if not (isinstance(cls_id, (int, str)) and isinstance(f_id, (int, str))):
                raise ValueError(f"location must be int, str or tuple[<int, str>, <int, str>]")
            if isinstance(cls_id, str):
                if cls_id not in self.feature_index:
                    raise ValueError(f"Feature class name {cls_id} not found.")
                else:
                    cls_id: int = self.feature_index[cls_id]
            if isinstance(f_id, str):
                id_name_dict = {f["name"]: i for i, f in enumerate(self.features[cls_id]["features"])}
                if f_id not in id_name_dict:
                    raise ValueError(f"Feature name {f_id} not found.")
                else:
                    f_id: int = id_name_dict[f_id]
            self.features[cls_id]["features"][f_id]["status"] = status
        else:
            raise ValueError(f"location must be int, str or tuple[<int, str>, <int, str>]")

    def update_status(self, locations: Union[list, tuple], statuses: Union[list, str]):
        if isinstance(locations, list) and isinstance(statuses, list):
            if len(locations) == len(statuses):
                for location, status in zip(locations, statuses):
                    self._update_single_status(location, status)
            else:
                raise ValueError(f"Lengths don't match: {len(locations)} != {len(statuses)}")
        elif isinstance(locations, list):
            for location in locations:
                self._update_single_status(location, statuses)
        else:
            self._update_single_status(locations, statuses)


    # Feature-selection section
    def _optimize_ridge_alpha(self, target: np.ndarray, features: np.ndarray) -> float:
        """Search for the ridge penalty with the smallest cross-validation error.

        A logarithmic grid of 21 candidates centred on the current best
        value is evaluated repeatedly; each round uses a smaller ratio
        between neighbouring candidates, so the grid gets denser around the
        most recent optimum.

        NOTE(review): ``store_cv_values`` / ``cv_values_`` appear to have
        been renamed in recent scikit-learn releases - confirm against the
        version this project pins.

        Args:
            target: regression targets.
            features: regressor matrix.

        Returns:
            The selected penalty.
        """
        # TODO: push these parameters into some field
        grid_offsets = np.arange(-10, 11, dtype=np.float64)
        n_offsets = grid_offsets.shape[0]
        # Ratio between neighbouring candidates for each refinement round
        grid_ratios = np.array(
            [2.**(n_offsets ** (-round_no/2)) for round_no in range(15)]
        ).astype(np.float64)

        best_alpha = 1.
        for ratio in grid_ratios:
            candidates = best_alpha * np.power(ratio, grid_offsets)
            search = RidgeCV(alphas=candidates, fit_intercept=False, store_cv_values=True)
            search.fit(features, target)
            # Total cross-validation error of every candidate
            cv_totals = search.cv_values_.sum(axis=0)
            best_alpha = candidates[np.argmin(cv_totals)]
        return best_alpha

    def _ridge_regression(self, target: np.ndarray, features: np.ndarray) -> np.ndarray:
        """Fit a ridge regression with a tuned penalty.

        NOTE(review): the penalty is tuned with ``fit_intercept=False`` while
        the final fit uses the estimator's default intercept setting -
        confirm this difference is intended.

        Args:
            target: regression targets.
            features: regressor matrix.

        Returns:
            The fitted coefficients as a flat array.
        """
        # Finding an optimal alpha for ridge regression
        penalty = self._optimize_ridge_alpha(target, features)
        # Running a ridge regression to get an efficient estimate of the coefficients
        model = Ridge(alpha=penalty)
        model.fit(features, target)
        return model.coef_.reshape(-1)

    def __lasso_plots(self, alphas: np.ndarray, coefs: np.ndarray, feature_names: list) -> None:
        """Plot the LASSO coefficient paths against -log10(alpha).

        Only features whose coefficient is non-zero somewhere on the path
        get a line.

        Args:
            alphas: penalties of the path.
            coefs: coefficient paths, one row per feature.
            feature_names: legend labels, one per row of ``coefs``.
        """
        plt.figure(figsize=(15, 10))
        horizontal = -np.log10(alphas)
        # TODO: Fix the labeling
        for label, path in zip(feature_names, coefs):
            if (path != 0).any():
                plt.plot(horizontal, path, label=label)
        plt.axis('tight')
        plt.legend()
        plt.show()

    def _lasso_regression(self, target: np.ndarray, features: np.ndarray, feature_names: list) -> np.ndarray:
        """Compute a LASSO path and plot it.

        Args:
            target: regression targets; only the first column is used.
            features: regressor matrix.
            feature_names: labels used in the plot.

        Returns:
            The coefficient paths returned by ``lasso_path``.
        """
        # TODO: push these parameters into some field
        eps = 1e-5
        n_alphas = int(5e3)
        # Running the sequence of LASSO regressions
        alphas, coefs, _unused = lasso_path(
            features, target[:, 0], eps=eps, n_alphas=n_alphas, fit_intercept=False
        )
        # A nice picture of LASSO output
        # TODO: make the plot optional (e.g. via a display_lasso flag)
        self.__lasso_plots(alphas, coefs, feature_names)
        return coefs

    @staticmethod
    def _mask_order_extractor(coefs: np.ndarray) -> list:
        # Extracting the unique combinations of regressors with non-zero coefficients
        bool_mask_list, bool_mask_set = list(), set()
        for lane in coefs.T:
            bool_mask = tuple(lane != 0)
            if any(bool_mask) and (bool_mask not in bool_mask_set):
                bool_mask_set.add(bool_mask)
                bool_mask_list.append(list(bool_mask))
        return bool_mask_list

    def _ordered_feature_selection(self, target: np.ndarray, features: np.ndarray, mask_list: list) -> tuple:
        # Checking the unique combinations of regressors using leave-one-out OLS
        self.selection_log = [None] * len(mask_list)
        for i, bool_mask in enumerate(mask_list):
            considered_cv = (  # Not sure why fit_intercept was true when I started refactoring....
                RidgeCV(alphas=np.array([1e-100]), store_cv_values=True, fit_intercept=False)
                .fit(features[:, bool_mask], target)
                .cv_values_[:, 0]
            )
            self.selection_log[i] = {
                "mask": bool_mask,
                "cv_MSE": considered_cv.mean(),
            }  # Custom selection metrics can be employed, but for OLS there is no point
        min_cv, best_idx = float("inf"), None
        for i in range(len(mask_list)):
            if self.selection_log[i]["cv_MSE"] < min_cv:
                min_cv = self.selection_log[i]["cv_MSE"]
                best_mask = self.selection_log[i]["mask"]
        return best_mask

    def select_via_adaptive_lasso(
            self, target: np.ndarray, features: np.ndarray,
            feature_names: Optional[list] = None
    ) -> tuple:
        """Adaptive LASSO to select the features"""
        # Filling the default values into feature_names
        if feature_names is None:
            feature_names = list(map(str, range(features.shape[1])))
        # Checking consistency of feature_names' list
        if len(feature_names) != features.shape[1]:
            raise ValueEerror(
                "Feature names' list length mismatch: "
                + f"got {len(feature_names)} names for {features.shape[1]} features"
            )

        # Initial estimator for lasso adaption
        initial_estimator = self._ridge_regression(target, features)
        # Modifying the features to switch LASSO into adaptive mode
        adapted_features = features * np.power(initial_estimator, 2.)  # TODO: push this parameter into some field
        # Running adaptive LASSO and extracting the coefficient consideration order
        mask_list = self._mask_order_extractor(self._lasso_regression(target, adapted_features, feature_names))
        # Checking the unique combinations of regressors using leave-one-out OLS
        return self._ordered_feature_selection(target, features, mask_list)

    def _extract_indexes_for_selection(self) -> tuple:
        on_index, auto_index, auto_names = list(), list(), list()
        for cls_idx, feature_cls in enumerate(self.features):
            for f_idx, feature in enumerate(feature_cls["features"]):
                if feature["status"] == "On":
                    on_index.append((cls_idx, f_idx))
                elif feature["status"] == "Auto":
                    auto_index.append((cls_idx, f_idx))
                    auto_names.append("{0}/{1}".format(feature_cls["name"], feature["name"]))
        return on_index, auto_index, auto_names

    def _remove_the_preselected_features(self, on_index: list, auto_index: list) -> tuple:
        auto_features = np.hstack([
            self.features[idx[0]]["features"][idx[1]]["value"]
            for idx in auto_index
        ])
        if not on_index:
            return self.targets.copy(), auto_features
        on_features = np.hstack([
            self.features[idx[0]]["features"][idx[1]]["value"]
            for idx in on_index
        ])
        projection_residual = lambda X, Y: (
            Y - X.dot(np.dot(
                np.linalg.inv(np.dot(X.T, X) / X.shape[0]),
                np.dot(X.T, Y) / X.shape[0]
            ))
        )
        target_residuals = projection_residual(on_features, self.targets)
        auto_residuals = projection_residual(on_features, auto_features)
        return target_residuals, auto_residuals
    
    def _regress_on_selected_features(self, selected_index: list) -> dict:
        features = np.hstack([
            self.features[idx[0]]["features"][idx[1]]["value"]
            for idx in selected_index
        ])
        XtX_inv = np.linalg.inv(np.dot(features.T, features) / self.obs_count)
        beta = np.dot(XtX_inv, np.dot(features.T, self.targets) / self.obs_count)
        errors = (self.targets - np.dot(features, beta)).reshape((-1, 1, 1))
        XeeX = np.mean(
            np.expand_dims(features, axis=2) * np.expand_dims(features, axis=1) * np.square(errors),
            axis=0
        )
        beta_error_cov = np.dot(XtX_inv, np.dot(XeeX, XtX_inv)) / self.obs_count
        beta_error_std = np.sqrt(np.diag(beta_error_cov)).reshape((-1, 1))
        beta_t_stat = beta / np.sqrt(np.diag(beta_error_std))
        r2 = 1 - (np.std(errors) / np.std(self.targets))**2

        return {
            "beta": beta * self.targets_std,
            "beta_error_cov": beta_error_cov,
            "beta_error_std": beta_error_std,
            "beta_t_stat": beta_t_stat,
            "R2": r2,
        }

    def select_features(self):
        """Select among the "Auto" features and fit the final regression.

        The "On" features are always kept: they are projected out of the
        targets and of the "Auto" features before the adaptive LASSO picks a
        subset of the latter.

        Returns:
            The regression summary produced by
            ``_regress_on_selected_features`` for the "On" features plus the
            selected "Auto" features.
        """
        # Extracting the features to use in the selection
        on_index, auto_index, auto_names = self._extract_indexes_for_selection()
        # FWL-removal of the forcedly-included features
        residual_target, residual_features = self._remove_the_preselected_features(on_index, auto_index)
        # Selecting the features from the modified set (and modified targets)
        auto_mask = self.select_via_adaptive_lasso(residual_target, residual_features, auto_names)
        # Forced features first, then the automatically selected ones
        kept_auto = [idx for idx, keep in zip(auto_index, auto_mask) if keep]
        return self._regress_on_selected_features(on_index + kept_auto)


# Testing section: a small synthetic regression with correlated regressors
np.random.seed(9001)
rho = 0.3
N, k = 150, 5
# Independent normal draws are mixed so that the columns become correlated,
# then every column is rescaled to unit standard deviation.
X = np.random.normal(0, 1, (N, k)) @ (rho * np.ones((k, k)) + (1 - rho) * np.eye(k))
X = X / X.std(axis=0, keepdims=True)
# The second regressor has a zero coefficient
beta = np.array([1, 0, -2, 1, 1]).reshape((-1, 1))
y = X @ beta

selecting_test = LinearFeatureSelector(y)
selecting_test.add_features(features=X)
# Force one regressor into the model
selecting_test.update_status(("features", "3"), "On")
print(selecting_test.display_features())
# selecting_test.select_features()

                 status  corr.w.target  norm. test p-val
class    feature                                        
features 0         Auto       0.577656          0.948887
         1         Auto       0.428647          0.951836
         2         Auto      -0.015539          0.889784
         3           On       0.539508          0.735060
         4         Auto       0.576777          0.632004


In [7]:
# Just rewrite this stuff

class SeasonalityExtractor(LinearFeatureSelector):
    def __init__(
            self, targets, dts, default_status: str = "Auto",
            start_dt=pd.to_datetime("2017-12-31"), scale=None, periods=None
    ):
        """Set up the selector and add the calendar features.

        Args:
            targets: array-like of target observations.
            dts: observation timestamps; ``(dts - start_dt) / scale`` must
                give whole numbers, one per observation.
            default_status: status given to newly added features.
            start_dt: origin of the time axis.
            scale: length of one time step; defaults to one day.
            periods: cycle lengths in time steps; defaults to fractions and
                multiples of a 365.2425-step year.

        Raises:
            AssertionError: if the scaled differences are not whole numbers
                or their count differs from the number of targets.
        """
        super().__init__(targets, default_status)

        self.periods = (
            [365.2425 / i for i in [.5, 1, 2, 3, 4, 6, 12]] if periods is None else periods
        )
        step = np.timedelta64(24 * 3600, "s") if scale is None else scale

        # Number of steps between every observation and the origin, as a column
        self.diffs = np.asarray((dts - start_dt) / step).reshape((-1, 1))
        assert np.all(self.diffs == np.round(self.diffs, decimals=0)), "The scaling does not provide round values"
        assert self.diffs.shape[0] == self.obs_count, "Input shape mismatch"

        self._add_trend()
        self._add_weekends()
        self._add_weekdays(0)  # Dropping Sundays


    def _prepare_feature_cls_dict(self, name: str, **kwargs) -> dict:
        """Build a feature-class dict that always carries a ``cyclical`` flag.

        The flag defaults to False and can be overridden through ``kwargs``.
        """
        return {"cyclical": False, **super()._prepare_feature_cls_dict(name, **kwargs)}


    def cyclify_feature(self, feature_cls: Union[int, str], status: Optional[str] = "Off"):
        """Create a cyclical counterpart of an existing feature class.

        For every feature of the source class and every period in
        ``self.periods`` a new feature named ``<feature>__<period>`` is
        created. It stores, under ``"sin_cos"``, the source values multiplied
        by a sine and by a cosine wave of that period. The new features live
        in a class named ``<class>_cycled``; the source class is not modified.

        Args:
            feature_cls: position or name of the class to cyclify.
            status: status of the new features; ``None`` keeps the status of
                the respective source feature.

        Raises:
            ValueError: if the status is unsupported, the class is not found,
                or the class is cyclical / already has a cycled counterpart.
        """
        if status not in {"Off", "On", "Auto", None}:
            raise ValueError(f"Status {status} is not supported")
        # Processing the string and integer indexing
        if isinstance(feature_cls, int):
            source_idx = feature_cls
            feature_cls = self.features[feature_cls]["name"]
        elif isinstance(feature_cls, str):
            source_idx = self.feature_index.get(feature_cls, None)
        else:
            raise ValueError(f"Expcted feature_cls to be str or int, got {type(feature_cls)}")
        # Checking that the feature to cyclify exists. And that it is not cyclified yet
        if source_idx is None:
            raise ValueError(f"Feature {feature_cls} not found")
        source_cls = self.features[source_idx]
        if (f"{feature_cls}_cycled" in self.feature_index) or source_cls.get("cyclical", False):
            raise ValueError(f"Feature {feature_cls} is cyclified")

        # Making the class for the new features
        new_cls = self._prepare_feature_cls_dict(f"{feature_cls}_cycled", cyclical=True)
        new_cls["features"] = []
        # The features are derived from the SOURCE class, which is only read
        for feature in source_cls["features"]:
            value = feature["value"]
            template = {key: entry for key, entry in feature.items() if key != "value"}
            for period in self.periods:
                # One full cycle every `period` steps of self.diffs
                angle = self.diffs * (2*np.pi) / period
                feature_to_add = dict(template)
                feature_to_add["name"] += f"__{period:07.3f}"
                if status is not None:
                    feature_to_add["status"] = status
                # Dividing by sqrt(.5) rescales a full-cycle wave to unit mean square
                feature_to_add["sin_cos"] = [
                    value * np.sin(angle) / (.5**.5),
                    value * np.cos(angle) / (.5**.5),
                ]
                new_cls["features"].append(feature_to_add)

        # The index must hold the position the class gets in self.features
        self.feature_index[new_cls["name"]] = len(self.features)
        self.features.append(new_cls)
        self._check_integrity()  # Debug run
        self.log += "Added {0}\n".format(new_cls["name"])

        
    def _prepare_feature_dict(self, name: str, **kwargs) -> dict:
        """Build the dict describing a single feature.

        The result holds the name, the default status (unless ``kwargs``
        provides one) and all extra entries - the same layout the parent
        class produces, hence the delegation.
        """
        return super()._prepare_feature_dict(name, **kwargs)
        
        
    def _add_trend(self):
        """Add the "trend" class with one standardised linear time feature.

        Raises:
            ValueError: if the trend has already been added.
        """
        if "trend" in self.feature_index:
            raise ValueError("Trend is already added to the model")
        # Time since the origin, centred and scaled to unit standard deviation
        linear = (
            (self.diffs - np.mean(self.diffs, dtype=np.float64))
            / np.std(self.diffs, dtype=np.float64)
        )
        trend_cls = self._prepare_feature_cls_dict(
            "trend",
            features=[self._prepare_feature_dict("linear", value=linear)],
        )
        # The index must hold the position the class gets in self.features,
        # i.e. the list length BEFORE the append.
        self.feature_index["trend"] = len(self.features)
        self.features.append(trend_cls)
        self._check_integrity()  # Debug run
        self.log += "Added trend\n"

    @staticmethod
    def __extract_weekdays(days_to_skip=None):
        """List the weekday numbers 0..6 without the skipped ones.

        Args:
            days_to_skip: ``None`` (keep all days), a single day number, or a
                container (list, tuple, set, dict, NumPy array) of day
                numbers.

        Returns:
            The remaining weekday numbers in ascending order.

        Raises:
            ValueError: if ``days_to_skip`` has an unsupported type.
        """
        if days_to_skip is None:
            return list(range(7))
        if isinstance(days_to_skip, (int, np.integer)):
            skipped = {int(days_to_skip)}
        # np.ndarray is the array TYPE; np.array is only the factory function
        # and makes isinstance() itself fail.
        elif isinstance(days_to_skip, (list, tuple, set, dict, np.ndarray)):
            skipped = days_to_skip
        else:
            raise ValueError(f"days_to_skip has unrecognised type {type(days_to_skip)}")
        return [wd for wd in range(7) if wd not in skipped]
        
    def _add_weekdays(self, days_to_skip=None):
        """Add the "weekdays" class with one dummy feature per weekday.

        The weekday of an observation is ``diffs % 7``. Every dummy is
        shifted by 1/7 and divided by sqrt(6)/7, the mean and standard
        deviation of an indicator that is 1 with frequency 1/7. The features
        are switched "Off" when the "weekends" class already exists.

        Args:
            days_to_skip: weekday number(s) to leave out. When nothing is
                skipped the class is marked as linearly dependent.

        Raises:
            ValueError: if the weekdays have already been added.
        """
        if "weekdays" in self.feature_index:
            raise ValueError("Weekdays are already added to the model")
        status = "Off" if "weekends" in self.feature_index else self.default_status
        day_numbers = self.diffs.astype(int) % 7
        weekdays_cls = self._prepare_feature_cls_dict(
            "weekdays",
            linearly_dependent=(days_to_skip is None),
            features=[
                self._prepare_feature_dict(
                    f"weekday_{wd}",
                    status=status,
                    value=((day_numbers == wd) - (1./7.))/(6.**.5 / 7.),
                )
                for wd in self.__extract_weekdays(days_to_skip)
            ],
        )
        # The index must hold the position the class gets in self.features,
        # i.e. the list length BEFORE the append.
        self.feature_index["weekdays"] = len(self.features)
        self.features.append(weekdays_cls)
        self._check_integrity()  # Debug run
        self.log += "Added weekdays\n"
        
    def _add_weekends(self):
        """Add the "weekends" class with a single weekend dummy.

        An observation counts as weekend when ``diffs % 7`` is 5 or 6. The
        dummy is shifted by 2/7 and divided by sqrt(10)/7, the mean and
        standard deviation of an indicator that is 1 with frequency 2/7. The
        feature is switched "Off" when the "weekdays" class already exists.

        NOTE(review): which calendar days the residues 5 and 6 correspond to
        depends on the weekday of ``start_dt`` - confirm they match the
        intended weekend.

        Raises:
            ValueError: if the weekends have already been added.
        """
        if "weekends" in self.feature_index:
            raise ValueError("Weekends are already added to the model")
        status = "Off" if "weekdays" in self.feature_index else self.default_status
        weekends_cls = self._prepare_feature_cls_dict(
            "weekends",
            features=[
                self._prepare_feature_dict(
                    "weekend",
                    status=status,
                    value=((self.diffs.astype(int)%7 >= 5) - (2./7.))/(10.**.5 / 7.),
                )
            ],
        )
        # The index must hold the position the class gets in self.features,
        # i.e. the list length BEFORE the append.
        self.feature_index["weekends"] = len(self.features)
        self.features.append(weekends_cls)
        # Debug run:
        self._check_integrity()
        self.log += "Added weekends\n"
        
    def _add_cycles(self, periods):
        """Add the "cycles" class with one sine/cosine pair per period.

        Every feature stores, under ``"sin_cos"``, a sine and a cosine wave
        that complete one cycle every ``period`` steps of ``self.diffs``.

        Args:
            periods: cycle lengths, in the units of ``self.diffs``.

        Raises:
            ValueError: if the main cycles have already been added.
        """
        if "cycles" in self.feature_index:
            raise ValueError("Main cycles are already added to the model")
        cycle_features = []
        # The periods passed by the caller are used for both the features
        # and the log line below.
        for period in periods:
            angle = self.diffs * (2*np.pi) / period
            cycle_features.append(
                self._prepare_feature_dict(
                    f"cycle__{period:07.3f}",
                    # Dividing by sqrt(.5) rescales a full-cycle wave to unit mean square
                    sin_cos=[
                        np.sin(angle) / (.5**.5),
                        np.cos(angle) / (.5**.5),
                    ],
                )
            )
        cycles_cls = self._prepare_feature_cls_dict("cycles", cyclical=True, features=cycle_features)
        # The index must hold the position the class gets in self.features,
        # i.e. the list length BEFORE the append.
        self.feature_index["cycles"] = len(self.features)
        self.features.append(cycles_cls)
        self._check_integrity()  # Debug run
        self.log += "Added main cycles " + " ".join(map(str, periods)) + "\n"


    def fit_phases(self):
        """Collapse every sine/cosine pair into one phase-shifted feature.

        A ridge regression of the targets on all stored features (plain
        ``"value"`` columns and both columns of every ``"sin_cos"`` pair) is
        run. For every pair the two coefficients are normalised to unit
        length and the weighted sum of the sine and the cosine column is
        stored as the feature's ``"value"``. The ``"sin_cos"`` pair itself
        is kept, so the phases can be refitted later.

        Raises:
            ValueError: if a ``"sin_cos"`` entry does not hold exactly two
                columns, or a feature has neither ``"value"`` nor
                ``"sin_cos"``.
        """
        self.log += "Starting the phase-shift for cyclical features\n"
        # Creating an array of features
        selected_features = []
        cycle_sins = {}  # column of the sine -> (class position, feature position)
        for cls_idx, feature_cls in enumerate(self.features):
            for f_idx, feature in enumerate(feature_cls["features"]):
                if "sin_cos" in feature:
                    pair = feature["sin_cos"]
                    if len(pair) != 2:
                        raise ValueError(
                            "Invalid feature specification:"
                            + " sin_cos should contain a pair of features"
                        )
                    cycle_sins[len(selected_features)] = (cls_idx, f_idx)
                    selected_features += list(pair)
                elif "value" in feature:
                    selected_features.append(feature["value"])
                else:
                    raise ValueError(
                        "Invalid feature specification:"
                        + " a feature should contain either value or sin_cos"
                    )
        self.log += "Regressing {0} features including {1} cycle pairs\n".format(
            len(selected_features), len(cycle_sins)
        )
        design = np.hstack(selected_features)
        # Running a ridge regression to get some efficient estimate of the coefficients
        ridge_coefs = self._ridge_regression(self.targets, design)
        # Collapsing pairs of cyclical features' coefficients into respective phases
        for sin_id, (cls_idx, f_idx) in cycle_sins.items():
            b_sin, b_cos = ridge_coefs[sin_id], ridge_coefs[sin_id+1]
            scale = np.hypot(b_sin, b_cos)
            if scale > 0:
                scaled_sin, scaled_cos = b_sin/scale, b_cos/scale
            else:
                # Both coefficients are zero: the phase is undefined, keep the pure sine
                scaled_sin, scaled_cos = 1.0, 0.0
            # Signed angle of the (cos, sin) weight vector, in units of pi;
            # unlike arccos * sign it is also defined for a zero sine weight.
            phase = np.arctan2(scaled_sin, scaled_cos) / np.pi
            feature = self.features[cls_idx]["features"][f_idx]
            self.log += "    Phase of {0}-{1} is {2}\u03C0\n".format(
                feature["name"], self.features[cls_idx]["name"], round(phase, 3)
            )
            sin_part, cos_part = feature["sin_cos"]
            feature["value"] = (sin_part*scaled_sin) + (cos_part*scaled_cos)
        self.log += "Collapsed the phase-shift for cyclical features\n"