In [None]:
import pandas as pd
import polars as pl
from matplotlib import pyplot as plt
import numpy as np 
import polars_ols
from skmisc.loess import loess
from matplotlib.gridspec import GridSpec
from mpl_bsic import apply_bsic_style, apply_bsic_logo, export_figure
import matplotlib.colors as mcolors

# Loading Data

In [None]:
# Load the bond panel (with option-adjusted spreads) produced by the data-preparation step.
bond_data = pl.read_parquet('../../data/final_data/bond_data_oas.pq')
# Leave the preview as the last expression so the notebook renders it as a rich table.
bond_data.head()

In [None]:
# Spread-duration proxy: the bond's duration scaled by the fraction of its yield that is spread (oas / bond_yield).
bond_data = bond_data.with_columns(
    spread_duration = pl.col('oas') / pl.col('bond_yield') * pl.col('duration')
)

## Quickly filtering out bonds close to default

In [None]:
# Count the CUSIPs that have at least one observation with bond_ret > 1
# (i.e. above +100% if returns are stored as decimals -- TODO confirm the unit).
# NOTE(review): this cell only reports the count. `abnormal_prices` is reassigned by the
# price-based filter in the next cell before it is ever used to filter `bond_data`,
# so these CUSIPs are not actually removed here -- confirm whether that is intended.
abnormal_prices = bond_data.filter(pl.col('bond_ret') >1).select('cusip').unique()
tot_cusips = bond_data.n_unique('cusip')

print(
    f'we have {len(abnormal_prices)} CUSIPs to remove, out of {tot_cusips} ({len(abnormal_prices) / tot_cusips * 100:.2f}%)'
)
## Removing bonds that traded below a price of 75

In [None]:
# Any bond that was ever quoted below this price is dropped from the whole sample.
MIN_BOND_PRICE = 75

# One row per CUSIP that has at least one observation priced below the threshold.
abnormal_prices = bond_data.filter(pl.col('bondprc') < MIN_BOND_PRICE).select('cusip').unique()
tot_cusips = bond_data.n_unique('cusip')

print(f'we have {len(abnormal_prices)} CUSIPs to remove, out of {tot_cusips} ({len(abnormal_prices) / tot_cusips * 100:.2f}%)')
# `is_in` expects a collection/Series of values, so hand it the CUSIP column explicitly
# instead of relying on an implicit conversion of the one-column DataFrame.
bond_data = bond_data.filter(~pl.col('cusip').is_in(abnormal_prices.to_series()))

# Segmenting in Buckets, based on Rating, Industry, Duration

## Rating

Group by rating macro-categories

In [None]:
# Collapse the numeric rating into macro-categories: ratings up to 7 become 'A',
# everything else becomes 'B'.
# NOTE(review): the `<= 16` branch and the fallback both produce 'B', so the second
# threshold currently has no effect -- confirm whether a third category was intended.
rating_bucket_expr = (
    pl.when(pl.col('rating') <= 7).then(pl.lit('A'))
    .when(pl.col('rating') <= 16).then(pl.lit('B'))
    .otherwise(pl.lit('B'))
)
bond_data_with_ratings = bond_data.with_columns(rating_bucket=rating_bucket_expr)
bond_data = bond_data_with_ratings

## Industry

We choose the following macro-groups, based on the first digit of the SIC Code:

- 0, 1: Agriculture, Forestry, Fishing (0) + Mining (1) + Construction (1)
- 2, 3, 5: Manufacturing (2, 3) + Wholesale Trade and Retail Trade (5)
- 4, 9: Transportation and Public Utilities (4) + Public Administration (9)
- 6: Finance, Insurance, Real Estate (6)
- 7, 8: Services (7, 8)


In [None]:
bond_data.select('PERMNO', 'sic_code').head(4)

In [None]:
# First character of the SIC code, read as an integer.
# NOTE(review): if `sic_code` is stored as a number, codes below 1000 lose their leading
# zero and the first character is no longer the SIC division digit -- verify the dtype.
sic_first_digit = pl.col('sic_code').cast(str).str.slice(0,1).cast(int)

# Map the first digit to one of five macro-groups (1 to 5); digits matching no branch stay null.
bond_data = bond_data.with_columns(
    industry_bucket=pl
        .when(sic_first_digit.is_in([0,1])).then(pl.lit(1))
        .when(sic_first_digit.is_in([2,3,5])).then(pl.lit(2))
        .when(sic_first_digit.is_in([4,9])).then(pl.lit(3))
        .when(sic_first_digit == 6).then(pl.lit(4))
        .when(sic_first_digit.is_in([7,8])).then(pl.lit(5))
)

bond_data.select('industry_bucket').head(3)

In [None]:
bond_data.select('industry_bucket').unique()

## Duration 

In [None]:
bond_data.select('duration').quantile(0.5)

In [None]:
# Two duration buckets split at 5.5: at or below the cut-off is 'shortdur', anything else 'longdur'.
is_short_duration = pl.col('duration') <= 5.5
bond_data = bond_data.with_columns(
    duration_bucket=pl.when(is_short_duration).then(pl.lit('shortdur')).otherwise(pl.lit('longdur'))
)


## Aggregate the buckets in one column

In [None]:
# Build a single '<industry>_<rating>_<duration>' key and drop the three component columns.
bucket_parts = ['industry_bucket', 'rating_bucket', 'duration_bucket']
bond_data = bond_data.with_columns(
    bucket=pl.concat_str([pl.col(part).cast(str) for part in bucket_parts], separator='_')
).drop(bucket_parts)

In [None]:
bond_data.select('bucket').head(3)

In [None]:
# Number of distinct CUSIPs in every (date, bucket) group; the count is stored in the `cusip` column.
bonds_per_date_bucket = (
    bond_data
    .group_by(['date', 'bucket'])
    .agg(pl.col('cusip').n_unique())
    .select('date', 'bucket', 'cusip')
)

bonds_per_date_bucket.head()

In [None]:
# Share of (date, bucket) groups that contain fewer than 10 distinct bonds.
bonds_per_date_bucket.filter(pl.col('cusip') < 10).shape[0] / bonds_per_date_bucket.shape[0]

# Compute Residual Returns

In [None]:
def weighted_median_pl(data: "Sequence[pl.Series]"):
    """Compute the weighted median of a group of observations.

    Intended for use inside ``pl.map_groups``, where the expressions are given as
    first the values and then the weights.

    Parameters
    ----------
    data
        A pair ``(values, weights)``. Each element may be a ``pl.Series`` or any
        array-like accepted by ``numpy.asarray``.

    Returns
    -------
    The smallest value (in sorted order) whose cumulative weight reaches half of the
    total weight, or ``nan`` when the group is empty.
    """
    values, weights = data
    # Series are converted with their own `to_numpy`; plain sequences go through numpy.
    values = values.to_numpy() if hasattr(values, 'to_numpy') else np.asarray(values)
    weights = weights.to_numpy() if hasattr(weights, 'to_numpy') else np.asarray(weights)

    # An empty group has no median; return NaN instead of failing on `cumulative[-1]`.
    if values.size == 0:
        return float('nan')

    order = np.argsort(values)
    cumulative = np.cumsum(weights[order])

    # First sorted position where the cumulative weight reaches half of the total.
    position = np.searchsorted(cumulative, 0.5 * cumulative[-1])
    # Guard against an out-of-range position (possible with NaN or negative weights).
    position = min(int(position), values.size - 1)

    return values[order[position]]


In [None]:
def compute_residual_returns(
    df: pl.DataFrame,
    oas_column: str = 'oas',
    use_median: bool = False,
    group_cols: "Sequence[str]" = ('date', 'bucket'),
):
    """Compute the residual return as unadjusted return minus systematic return.

    * dxs = spread_duration * OAS (the ``oas_column``)
    * Relative dxs = bond dxs / weighted average (or median) dxs of the bond's peer group
    * Systematic return = relative dxs * weighted average (or median) of
      ``exretnc_dur_t+1`` in the bond's peer group
    * Residual return = ``bond_ret_t+1`` - systematic return

    Weights are ``bond_amount_out``. Rows with a null in any of the inputs are excluded
    from the peer-group statistics but still receive a residual via the left join.

    Parameters
    ----------
    df
        Bond panel with the columns ``spread_duration``, ``exretnc_dur_t+1``,
        ``bond_ret_t+1``, ``bond_amount_out``, ``oas_column`` and every column in
        ``group_cols``.
    oas_column
        Name of the spread column used to build ``dxs``.
    use_median
        Use weighted medians instead of weighted means for the peer-group statistics.
    group_cols
        Columns that define a peer group. The default is one group per
        (date, bucket), so that the systematic return of a bond is built only from
        peers observed on the same date. Pass ``('bucket',)`` to pool all dates of a
        bucket together (the previous behaviour).

    Returns
    -------
    ``df`` with the added columns ``dxs``, the two peer-group statistics,
    ``relative_dxs``, ``systematic_return`` and ``residual_return_t+1``.
    """
    # NOTE(review): the peer statistic uses `exretnc_dur_t+1` while the residual is taken
    # from `bond_ret_t+1` -- confirm that mixing the two return measures is intended.
    group_cols = [group_cols] if isinstance(group_cols, str) else list(group_cols)

    suffix = 'median' if use_median else 'mean'
    dxs_col = f'weighted_dxs_{suffix}'
    ret_col = f'weighted_ret_{suffix}'

    df = df.with_columns(
        dxs = pl.col('spread_duration') * pl.col(oas_column)
    )

    grouped = (
        df.select(*group_cols, 'dxs', 'exretnc_dur_t+1', 'bond_amount_out')
        .drop_nulls()
        .group_by(group_cols)
    )

    if use_median:
        peer_stats = grouped.agg(
            pl.map_groups(exprs=['dxs', 'bond_amount_out'], function=weighted_median_pl).alias(dxs_col),
            pl.map_groups(exprs=['exretnc_dur_t+1', 'bond_amount_out'], function=weighted_median_pl).alias(ret_col),
        )
    else:
        total_weight = pl.sum('bond_amount_out')
        peer_stats = grouped.agg(
            (pl.col('dxs').dot(pl.col('bond_amount_out')) / total_weight).alias(dxs_col),
            (pl.col('exretnc_dur_t+1').dot(pl.col('bond_amount_out')) / total_weight).alias(ret_col),
        )

    # Drop stale statistics first so that re-running on an already processed frame
    # does not create duplicated columns in the join.
    df = (df.select(pl.all().exclude(dxs_col, ret_col))
        .join(peer_stats, how='left', on=group_cols)
        .with_columns(
            relative_dxs = pl.col('dxs') / pl.col(dxs_col)
        ).with_columns(
            systematic_return = pl.col('relative_dxs') * pl.col(ret_col)
        ).with_columns(
            (pl.col('bond_ret_t+1') - pl.col('systematic_return')).alias('residual_return_t+1'),
        ))

    return df

In [None]:
# Add residual returns, using weighted medians (use_median=True) for the bucket statistics.
bond_data = compute_residual_returns(bond_data, use_median=True, oas_column='oas')

In [None]:
# Inspect the rows whose next-period return exceeds 0.5.
bond_data.filter(pl.col('bond_ret_t+1') > 0.5)

In [None]:
bond_data.select('residual_return_t+1').describe()

# Scoring Signals based on the quantile 

## Signal Class

In [None]:
class Signal():
    """Quantile-based scoring of one signal column within (date, bucket) peer groups.

    The signal is demeaned inside every (date, bucket) group, the demeaned values are
    cut into ``n_quantiles`` quantiles, and the average next-period residual return of
    every quantile is smoothed with a LOESS fit to obtain a per-bond signal value.

    Parameters
    ----------
    df
        Bond panel. Must contain ``date``, ``cusip``, ``bucket``, ``column`` and
        ``residual_return_t+1``.
    column
        Name of the raw signal column.
    n_quantiles
        Number of quantiles the demeaned signal is cut into.
    signal_value_name
        Name of the output column written by ``fit_alpha_estimates``.
    """

    def __init__(self, df: pl.DataFrame, column: str, n_quantiles: int, signal_value_name: str):
        self.df = df
        self.column = column
        self.n_quantiles = n_quantiles

        self.quantile_col_name = f'{column}_quantile'
        self.delta_col_name = f'{column}_delta'
        self.mean_col_name = f'{column}_bucket_mean'
        self.signal_value_name = signal_value_name
        self.return_col = 'residual_return_t+1'

        # lazily computed artefacts
        self.transition_probabilities = None
        self.transition_lag = None  # lag the cached transition matrix was computed with
        self.spline = None
        self.alpha_estimates = None

    def compute_quantiles(self):
        """Assign every bond a quantile of the demeaned signal within its (date, bucket) group.

        Adds ``<column>_delta`` (signal minus group mean) and ``<column>_quantile``
        (integer label starting at 1) to ``self.df`` and returns the updated frame.
        Rows whose delta is NaN or null get a null quantile.

        Raises
        ------
        ValueError
            If no (date, bucket) group has a valid observation of the signal.
        """
        quantile_col_name, delta_col_name, mean_col_name = self.quantile_col_name, self.delta_col_name, self.mean_col_name
        col = self.column
        n_quantiles = self.n_quantiles
        df = self.df

        # Break points for qcut: n_quantiles probabilities ending at 1.0. qcut needs one
        # more label than break points; the trailing label covers values above the last
        # break and is expected to stay unused because the last break is the maximum.
        bins = np.linspace(0,1,n_quantiles+1)[1:]
        labels = [str(i+1) for i in range(n_quantiles+1)]

        # compute the mean of the signal for each bucket at each date
        col_mean_by_bucket = df.select('date', 'bucket', col).group_by(['date', 'bucket']).agg(
            pl.col(col).drop_nans().mean().alias(mean_col_name) # .mean breaks with NaNs, so drop them
        )

        # join the dataframes and compute the difference with respect to the group average (computed before)
        deltas = (df
                  .select(pl.all().exclude(quantile_col_name, mean_col_name))
                  .join(col_mean_by_bucket, how='left', on=['date', 'bucket'])
                  .with_columns(
                        (pl.col(col) - pl.col(mean_col_name)).alias(delta_col_name)
                    )
                  )
        # just select the cols we need, to make computations faster
        deltas = deltas.select('date', 'cusip', 'bucket', delta_col_name)

        data = []
        total_items = col_mean_by_bucket.shape[0]
        print('computing... ', end='')

        for i, group in enumerate(col_mean_by_bucket.iter_rows(named=True), start=1):
            # filter for NaNs and Nulls as well
            mask = (pl.col('date') == group['date']) & (pl.col('bucket') == group['bucket']) & (pl.col(delta_col_name).is_not_nan() & (pl.col(delta_col_name).is_not_null()))

            group_data = deltas.filter(mask)
            # if the DataFrame is empty just continue, otherwise qcut will throw an error
            if group_data.shape[0] == 0:
                continue

            group_data = group_data.with_columns(
                pl.col(delta_col_name).qcut(bins,labels=labels, allow_duplicates=True).alias(quantile_col_name)
            )

            data.append(group_data)

            if i % 1000 == 0:
                print(f'{i}/{total_items} ', end='')

        print('... done!')

        if not data:
            raise ValueError(f'no valid observations of {col!r} in any (date, bucket) group')

        quantiles = pl.concat(data, how='vertical')
        quantiles = quantiles.with_columns(
            pl.col(quantile_col_name).cast(int).alias(quantile_col_name)
        )

        # Drop results of a previous run so that calling this method twice does not
        # produce duplicated columns in the join.
        merged_df = (df
                     .select(pl.all().exclude(quantile_col_name, delta_col_name))
                     .join(quantiles.drop('bucket'), how='left', on=['date', 'cusip']))

        self.df = merged_df
        return merged_df

    def information_ratio(self):
        """Annualised information ratio of the top-minus-bottom quantile portfolio.

        For every date the mean residual return of each quantile is computed; the
        long-short return is the top quantile (``n_quantiles``) minus quantile 1. The
        ratio is the mean over the standard deviation of (long-short minus the
        all-bond mean return), multiplied by sqrt(12).
        """
        quantile_col, return_col = self.quantile_col_name, self.return_col
        if not quantile_col in self.df.columns:
            self.compute_quantiles()

        df = self.df

        # group returns by date and the quantile, and compute the average return of the portfolio at the end of every month
        signal_monthly_returns = df.group_by(['date', quantile_col]).agg(
            pl.col(return_col).drop_nans().mean().alias('mean_return'), # mean return of the portfolio at EoM
        ).filter(pl.col(quantile_col).is_not_null())

        signal_monthly_returns = signal_monthly_returns.sort(['date', quantile_col])

        # After the pivot the quantile labels become column names; use the configured
        # number of quantiles instead of assuming deciles.
        top_quantile, bottom_quantile = str(self.n_quantiles), '1'
        long_short_returns = signal_monthly_returns.pivot(index='date', columns=quantile_col, values='mean_return').with_columns(
            (pl.col(top_quantile)-pl.col(bottom_quantile)).alias('long_short')
        ).select('date', 'long_short')
        # NaNs are dropped here as well, consistently with the quantile portfolios above.
        market_returns = df.group_by('date').agg(pl.col(return_col).drop_nans().mean().alias('mkt_return'))

        portfolio_returns = long_short_returns.join(market_returns, how='left', on='date').with_columns(
            delta=(pl.col('long_short')-pl.col('mkt_return'))
        )

        information_ratio = portfolio_returns['delta'].mean() / portfolio_returns['delta'].std()

        return information_ratio * np.sqrt(12)

    def compute_expected_alpha(self):
        """Estimate the average monthly return of every quantile and fit a LOESS curve through it.

        Returns
        -------
        (alpha_estimates, spline)
            ``alpha_estimates`` has one row per quantile with ``avg_monthly_return``
            and ``monthly_stdev``; ``spline`` is the fitted ``loess`` object. Both are
            also cached on the instance.
        """
        quantile_col, return_col = self.quantile_col_name, self.return_col
        if not quantile_col in self.df.columns:
            self.compute_quantiles()

        df = self.df

        # group returns by date and the quantile, and compute the average return of the portfolio at the end of every month
        monthly_returns_by_bucket = df.group_by(['date', quantile_col]).agg(
            pl.col(return_col).drop_nans().mean().alias('mean_return'), # mean return of the portfolio at EoM
        )

        final_portfolio_returns = monthly_returns_by_bucket.group_by(quantile_col).agg(
            pl.col('mean_return').mean().alias('avg_monthly_return'),
            pl.col('mean_return').std().alias('monthly_stdev'),
            ((pl.col('mean_return').mean() / pl.col('mean_return').std()) * np.sqrt(12)).alias('sharpe_ratio'),
        )

        # sort by quantile and drop the row of bonds without a quantile
        alpha_estimates = final_portfolio_returns.sort(by=quantile_col).drop_nulls().select(quantile_col, 'avg_monthly_return', 'monthly_stdev')

        data = alpha_estimates.select(quantile_col, 'avg_monthly_return').to_numpy()
        x, y = data[:, 0], data[:, 1]

        spline = loess(x, y, surface='direct')
        spline.fit()

        self.spline = spline
        self.alpha_estimates = alpha_estimates

        return alpha_estimates, spline

    def fit_alpha_estimates(self):
        """Translate every bond's quantile into a standardised signal value.

        The fitted LOESS curve is evaluated at the bond's quantile, and the result is
        z-scored over the whole frame. The value is written to ``signal_value_name``;
        bonds without a quantile get null. Returns the updated frame.
        """
        df, spline = self.df, self.spline
        quantile_col = self.quantile_col_name
        signal_value_name = self.signal_value_name

        if quantile_col not in df.columns:
            df = self.compute_quantiles()

        if spline is None:
            self.compute_expected_alpha()
            spline = self.spline

        def f(x: pl.Series):
            data = np.asarray(x.to_numpy(), dtype=np.float64)

            values = np.full(len(data), np.nan)
            valid = ~np.isnan(data)
            if valid.any():
                # Only a handful of distinct quantiles exist, so evaluate the curve once
                # per distinct value and broadcast back instead of predicting row by row.
                distinct, inverse = np.unique(data[valid], return_inverse=True)
                predicted = np.asarray(spline.predict(distinct).values, dtype=np.float64)
                values[valid] = predicted[inverse]

            return values

        df = df.with_columns(
            pl.col(quantile_col).map_batches(function=f, return_dtype=pl.Float64).fill_nan(None).alias(signal_value_name),
        ).with_columns(
            ((pl.col(signal_value_name) - pl.col(signal_value_name).drop_nulls().mean()) / pl.col(signal_value_name).drop_nulls().std()).alias(signal_value_name)
        )

        self.df = df
        return df

    def compute_transition_matrix(self, lag: int = 1):
        """Compute the quantile transition probabilities of the signal for a given lag.

        For every bond, the date-ordered sequence of its non-null quantiles is scanned
        and each pair of observations ``lag`` positions apart is counted as a
        transition. One position corresponds to one month only if the bond has an
        observation every month.

        Parameters
        ----------
        lag
            Distance, in observations, between the start and end state. Must be >= 1.

        Returns
        -------
        ``(n_quantiles, n_quantiles)`` array whose row ``i`` holds the probabilities of
        moving from quantile ``i + 1`` to every quantile. Rows without any observed
        transition are all zeros.
        """
        if lag < 1:
            raise ValueError('lag must be a positive integer')

        def _count_transitions(states: np.ndarray, shape, lag: int) -> np.ndarray:
            # one flat index per (from, to) pair, then a histogram over all cells
            flat_coords = np.ravel_multi_index((states[:-lag], states[lag:]), shape)
            return np.bincount(flat_coords, minlength=shape[0] * shape[1]).reshape(shape)

        def _convert_to_probabilities(counts: np.ndarray) -> np.ndarray:
            # normalise every row to sum to one; empty rows stay at zero
            row_totals = counts.sum(axis=1, keepdims=True)
            return np.divide(counts, row_totals, out=np.zeros_like(counts), where=row_totals > 0)

        quantile_col, n_quantiles = self.quantile_col_name, self.n_quantiles
        if not quantile_col in self.df.columns:
            self.compute_quantiles()

        df = self.df
        # group by the cusip, and get the evolution of the quantile during time for each bond
        quantiles_evolution = df.sort('date').filter(pl.col(quantile_col).is_not_null()).group_by('cusip', maintain_order=True).agg(pl.col(quantile_col))

        # matrix of transition counts
        transition_counts = np.zeros((n_quantiles, n_quantiles), dtype=np.float64)

        for row in quantiles_evolution.iter_rows():
            # Quantile labels start at 1; shift by exactly one to get matrix indices.
            # (Shifting by the bond's own minimum would relabel e.g. quantiles 5..7 as 0..2.)
            states = np.asarray(row[1], dtype=np.int64) - 1
            # a bond needs more than `lag` observations to contribute a transition
            if states.size <= lag:
                continue

            transition_counts += _count_transitions(states, transition_counts.shape, lag)

        # convert these to actual probabilities
        transition_probabilities = _convert_to_probabilities(transition_counts)
        self.transition_probabilities = transition_probabilities
        self.transition_lag = lag

        return transition_probabilities

    def plot(self, title: str, probabilities_lag: int = 1):
        """Plot the quantile returns, the transition matrix and the fitted LOESS curve.

        Parameters
        ----------
        title
            Figure title.
        probabilities_lag
            Lag used for the transition matrix shown in the top-right panel.

        Returns
        -------
        The matplotlib figure.
        """
        # NOTE(review): `bsic_constants` is used below but is not imported anywhere in the
        # visible notebook; it must be in scope for this method to run -- confirm its origin.
        quantile_col = self.quantile_col_name
        n_quantiles = self.n_quantiles

        # Recompute when nothing is cached or when the cache was built with another lag,
        # so that the matrix always matches the lag shown in the panel title.
        if self.transition_probabilities is None or getattr(self, 'transition_lag', None) != probabilities_lag:
            self.compute_transition_matrix(probabilities_lag)
        if self.spline is None:
            self.compute_expected_alpha()

        transition_probs = self.transition_probabilities
        alpha_estimates, spline = self.alpha_estimates, self.spline

        fig = plt.figure(figsize=(10,8))
        gs = GridSpec(2,2, figure=fig, bottom=0.1, top=0.9, width_ratios=(3, 2), hspace=0.3)

        barchart_ax = fig.add_subplot(gs[0,0])
        probs_ax = fig.add_subplot(gs[0,1])
        spline_ax = fig.add_subplot(gs[1,:])
        apply_bsic_style(fig, sources=['WRDS', 'openbondassetpricing'])

        bsic_cmap = mcolors.LinearSegmentedColormap.from_list(
            "bsic", ["#8EC6FF", "#38329A", "#0E0B54", "#601E66"]
        )

        # plot the barchart
        barchart_ax.bar(x=alpha_estimates[quantile_col], height=alpha_estimates['avg_monthly_return'] * 1e2, alpha=0.75)
        volatilities_ax = barchart_ax.twinx()
        volatilities_ax.set_ylabel('Monthly Volatility (bps)')
        volatilities_ax.plot(alpha_estimates[quantile_col], alpha_estimates['monthly_stdev'] * 1e4, label='Volatility', color=bsic_constants.BSIC_COLORS[2], linestyle=(0,(5,10)), marker='o')
        volatilities_ax.legend()

        barchart_ax.set_title('Average Monthly Return by Quantile', **bsic_constants.TITLE_STYLE)
        barchart_ax.set_xlabel('Quantile')
        barchart_ax.set_ylabel('Expected Monthly Alpha (%)')

        # plot the transition probability matrix
        probs_ax.imshow(transition_probs, cmap=bsic_cmap, interpolation='nearest')
        probs_ax.set_axis_off()
        probs_ax.set_title(f'Transition Probabilities ({probabilities_lag}Mo, %)', **bsic_constants.TITLE_STYLE)
        for (j,i), label in np.ndenumerate(transition_probs):
            probs_ax.text(i,j,int(label * 100),ha='center',va='center', color='white', fontweight='bold', fontsize=10)

        # plot the spline in the bottom
        spline_ax.axhline(y=0, c='black', lw=1)

        x_new = np.linspace(0,n_quantiles, 100)
        pred = spline.predict(x_new, stderror=True)
        conf = pred.confidence()

        lowess_vals = pred.values
        ll = conf.lower
        ul = conf.upper

        spline_ax.fill_between(x_new,ll,ul,alpha=0.3)
        spline_ax.scatter(x=alpha_estimates[quantile_col], y=alpha_estimates['avg_monthly_return'], label='Expected Alpha', color=bsic_constants.BSIC_COLORS[2])
        spline_ax.plot(x_new, lowess_vals)

        spline_ax.set_title('Fitted LOWESS Estimator, with 95% CI', **bsic_constants.TITLE_STYLE)
        spline_ax.set_xlabel('Quantile')
        spline_ax.set_ylabel('Expected Monthly Alpha')

        suptitle_style = bsic_constants.TITLE_STYLE.copy()
        suptitle_style['fontsize'] = 16
        fig.suptitle(title, **suptitle_style)

        return fig
        

## Carry

For carry, we rank each bond based on its OAS. 

In [None]:

# Carry signal: rank bonds by OAS into 10 quantiles within each (date, bucket) group.
carry_signal = Signal(bond_data, 'oas', 10, 'carry_signal_value')
carry_signal.compute_quantiles()
# Annualised information ratio of the top-minus-bottom quantile portfolio.
ir_carry = carry_signal.information_ratio()


In [None]:
fig = carry_signal.plot(title='Carry Factor', probabilities_lag=6)

In [None]:
export_figure(fig, '../../exports/carry_factor.svg')

In [None]:
ir_carry

In [None]:
carry_signal.fit_alpha_estimates()

In [None]:
bond_data = carry_signal.df

bond_data.head()

In [None]:
bond_data.select('carry_signal_value')

## Momentum (Excess Credit Return)

We use the **Credit Excess Return**, as defined by the AQR paper, to define momentum.

$$
CER_t=\frac{1}{12}Spread_{t-1}-\text{Spread Duration}_{t-1}\times(Spread_t-Spread_{t-1})
$$

Which translated to our data is

$$
CER_t=\frac{1}{12}OAS_{t-1}-\text{spread\_duration}_{t-1}\times(OAS_t-OAS_{t-1})
$$


In [None]:
bond_data.select('YIELD', 'bond_yield').null_count()

In [None]:
# Compute the credit excess return at each timestamp:
#   CER_t = OAS_{t-1} / 12 - spread_duration_{t-1} * (OAS_t - OAS_{t-1})
# Both lagged terms are taken within each CUSIP, so the first observation of every bond
# is null and a bond never borrows the previous bond's values.
# NOTE(review): a one-row lag is treated as one month; this assumes each bond has an
# observation every month -- confirm there are no gaps in the panel.
bond_data = bond_data.sort(['cusip', 'date'])

prev_oas = pl.col('oas').shift(1).over('cusip')
# The formula uses the spread duration known at the start of the period (t-1), not at t.
prev_spread_duration = pl.col('spread_duration').shift(1).over('cusip')

bond_data = bond_data.with_columns(
    excess_credit_ret=(prev_oas / 12 - prev_spread_duration * (pl.col('oas') - prev_oas))
)

In [None]:
def momentum(df: pl.DataFrame, period: str, min_periods: int, n_months: int):
    """Rolling momentum of credit excess returns and of prices, per CUSIP.

    For every (cusip, date) a window of length ``period`` ending at that date is used.
    Two columns are produced:

    * ``mom_<n_months>mo``: compounded ``excess_credit_ret`` over the window.
    * ``price_mom_<n_months>mo``: last ``bondprc`` over first ``bondprc`` in the window, minus 1.

    Both are null unless the window holds strictly more than ``min_periods`` rows.
    """
    # the window must contain strictly more than `min_periods` rows
    has_enough_rows = pl.col('excess_credit_ret').len() > min_periods

    compounded_return = pl.col('excess_credit_ret').add(1).product().sub(1)
    price_change = pl.col('bondprc').last() / pl.col('bondprc').first() - 1

    rolling_windows = df.sort(['cusip', 'date']).rolling('date', group_by='cusip', period=period)
    return rolling_windows.agg(
        pl.when(has_enough_rows).then(compounded_return).otherwise(None).alias(f'mom_{n_months}mo'),
        pl.when(has_enough_rows).then(price_change).otherwise(None).alias(f'price_mom_{n_months}mo'),
    )


# 6- and 12-month momentum; the windows are slightly shorter than the nominal horizon
# ('5mo20d' and '11mo20d').
mom_6mo = momentum(bond_data, period='5mo20d', min_periods=5, n_months=6)
mom_12mo = momentum(bond_data, period='11mo20d', min_periods=9, n_months=12)

# attach both sets of momentum columns to the panel
bond_data = bond_data.join(mom_6mo, how='left', on=['cusip', 'date'])
bond_data = bond_data.join(mom_12mo, how='left', on=['cusip', 'date'])

In [None]:
credit_momentum_signal = Signal(bond_data, 'price_mom_12mo', 10, 'credit_momentum_signal_value')

credit_momentum_signal.compute_quantiles()

In [None]:
fig = credit_momentum_signal.plot('Credit Momentum', 6)

In [None]:
export_figure(fig, '../../exports/momentum_factor.svg')

In [None]:
credit_momentum_signal.fit_alpha_estimates()

In [None]:
bond_data = credit_momentum_signal.df

bond_data.select('credit_momentum_signal_value')

## Value

We use **Excess Spread to Peers** as a Value Signal in our strategy.

In [None]:
# Mean OAS of every (date, bucket) group, ignoring NaNs.
oas_bucket_means = bond_data.group_by(['date', 'bucket']).agg(pl.col('oas').drop_nans().mean().alias('ESP_bucket_mean_oas'))

In [None]:
oas_bucket_means

In [None]:
bond_data = bond_data.join(oas_bucket_means, on=['date', 'bucket'], how='left')

In [None]:
# Excess Spread to Peers: the bond's OAS minus the mean OAS of its (date, bucket) group.
bond_data = bond_data.with_columns( 
    ESP = pl.col('oas') - pl.col('ESP_bucket_mean_oas'))

In [None]:
bond_data.select('de_ratio', 'intcov_ratio', 'debt_ebitda').null_count()

In [None]:
bond_data.head(3)

In [None]:
col = 'ESP'
bond_data.filter(pl.col(col).is_nan()).shape

In [None]:
bond_data.select('ESP').null_count()

In [None]:
# Turn NaN values of ESP into nulls, so that missing values are represented in a single way.
bond_data = bond_data.with_columns(
    ESP=pl.col('ESP').fill_nan(None)
)

In [None]:
bond_data.select('ESP', 'debt_ebitda', 'de_ratio', 'intcov_ratio').dtypes

In [None]:
bond_data.select('ESP').null_count()

In [None]:
# Regress ESP on debt_ebitda, intcov_ratio and de_ratio (plus an intercept) separately
# within every (date, bucket) group, and keep the regression residuals as `esp_residual`.
# NOTE(review): null_policy='drop' presumably excludes rows with a null in any input
# from the fit -- confirm against the polars_ols documentation.
bond_data_with_residuals = bond_data.with_columns(
    esp_residual = pl
        .col('ESP')
        .least_squares
        .ols(pl.col("debt_ebitda"), pl.col("intcov_ratio"), pl.col("de_ratio"), add_intercept=True, mode="residuals", null_policy='drop', solve_method='svd')
        .over(['date', 'bucket'])
)

In [None]:
bond_data = bond_data_with_residuals

In [None]:
value_signal = Signal(bond_data, 'esp_residual', 10, 'esp_signal_value')

In [None]:
fig = value_signal.plot('ESP Factor (Value)', 6)

In [None]:
export_figure(fig, '../../exports/value_factor.svg')

In [None]:
value_signal.fit_alpha_estimates()

In [None]:
bond_data = value_signal.df

bond_data.select('esp_signal_value')

## Equity Momentum in Credit

In [None]:
def equity_momentum(df: pl.DataFrame, period: str, min_periods: int, n_months: int):
    """Rolling compounded equity return per CUSIP.

    For every (cusip, date) a window of length ``period`` ending at that date is used,
    and ``equity_month_return`` is compounded over it into ``eq_mom_<n_months>mo``.
    The value is NaN unless the window holds strictly more than ``min_periods`` rows.
    """
    # the window must contain strictly more than `min_periods` rows
    has_enough_rows = pl.col('equity_month_return').len() > min_periods
    compounded_return = pl.col('equity_month_return').add(1).product().sub(1)

    rolling_windows = df.sort(['cusip', 'date']).rolling('date', group_by='cusip', period=period)
    return rolling_windows.agg(
        pl.when(has_enough_rows).then(compounded_return).otherwise(np.nan).alias(f'eq_mom_{n_months}mo'),
    )

# 6- and 12-month equity momentum, with the same windows as the credit momentum.
eq_mom_6mo = equity_momentum(bond_data, period='5mo20d', min_periods=5, n_months=6)
eq_mom_12mo = equity_momentum(bond_data, period='11mo20d', min_periods=9, n_months=12)

# attach both equity momentum columns to the panel
bond_data = bond_data.join(eq_mom_6mo, how='left', on=['cusip', 'date'])
bond_data = bond_data.join(eq_mom_12mo, how='left', on=['cusip', 'date'])

In [None]:
bond_data.head(2)

In [None]:
equity_momentum_signal = Signal(bond_data, 'eq_mom_12mo', 10, 'equity_momentum_signal_value')

In [None]:
fig = equity_momentum_signal.plot('Equity Momentum in Credit', 6)

In [None]:
export_figure(fig, '../../exports/equity_momentum_factor.svg')

In [None]:
equity_momentum_signal.fit_alpha_estimates()

In [None]:
bond_data = equity_momentum_signal.df

bond_data.select('equity_momentum_signal_value')

## Factors Correlation

In [None]:
import seaborn as sns
import pandas as pd 

In [None]:
# Signal columns and the labels used for them in the correlation table and heatmap.
factor_labels = {
    'credit_momentum_signal_value': 'Credit Momentum',
    'esp_signal_value': 'ESP Factor',
    'carry_signal_value': 'Carry Factor',
    'equity_momentum_signal_value': 'Equity Momentum'
}

# pandas copy of the identifiers and the four signal values, with readable column names
factors: pd.DataFrame = (
    bond_data
    .select('date', 'cusip', *factor_labels)
    .to_pandas()
    .rename(columns=factor_labels)
)

In [None]:
correlations = factors.corr(numeric_only=True)
correlations

In [None]:


# Colormap used for the heatmap.
bsic_cmap = mcolors.LinearSegmentedColormap.from_list(
    "bsic", ["#8EC6FF", "#38329A", "#0E0B54", "#601E66"]
)
# Switch the grid off globally so that it does not show through the heatmap.
plt.rcParams.update({'axes.grid': False})
fig, ax = plt.subplots(figsize=(10,5))

# Hide the upper triangle, diagonal included: the matrix is symmetric.
mask = np.triu(np.ones_like(correlations, dtype=bool))
heatmap = sns.heatmap(correlations, mask=mask, annot=True, cmap=bsic_cmap, square=True, ax=ax)
ax.set_title('Factors Correlation')

apply_bsic_style(fig, ax)
fig.subplots_adjust()

# Remove the tick marks on all four sides, for both major and minor ticks.
ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False)

# NOTE(review): the other exports in this notebook pass an explicit '.svg' suffix -- confirm
# that a path without extension is handled as intended by export_figure.
export_figure(fig, '../../exports/factors_correlations')

## Conclusion

In [None]:
bond_data.head()

In [None]:
bond_data.null_count() / bond_data.shape[0]

In [None]:
# Persist the panel with all signal columns as a zstd-compressed parquet file.
bond_data.write_parquet('../../data/final_data/data_with_signals_all.pq', compression='zstd', compression_level=10)