In [61]:
import sys
from pathlib import Path

sys.path.append(r"/home/maxim-shibanov/Projects_Py/Risk-and-return-prediction-with-LLM/src")

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import copy
import matplotlib.lines as mlines
import re
import json, os
from scipy.stats import t
from joblib import Parallel, delayed
from tqdm.auto import tqdm
from textwrap import dedent
from linearmodels.panel import PanelOLS
from io import StringIO
from sklearn.preprocessing import StandardScaler

from data_analysis.data_fetcher.data_fetcher_class import DataFetcher
from data_collection.consts import  DB_PARAMS

In [62]:
# Build the project's DataFetcher, pointing it at the `reports` and
# `targets_yf` tables of the database described by DB_PARAMS.
fetcher = DataFetcher(db_params=DB_PARAMS, reports_table="reports", targets_table="targets_yf")

Available regressors:
 - avg_default_verbolizer
 - avg_shrink_verbolizer
 - doc_len
 - eps_surprise
 - f_size
 - full_list_default_verbolizer
 - full_list_shrink_verbolizer
 - hv_orig_score
 - lm_orig_score
 - max_abs_default
 - max_abs_shrink
 - max_default_verbolizer
 - max_shrink_verbolizer
 - md_hv1
 - md_hv2
 - md_hv3
 - md_lm1
 - md_lm2
 - md_lm3
 - min_default_verbolizer
 - min_shrink_verbolizer
 - stretch_default
 - stretch_shrink
Available sectors:
 - Technology (92)
 - Industrials (86)
 - Financial Services (85)
 - Healthcare (66)
 - Consumer Cyclical (58)
 - Consumer Defensive (40)
 - Real Estate (32)
 - Utilities (32)
 - Energy (30)
 - Basic Materials (23)
 - Communication Services (22)


  df = pd.read_sql_query(query, conn)
  df = pd.read_sql_query(query, conn)


In [63]:
# Fetch the eight sentiment scores followed by the three extra columns
# (eps_surprise, f_size, doc_len).
# NOTE(review): FEModeler.compute addresses columns by position (1-8 as
# regressors, 9-11 carried along), so the order of this list presumably has
# to stay as is — confirm against DataFetcher.fetch_data.
df = fetcher.fetch_data(
    regressors=[
        "avg_default_verbolizer",
        "avg_shrink_verbolizer",
        "max_abs_default",
        "max_abs_shrink",
        "max_default_verbolizer",
        "max_shrink_verbolizer",
        "min_default_verbolizer",
        "min_shrink_verbolizer",
        "eps_surprise",
        "f_size",
        "doc_len",
    ],
    prepare_fixed_effects=True,
)

  df = pd.read_sql_query(query, conn)
  return pd.read_sql_query(query, conn)
  companies_df = pd.read_sql_query(query, conn)


In [64]:
# Express f_size in units of 1e10.
# NOTE: this overwrites the column in place, so running the cell a second
# time without re-fetching `df` divides by 1e10 again.
df["f_size"] = df["f_size"].div(10**10)

Regressors tested for each target (the same mapping is hard-coded as `ALLOWED` in `FEModeler.compute`):

r_vol ~ avg_default_verbolizer, avg_shrink_verbolizer, max_abs_default, max_default_verbolizer, max_shrink_verbolizer

returns ~ max_abs_default, max_abs_shrink, min_default_verbolizer, min_shrink_verbolizer

e_returns ~ max_abs_shrink

abn_returns ~ max_abs_shrink, max_default_verbolizer, max_shrink_verbolizer, min_default_verbolizer, min_shrink_verbolizer

In [65]:
class FEModeler:
    """
    A class to run panel fixed effects regressions using PanelOLS
    and extract key parameter statistics for a specified regressor.

    Attributes:
        df: The input panel data.
        var_names: Column names of the input DataFrame.
        params_dict: Dictionary storing regression parameters for each independent variable.
    """

    def __init__(self, df: pd.DataFrame, scale: bool = False):
        """
        Initialize the FEModeler with panel data.

        Args:
            df: Panel DataFrame with MultiIndex (entity, time).
            scale: Whether to standardize the regressors.
        """
        self.original_df = df.copy()
        self.df = df.copy()
        self.var_names = df.columns
        self.params_dict = {}
        self.scale = scale

    @staticmethod
    def extract_params(summary, regressor_name: str) -> dict:
        """
        Return β, its standard error, and all R² metrics for `regressor_name`.
        Crashes immediately if the required columns are not present.
        """
        import re
        from io import StringIO
        import pandas as pd

        # ---- 1. coefficient & std-err directly from the second table ----
        coef_html = summary.tables[1].as_html()
        coef_df = pd.read_html(StringIO(coef_html), header=0, index_col=0)[0]

        beta = float(coef_df.loc[regressor_name, "Parameter"])
        se   = float(coef_df.loc[regressor_name, "Std. Err."])   # KeyError if column absent

        # ---- 2. grab R² metrics from the header text ----
        txt = summary.as_text()

        def grab(label: str) -> float:
            return float(re.search(fr"{label}:\s*([-\d.]+)", txt).group(1))

        r2         = grab("R-squared")
        r2_between = grab(r"R-squared \(Between\)")
        r2_within  = grab(r"R-squared \(Within\)")
        r2_overall = grab(r"R-squared \(Overall\)")

        return {
            "beta": beta,
            "se":   se,
            "r2":         r2,
            "r2_between": r2_between,
            "r2_within":  r2_within,
            "r2_overall": r2_overall,
        }
    
    def fit(self, data: pd.DataFrame, formula: str):
        """
        Fit a fixed effects regression model using PanelOLS.

        Args:
            data: A subset of the full DataFrame containing required variables.
            formula: Regression formula in Patsy-style syntax.

        Returns:
            The regression summary object.
        """
        model = PanelOLS.from_formula(formula, data=data, check_rank=True)
        result = model.fit(cov_type='kernel')
        return result.summary

    def compute(self) -> dict[str, list[pd.Series]]:
        """
        Run fixed effects regressions for all combinations of independent and dependent variables,
        and store the parameter summaries for each independent variable.

        Returns:
            A dictionary where keys are independent variable names,
            and values are lists of Series with parameter info for each dependent variable.        """
        
        ALLOWED = {
            "r_vol": [
                "avg_default_verbolizer", "avg_shrink_verbolizer",
                "max_abs_default", "max_default_verbolizer", "max_shrink_verbolizer",
            ],
            "returns": [
                "max_abs_default", "max_abs_shrink",
                "min_default_verbolizer", "min_shrink_verbolizer",
            ],
            "e_returns": ["max_abs_shrink"],
            "abn_returns": [
                "max_abs_shrink", "max_default_verbolizer",
                "max_shrink_verbolizer", "min_default_verbolizer",
                "min_shrink_verbolizer",
            ],
        }

        targets = ['returns', 'e_returns', 'abn_returns', 'r_vol']
        time_frames = ['2_day', '3_day', '4_day', '5_day', '6_day', '7_day', 'full_quarter' ]

        var_names = self.df.columns 

        for x in range(1, 9):
            # Targets starts from y index
            y = 12

            per_target_params = {}
            for target in targets:  
                if target not in per_target_params:
                    per_target_params[target] = {}

                for frame in time_frames:
                    reg_name = var_names[x]
                    if reg_name not in ALLOWED[target]:
                        y += 1  
                        continue  

                    data = self.df.iloc[:, [x, 9, 10, 11, y]].dropna().copy()

                    formula = f"{var_names[y]} ~ {var_names[x]} + eps_surprise + f_size + EntityEffects + TimeEffects"
                    try:
                        result_summary = self.fit(data, formula)
                        param_info = self.extract_params(result_summary, self.var_names[x])
                        per_target_params[target][var_names[y]] = param_info
                        
                    except Exception as e:
                        print(e)
                    y += 1

            self.params_dict[var_names[x]] = per_target_params

        return self.params_dict


In [66]:
class BootStraper:

    def __init__(self, df: pd.DataFrame, num_samples: int):
        self.df = df
        self.num_samples = num_samples
        self.resamples: list[pd.DataFrame] = []

    def create_resamples(self):
        
        def random_matrix(len_sample, num_samples):
            return np.random.randint(0, len_sample, size=(len_sample, num_samples)) 

        len_sample = len(df.index.levels[0])

        matrix = random_matrix(len_sample, self.num_samples)
        
        for sample in range(self.num_samples):
            col =  matrix[:, sample]

            level0_full = df.index.levels[0]
            selected_level0_values = level0_full[col]
            filtered_df = df.loc[selected_level0_values].copy()


            level0 = filtered_df.index.get_level_values(0).to_numpy()
            level1 = filtered_df.index.get_level_values(1).to_numpy()

            block_starts = np.r_[True, level0[1:] != level0[:-1]]
            block_ids = np.cumsum(block_starts) - 1 

            new_index = pd.MultiIndex.from_arrays(
                [block_ids, level1],
                names=filtered_df.index.names
            )

            filtered_df.index = new_index
            self.resamples.append(filtered_df)

    @staticmethod
    def fit_FE(df: pd.DataFrame):

        model = FEModeler(df=df, scale=False)
        return model.compute()
    
    @staticmethod
    def _deep_append(base: dict, new: dict):
        """
        Merge `new` into `base` in place.

        Rules
        -----
        • Internal nodes are plain dicts; recurse.
        • A terminal node is a dict whose *values are scalars* (not dicts).
          For terminals we build/extend **dicts of lists**, so after N
          draws every statistic key holds N values.
        """
        for key, val in new.items():

            # -------- leaf test: val is dict but none of its values is a dict
            is_leaf = isinstance(val, dict) and not any(
                isinstance(x, dict) for x in val.values()
            )

            # -------- branch doesn't exist yet
            if key not in base:
                if is_leaf:
                    # first encounter → start lists
                    base[key] = {k: [v] for k, v in val.items()}
                else:
                    base[key] = {}
                    BootStraper._deep_append(base[key], val)
                continue

            # -------- branch exists
            if is_leaf:
                # ensure existing branch is dict of lists
                for stat_k, stat_v in val.items():
                    if stat_k not in base[key]:
                        base[key][stat_k] = []
                    base[key][stat_k].append(stat_v)
            else:
                BootStraper._deep_append(base[key], val)

    @staticmethod
    def drop_nulls(d: dict):
        keys_to_del = {}

        for regressor in d:
            for target, value in d[regressor].items():
                if not d[regressor][target]:
                    if not value:                                  
                        keys_to_del.setdefault(regressor, []).append(target)

        for reg in keys_to_del:
            for target in keys_to_del[reg]:
                del d[reg][target]

        return d

    def compute_obs(self):
        params = self.fit_FE(self.df)
        return self.drop_nulls(params)
    
    def save_json(self, filename: str = "bootstrap_params.json"):
        """Write self.params to JSON in the current directory."""
        if not hasattr(self, "params"):
            raise AttributeError("Run .run() first; self.params not set.")

        path = os.path.join(os.getcwd(), filename)
        with open(path, "w") as f:
            json.dump(self.params, f,
                      indent=2, allow_nan=False)
        print(f"Saved bootstrap results to {path}")

    def load_json(self, filename: str = "bootstrap_params.json") -> dict:
        """Load params from JSON into self.params and return it."""
        path = os.path.join(os.getcwd(), filename)
        with open(path, "r") as f:
            self.params = json.load(f)
        print(f"Loaded bootstrap results from {path}")
        return self.params

    def run(self, n_jobs: int = -1) -> dict:
        """
        * Creates all resamples,
        * fits FE regressions on each (in parallel),
        * returns a nested dict where every leaf is **a list of Series**
          (one Series per bootstrap draw).
        """
        #  1. resample list
        self.create_resamples()

        #  2. parallel estimation with progress bar
        computed = Parallel(n_jobs=n_jobs)(
            delayed(self.fit_FE)(res)
            for res in tqdm(self.resamples, desc="Bootstrapping")
        )

        #  3. combine
        params: dict = {}
        for res_dict in computed:
            self._deep_append(params, res_dict)
        
        self.params = self.drop_nulls(params)
        self.save_json()

        return self.params
    
    def compute_pvalues(self,
                        right_tail_targets=("returns",
                                            "e_returns",
                                            "abn_returns"),
                        left_tail_targets=("r_vol",),
                        store_as: str = "pvals") -> dict:
        """
        Calculates studentised bootstrap p-values:

            p̂ = (1 + Σ 1{Z_b ≥ Z_obs}) / (B + 1)   for right-tail targets
            p̂ = (1 + Σ 1{Z_b ≤ Z_obs}) / (B + 1)   for left-tail targets

        Z = beta / se   (original and bootstrap).

        Returns a nested dict mirroring self.params with scalar p-values.
        """
        obs = self.compute_obs()

        if not hasattr(self, "params"):
            raise AttributeError("Run .run() first; self.params missing.")
        
        B = self.num_samples

        pvals: dict = {}

        for reg, tgt_dict in obs.items():
            for tgt, tf_dict in tgt_dict.items():
                for tf, obs_stats in tf_dict.items():
                    beta_obs = obs_stats["beta"]
                    se_obs   = obs_stats["se"]
                    z_obs    = beta_obs / se_obs  
        

                    bstats = self.params[reg][tgt][tf]
                    beta_b = np.asarray(bstats["beta"])
                    se_b   = np.asarray(bstats["se"])
                    z_boot = (beta_b - beta_obs) / se_b 

                    z_boot_raw = beta_b / se_b 
                    q_lo, q_hi = np.percentile(z_boot_raw, [2.5, 97.5])

                    if tgt in right_tail_targets:
                        pv = (1 + np.sum(z_boot >= z_obs)) / (B + 1)
                    elif tgt in left_tail_targets:
                        pv = (1 + np.sum(z_boot <= z_obs)) / (B + 1)
                    else:
                        raise ValueError(f"Target {tgt!r} not classified.")

                    pvals.setdefault(reg, {}) \
                         .setdefault(tgt, {})[tf] = {
                             "pval":  pv,
                             "ci_lo": q_lo,
                             "ci_hi": q_hi,
                         }

        setattr(self, store_as, pvals)

        return pvals
    
    @staticmethod
    def _tf_label(tf: str) -> str:
        """
        Map column names like
            "two_day_r_vol", "4_day_returns", "full_quarter_abn_r"
        to compact codes
            "2d", "4d", "fq".
        """
        stem = tf.split("_r")[0]                  # keep part before first "_r"
        if "full_quarter" in stem:
            return "fq"

        # digits case: "4_day" etc.
        m = re.match(r"(\d+)_day", stem)
        if m:
            return f"{m.group(1)}d"

        # word case: "two_day", "three_day" … → use dict
        word_num = {
            "one": 1, "two": 2, "three": 3, "four": 4,
            "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
        }
        m = re.match(r"([a-z]+)_day", stem)
        if m and (w := m.group(1)) in word_num:
            return f"{word_num[w]}d"

        return stem 

    def plot_dist(
        self,
        reg: str,
        tgt: str,
        tf: str,
        bins: int = 40,
        figsize=(6, 4),
        dpi: int = 150,
    ):
        """
        Draw the histogram of uncentred bootstrap t-statistics (β_b / se_b)
        for one (regressor, target, column) cell.

        The figure also shows a red dashed line at zero and black lines at
        the stored ``ci_lo`` / ``ci_hi``; the title carries the compact
        window label and the stored p-value.

        Returns:
            The matplotlib Figure (left open; the caller decides when to
            close it).

        Raises:
            AttributeError: if ``self.params`` or ``self.pvals`` is missing.
        """
        if not (hasattr(self, "params") and hasattr(self, "pvals")):
            raise AttributeError("Run .run() and .compute_pvalues() first.")

        # Bootstrap draws for this cell -> raw t-statistics.
        draws = self.params[reg][tgt][tf]
        t_stats = np.asarray(draws["beta"]) / np.asarray(draws["se"])

        # Interval bounds and p-value computed earlier by compute_pvalues().
        saved = self.pvals[reg][tgt][tf]
        lower, upper, p_value = saved["ci_lo"], saved["ci_hi"], saved["pval"]

        title = f"{reg} → {tgt} [{self._tf_label(tf)}]  (p = {p_value:.4f})"

        fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
        ax.hist(t_stats, bins=bins, color="#1f4e99", edgecolor="none", alpha=.85)

        ax.axvline(0, color="red", lw=1.2, ls="--")
        ax.axvline(lower, color="black", lw=2.5, label="95 % empirical CI")
        ax.axvline(upper, color="black", lw=2.5)

        ax.legend(loc="upper left", frameon=False, fontsize=9)
        ax.set_title(title, fontsize=11, pad=10)
        ax.set_xlabel(r"$Z_b^{*}$")
        ax.set_ylabel("Frequency")
        ax.grid(alpha=.25)
        plt.tight_layout()
        return fig
    
    def plot_all_dists(self,
                       save: bool = False,
                       out_dir: str = "bootstrap_plots",
                       show: bool = True,
                       bins: int = 40):
        """
        Generate a histogram/CI/p-value plot for every
        (regressor, target, timeframe) combo in self.params.

        Parameters
        ----------
        save    : bool
            If True, save each figure as PNG under *out_dir* instead of—or
            in addition to—displaying it on screen.
        out_dir : str
            Directory where PNGs are written (created if absent).
        show    : bool
            If True, call plt.show();  if False, figures are kept in
            memory (useful when saving only).
        bins    : int
            Histogram bin count passed to plot_dist().
        """

        if not hasattr(self, "pvals"):
            raise AttributeError("Run .compute_pvalues() first.")

        if save and not os.path.isdir(out_dir):
            os.makedirs(out_dir, exist_ok=True)

        # iterate with progress bar
        combos = (
            (reg, tgt, tf)
            for reg, tgtd in self.params.items()
            for tgt, tfd  in tgtd.items()
            for tf        in tfd.keys()
        )

        for reg, tgt, tf in tqdm(list(combos), desc="Plotting distributions"):
            # draw the figure but don't immediately close it
            fig = self.plot_dist(reg, tgt, tf, bins=bins, figsize=(6, 4))

            if save:
                fname = f"{reg}__{tgt}__{tf}.png".replace("/", "_")
                fig.savefig(os.path.join(out_dir, fname), dpi=150)

            if not show:
                plt.close(fig) 

    # -----------------------------------------------------------
    def get_latex_pval_table(self) -> str:
        """
        Build a longtable where each cell shows

            p-value
            [ ci_lo , ci_hi ]

        pulled from self.pvals.

        Returns
        -------
        str   – ready-to-include LaTeX code.
        """

        # ---------- settings ----------
        REGRESSOR_ORDER = [
            "avg_default_verbolizer", "avg_shrink_verbolizer",
            "max_abs_default", "max_abs_shrink", "max_default_verbolizer",
            "max_shrink_verbolizer", "min_default_verbolizer",
            "min_shrink_verbolizer",
        ]
        TARGET_ORDER = ["returns", "e_returns", "abn_returns", "r_vol"]
        WINDOWS = [                             # (prefix, table label)
            ("two_day",    "2d"),
            ("three_day",  "3d"),
            ("four_day",   "4d"),
            ("five_day",   "5d"),
            ("six_day",    "6d"),
            ("seven_day",  "7d"),
            ("full_q",     "fq"),
        ]

        # ---------- LaTeX scaffolding ----------
        TABLE_HEADER = r"""
    \FloatBarrier
    \setcellgapes{2pt}\makegapedcells
    \begin{center}\small
    \begin{longtable}{ll*{7}{c}}
    \caption{Bootstrap \emph{p}-values with 95\% empirical CIs in brackets.}
      \label{tab:pval-longtable}\\
    \toprule
    \multicolumn{2}{l}{} & \multicolumn{7}{c}{Return window}\\
    \cmidrule(l){3-9}
    Regressor & Target & 2d & 3d & 4d & 5d & 6d & 7d & fq\\
    \midrule
    \endfirsthead

    \toprule
    \multicolumn{2}{l}{} & \multicolumn{7}{c}{Return window (continued)}\\
    \cmidrule(l){3-9}
    Regressor & Target & 2d & 3d & 4d & 5d & 6d & 7d & fq\\
    \midrule
    \endhead

    \midrule \multicolumn{9}{r}{\emph{Continued on next page}}\\
    \endfoot

    \bottomrule
    \endlastfoot
    """.lstrip(
            "\n"
        )

        TABLE_TAIL = r"""
    \end{longtable}
    \end{center}
    \FloatBarrier
    """.lstrip(
            "\n"
        )

        CONTROLS_NOTE = (
            r"\multicolumn{9}{l}{\textit{Controls:} "
            r"firm FE, time FE, firm size, EPS surprise}\\"
        )


        # ---------- helper functions ----------
        def star(p: float) -> str:
            if p < 0.001: return r"\textsuperscript{***}"
            if p < 0.01:  return r"\textsuperscript{**}"
            if p < 0.05:  return r"\textsuperscript{*}"
            return ""


        def latex_cell(p: float, lo: float, hi: float) -> str:
            r"""
            Generates a three-line \makecell:

                0.0323^{*}
                -1.27
                -0.63
            """
            return (
                r"\makecell{{\num{{{p:.4f}}}{stars}\\[-0.2ex]"
                r"\footnotesize[\,\num{{{lo:.4f}}}\\[-0.2ex]"
                r"\footnotesize\num{{{hi:.4f}}}]}}"
            ).format(p=p, stars=star(p), lo=lo, hi=hi)

        def fetch_window_pvals(tgt_dict: dict, prefix: str):
            """Return dict with p, lo, hi for first key starting with prefix."""
            for k, v in tgt_dict.items():
                if k.startswith(prefix):
                    return {
                        "p": v["pval"],
                        "lo": v["ci_lo"],
                        "hi": v["ci_hi"],
                    }
            return None

        def latex_escape(s: str) -> str:
            return s.replace("_", r"\_")

        # ---------- build table body ----------
        pvals = self.pvals  # assumes compute_pvalues() has run
        lines = []

        for reg in REGRESSOR_ORDER:
            if reg not in pvals:
                continue
            reg_dict = pvals[reg]
            n_targets = sum(1 for t in TARGET_ORDER if t in reg_dict)
            first_row = True

            for tgt in TARGET_ORDER:
                if tgt not in reg_dict:
                    continue
                tgt_dict = reg_dict[tgt]
                cells = []
                for prefix, _lbl in WINDOWS:
                    entry = fetch_window_pvals(tgt_dict, prefix)
                    if entry is not None:
                        cells.append(latex_cell(entry["p"], entry["lo"], entry["hi"]))
                    else:
                        cells.append(r"--")  # placeholder

                if first_row:
                    label = rf"\multirow[t]{{{n_targets}}}{{*}}{{{latex_escape(reg)}}}"
                    first_row = False
                else:
                    label = " " * 12

                lines += [
                    rf"{label} & {latex_escape(tgt)} & " + " & ".join(cells) + r" \\",
                    r"\cline{2-9}",
                ]

            lines[-1] = r"\midrule"  # change last cline → midrule

        lines.pop()                 # drop final midrule
        lines.append(CONTROLS_NOTE)

        body = "\n".join(lines)
        return TABLE_HEADER + body + TABLE_TAIL

In [None]:
# 3000 entity-level bootstrap draws. run() resamples, fits every draw through
# joblib and writes the merged statistics to bootstrap_params.json in the
# current working directory.
bootsrtap = BootStraper(df=df, num_samples=3000)
bootsrtap.run()

Bootstrapping:   0%|          | 0/3000 [00:00<?, ?it/s]

In [None]:
# Re-fits the regressions on the original panel, compares them with the
# bootstrap draws and returns the nested p-value dict (also stored on the
# object as `bootsrtap.pvals`).
bootsrtap.compute_pvalues()

{'avg_default_verbolizer': {'r_vol': {'two_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -2.7033402980034613,
    'ci_hi': -0.34863466751764727},
   'three_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -2.7965891414521553,
    'ci_hi': -0.8912327912579602},
   'four_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -2.71577510082047,
    'ci_hi': -1.0738293411316169},
   'five_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -3.2025576877092297,
    'ci_hi': -1.390611258553848},
   'six_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -3.510302019205163,
    'ci_hi': -1.4817846474019176},
   'seven_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -3.2490490050633727,
    'ci_hi': -1.4247958257044386},
   'full_q_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -4.242229166666667,
    'ci_hi': -1.0561904040743992}}},
 'avg_shrink_verbolizer': {'r_vol': {'two_day_r_vol': {'pval': 0.03225806451612903,
    'ci_lo': -2.0966248892846373,
    'ci_hi'

In [None]:
# Print the LaTeX longtable built from `bootsrtap.pvals`; requires that
# compute_pvalues() has been run on this object.
print(bootsrtap.get_latex_pval_table()) 

    \FloatBarrier
    \setcellgapes{2pt}\makegapedcells
    \begin{center}\small
    \begin{longtable}{ll*{7}{c}}
    \caption{Bootstrap \emph{p}-values with 95\% empirical CIs in brackets.}
      \label{tab:pval-longtable}\\
    \toprule
    \multicolumn{2}{l}{} & \multicolumn{7}{c}{Return window}\\
    \cmidrule(l){3-9}
    Regressor & Target & 2d & 3d & 4d & 5d & 6d & 7d & fq\\
    \midrule
    \endfirsthead

    \toprule
    \multicolumn{2}{l}{} & \multicolumn{7}{c}{Return window (continued)}\\
    \cmidrule(l){3-9}
    Regressor & Target & 2d & 3d & 4d & 5d & 6d & 7d & fq\\
    \midrule
    \endhead

    \midrule \multicolumn{9}{r}{\emph{Continued on next page}}\\
    \endfoot

    \bottomrule
    \endlastfoot
    \multirow[t]{1}{*}{avg\_default\_verbolizer} & r\_vol & \makecell{\num{0.0323}\textsuperscript{*}\\[-0.2ex]\footnotesize[\,\num{-2.7033}\\[-0.2ex]\footnotesize\num{-0.3486}]} & \makecell{\num{0.0323}\textsuperscript{*}\\[-0.2ex]\footnotesize[\,\num{-2.7966}\\[-0.2ex]\foo

In [None]:
# NOTE: every compute_pvalues() call re-fits the observed regressions; if the
# earlier p-value cell was already run, `bootsrtap.pvals` is populated and
# this call only repeats that work.
bootsrtap.compute_pvalues()
# NOTE(review): with show=False and the default save=False each figure is
# closed right after it is drawn, so nothing is displayed or written to disk —
# pass save=True if PNG files are intended.
bootsrtap.plot_all_dists(show=False)

Plotting distributions:   0%|          | 0/105 [00:00<?, ?it/s]