# Preamble

In [1]:
%matplotlib notebook

## Notebook parameters

In [2]:
NAME = 'c_3_generate_narrative_statistics'
PROJECT = 'covid-empirical'
PYTHON_VERSION = '3.9'
USER = 'Ties'
CONDA_ENVIRONMENT = 'covid-empirical'
USE_EXTERNAL_PIPELINE = True

## Run preamble script

In [3]:
%run -i preamble.py 

----------------------------------------------------------------------------------
The following utility functions are loaded and available through `functions.<..>`:
----------------------------------------------------------------------------------

extract_data_edgar_link, fast_load_json, fast_store_json, flatten_multiindex_column, inDB, recreate_edgar_link

----------------------------------------------------------------
The following modules and functions are imported by preamble.py:
----------------------------------------------------------------

copy, delayed, importlib, json, math, np, orjson, os, pd, plt, pqdm_p, pqdm_t, random, re, requests, sys, time, yaml


## Notebook specific imports

In [4]:
from bs4 import BeautifulSoup
import html
import unidecode

### Scipy

In [10]:
import scipy.stats

import inspect

def props(obj):
    pr = {}
    for name in dir(obj):
        value = getattr(obj, name)
        if not name.startswith('_') and not inspect.ismethod(value):
            if isinstance(value, float):
                pr[name] = value
    return pr

--------------------
# Load Data
---------------------

## Filing Meta data

In [11]:
filing_df = pd.read_hdf(Path.cwd() / '2_pipeline' / 'a_6a_download_raw_filings' / 'out' / 'filing_df.h5')

In [12]:
filing_df['Cal_year'] = filing_df['reportDate'].dt.year

In [13]:
filing_df['Cal_quarter'] = filing_df['reportDate'].dt.quarter

## Load regression data

## Load text data

In [14]:
full_metric_df = pd.read_stata(Path.cwd() / '2_pipeline' / 'a_6c_calculate_text_metrics' / 'out'/ 'text_statistics.dta')
full_metric_df.reset_index(drop = True, inplace=True)

In [15]:
full_metric_df = pd.merge(full_metric_df, filing_df[['uniqueID', 'Cal_year', 'Cal_quarter']], on = 'uniqueID', how='left')

### Deal with missing values

In [17]:
cols_to_zero = ['number_of_sentences', 'number_of_tokens', 'number_of_words', 'number_of_covid_words', 'number_of_risk_factors', 'number_of_numbers']
for i, row in tqdm(full_metric_df.iterrows(), total = len(full_metric_df)):
    if row['sections_extracted'] == 1:
        if not pd.isna(row['mda_number_of_sentences']):
            for prefix in ['rf_', 'mda_']:
                for col in cols_to_zero:
                    try:
                        if pd.isna(row[prefix + col]):
                            full_metric_df.at[i,prefix+col] = 0
                    except KeyError:
                        pass

  0%|          | 0/81109 [00:00<?, ?it/s]

## Store

In [18]:
full_metric_df.to_stata(pipeline / 'out' / 'text_analytics.dta', write_index=False)

In [19]:
reg_df = pd.read_stata(Path.cwd() / '2_pipeline'/ 'c_1_generate_variables' / 'out' / 'regression_data.dta')

### Cik lookup

In [20]:
cik_sample_lookup = {}
for name, df in reg_df[reg_df.Cal_year.isin([2019, 2020, 2021])].groupby('closing_qtr'):
    cik_sample_lookup[int(name)] = list(df['cik'].unique())

In [21]:
pd.isna(reg_df.full_number_of_sentences).value_counts()

False    31142
True       548
Name: full_number_of_sentences, dtype: int64

# Analytics

## Prep

### Create indicator

In [23]:
for col in ['full_number_of_covid_words', 'rf_number_of_risk_factors', 'rf_number_of_covid_words']:
    full_metric_df['d_'+ col] = full_metric_df[col] > 0 & pd.notna(full_metric_df[col])
    full_metric_df['d_'+ col] = full_metric_df['d_'+ col] ** 1

In [24]:
for col in ['full_number_of_covid_words', 'rf_number_of_risk_factors', 'rf_number_of_covid_words']:
    reg_df['d_'+ col] = reg_df[col] > 0 & pd.notna(reg_df[col])
    reg_df['d_'+ col] = reg_df['d_'+ col] ** 1

### Labeling

In [25]:
text_cols = ["full_number_of_sentences", 'full_number_of_covid_words', 'mda_number_of_sentences', 'rf_number_of_risk_factors', 'rf_number_of_covid_words',
             "full_fog_index", 'd_full_number_of_covid_words', 'd_rf_number_of_covid_words', 'mda_number_of_covid_words']

In [26]:
id_cols =  ['form']

## Coverage statistics

In [27]:
text_fname = full_metric_df.fname.unique()
sample_fname = reg_df.fname.unique()

In [28]:
print(f'Coverage: {len(set(sample_fname).intersection(set(text_fname))) / len(set(sample_fname)) * 100:.2f}%')

Coverage: 99.58%


## Merge two datasets

In [29]:
to_keep = ['fname', 'cik', 'Cal_year']
for col in reg_df:
    if col in full_metric_df.columns:
        if col not in to_keep:
            reg_df = reg_df.drop(col, axis=1)
        print(f'''Dropped {col} from `reg_df`''')

Dropped cik from `reg_df`
Dropped Cal_year from `reg_df`
Dropped fname from `reg_df`
Dropped form from `reg_df`
Dropped full_number_of_sentences from `reg_df`
Dropped mda_number_of_sentences from `reg_df`
Dropped rf_number_of_risk_factors from `reg_df`
Dropped full_fog_index from `reg_df`
Dropped full_number_of_covid_words from `reg_df`
Dropped rf_number_of_covid_words from `reg_df`
Dropped d_full_number_of_covid_words from `reg_df`
Dropped d_rf_number_of_risk_factors from `reg_df`
Dropped d_rf_number_of_covid_words from `reg_df`


In [30]:
reg_df = pd.merge(reg_df.drop('form', axis = 1, errors='ignore'), full_metric_df.drop(['cik', 'Cal_year'], axis = 1), how='left', on = 'fname')

In [31]:
period_label_dict = {
    2019 : 'Avg. 2019',
    20191 : 'Q1 2019',
    20192 : 'Q2 2019',
    20193 : 'Q3 2019',
    20194 : 'Q4 2019',
    20201 : 'Q1 2020',
    20202 : 'Q2 2020',
    20203 : 'Q3 2020',
    20204 : 'Q4 2020',
    20211 : 'Q1 2021',
    20212 : 'Q2 2021'
}

### Add indicators

In [32]:
for year_1, year_2 in [(2019, 2020), (2019, 2021), (2020, 2021)]:
    for quarter in [1, 2, 3, 4]:
        if year_2 == 2021 and quarter > 2:
            pass
        else:     
            comparison_group = f'group_{year_1}_vs_{year_2}_q{quarter}'
            reg_df[comparison_group] = reg_df.apply(lambda x: x['Cal_year'] in [year_1, year_2] and x['Cal_quarter'] == quarter,axis=1)
            reg_df[comparison_group] = reg_df[comparison_group]**1
            print(comparison_group)

group_2019_vs_2020_q1
group_2019_vs_2020_q2
group_2019_vs_2020_q3
group_2019_vs_2020_q4
group_2019_vs_2021_q1
group_2019_vs_2021_q2
group_2020_vs_2021_q1
group_2020_vs_2021_q2


## Generate sub tables

In [34]:
combo_list = [
    {'l' : 2019, 'r' : 2020, 'e' : 2021, 'q' : 1, 'form' : '10-Q'},
    {'l' : 2019, 'r' : 2020, 'e' : 2021, 'q' : 2, 'form' : '10-Q'},
    {'l' : 2019, 'r' : 2020, 'e' : None, 'q' : 3, 'form' : '10-Q'},
    {'l' : 2019, 'r' : 2020, 'e' : None, 'q' : 4, 'form' : '10-K'},
    {'l' : 2019, 'r' : 2021, 'e' : 2020, 'q' : 1, 'form' : '10-Q'},
    {'l' : 2019, 'r' : 2021, 'e' : 2020, 'q' : 2, 'form' : '10-Q'},
]

In [35]:
df_to_use = reg_df.copy()
df_to_use.dropna(subset = text_cols, how = 'any')
form = 'all'
collection_df = pd.DataFrame()
sig_df = pd.DataFrame()
for combo_dict in combo_list:
    year_1, year_2, quarter, form  = combo_dict['l'], combo_dict['r'], combo_dict['q'], combo_dict['form']
    
    comparison_group = f'group_{year_1}_vs_{year_2}_q{quarter}'
    
    if combo_dict['e']:
        c_group_2 = f'group_{year_1}_vs_{combo_dict["e"]}_q{quarter}'
        focus_df = df_to_use[(df_to_use[comparison_group] == 1) | (df_to_use[c_group_2] == 1)]
        tmp = focus_df.groupby('cik').size()
        tmp = tmp[tmp == 3]
    else:
        focus_df = df_to_use[(df_to_use[comparison_group] == 1)]
        tmp = focus_df.groupby('cik').size()
        tmp = tmp[tmp == 2]

    ## Calculate pre-post
    focus_df['Post'] = focus_df['Cal_year'].astype(int) == year_2 #sec_date_year
    focus_df['Post'] = focus_df['Post']**1

    focus_df = focus_df[focus_df.cik.isin(tmp.index.to_list())]
    focus_df = focus_df[(focus_df[comparison_group] == 1)]

    ## Only keep CIK that exists in our  regression sample
    focus_df = focus_df[focus_df.apply(lambda x: x['cik_padded'] in cik_sample_lookup[x['Cal_quarter']], axis=1)]

    stat_list = []
    for name, df in focus_df.groupby('Post'):
        perc_10k = df.form.value_counts(normalize = True).loc['10-K'] * 100
        orig_size = len(df.index)
        df = df[id_cols + text_cols]
        df = df.dropna(subset = text_cols, how = 'any')

        print(f'{name: < 8} - Coverage: {len(df) / orig_size * 100:.2f}% - Percentage 10-K {perc_10k:.2f}%')

        for tform in ['10-K', '10-Q', 'all']:
            if tform != 'all':
                desc_df = df[df.form == tform].describe()
            else:
                desc_df = df.describe()

            for metric in ['count', 'mean', '50%']:
                metric_dict = desc_df.loc[metric].to_dict()
                metric_dict['group'] = name
                metric_dict['metric'] = metric
                metric_dict['form'] = tform
                stat_list.append(metric_dict)
                
    ## Significance test
    pre_df,post_df = focus_df[focus_df['Post'] == 0], focus_df[focus_df['Post'] == 1]

    pre_df = pre_df[id_cols + text_cols]
    pre_df = pre_df.dropna(subset = text_cols, how = 'any')

    post_df = post_df[id_cols + text_cols]
    post_df = post_df.dropna(subset = text_cols, how = 'any')

    pre_df = pre_df[pre_df.form == form]
    post_df = post_df[post_df.form == form]
    
    sig_list = []
    for text_col in text_cols:
        sig_dict = {
            'comparison' : comparison_group,
            'column' : text_col,
            'form' : form
        }
        sig_dict['pre_mean'], sig_dict['post_mean'] = pre_df[text_col].mean(), post_df[text_col].mean()
        sig_dict['dif'] = sig_dict['post_mean'] - sig_dict['pre_mean']
        sig_res = props(scipy.stats.ttest_ind(pre_df[text_col], post_df[text_col]))
        for k,v in sig_res.items():
            sig_dict[k] = v

        sig_list.append(sig_dict)
        
    sig_df = sig_df.append(pd.DataFrame(sig_list))
    
    ## Wrap up stats table
        
    stat_df = pd.DataFrame(stat_list)
    first_col = ['group', 'metric', 'form']
    stat_df = stat_df[first_col + [x for x in stat_df.columns if x not in first_col]]

    print()
    print(comparison_group, form)

    mean_stats_df = stat_df[(stat_df.metric == 'mean') & (stat_df.form == form)].reset_index(drop=True).round(2).drop(['metric', 'form', 'group'],axis=1)

    num_obs = int(stat_df[(stat_df.metric == 'count') & (stat_df.form == form)].reset_index(drop=True)['full_number_of_sentences'].iloc[0])
    mean_stats_df['N']  = num_obs

    for col in ['full_number_of_sentences', 'mda_number_of_sentences']:
        mean_stats_df[col] = mean_stats_df[col].astype(int)

    mean_stats_df = mean_stats_df.T

    mean_stats_df['change'] = mean_stats_df[1] - mean_stats_df[0]
    mean_stats_df = mean_stats_df.rename(columns = {0 : f'Q{quarter} - {year_1}', 1:f'Q{quarter} - {year_2}'})

    display(mean_stats_df)

    collection_df  = collection_df.append(mean_stats_df.T) 
    
    ## Add empty row
    collection_df = collection_df.append(pd.Series(name=''), ignore_index=False)

    display(stat_df[(stat_df.metric == 'count') & (stat_df.form == form)].reset_index(drop=True))
    
sig_df = sig_df.round(3)

 0       - Coverage: 87.94% - Percentage 10-K 7.14%
 1       - Coverage: 89.02% - Percentage 10-K 8.68%

group_2019_vs_2020_q1 10-Q


Unnamed: 0,Q1 - 2019,Q1 - 2020,change
full_number_of_sentences,599.0,646.0,47.0
full_number_of_covid_words,0.04,36.28,36.24
mda_number_of_sentences,188.0,210.0,22.0
rf_number_of_risk_factors,7.49,8.88,1.39
rf_number_of_covid_words,0.0,11.15,11.15
full_fog_index,19.64,19.87,0.23
d_full_number_of_covid_words,0.01,0.99,0.98
d_rf_number_of_covid_words,0.0,0.83,0.83
mda_number_of_covid_words,0.02,16.74,16.72
N,2391.0,2391.0,0.0


Unnamed: 0,group,metric,form,full_number_of_sentences,full_number_of_covid_words,mda_number_of_sentences,rf_number_of_risk_factors,rf_number_of_covid_words,full_fog_index,d_full_number_of_covid_words,d_rf_number_of_covid_words,mda_number_of_covid_words
0,0,count,10-Q,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0
1,1,count,10-Q,2438.0,2438.0,2438.0,2438.0,2438.0,2438.0,2438.0,2438.0,2438.0


 0       - Coverage: 87.09% - Percentage 10-K 6.09%
 1       - Coverage: 87.95% - Percentage 10-K 6.02%

group_2019_vs_2020_q2 10-Q


Unnamed: 0,Q2 - 2019,Q2 - 2020,change
full_number_of_sentences,647.0,702.0,55.0
full_number_of_covid_words,0.05,42.81,42.76
mda_number_of_sentences,211.0,239.0,28.0
rf_number_of_risk_factors,8.88,9.92,1.04
rf_number_of_covid_words,0.01,11.18,11.17
full_fog_index,19.56,19.8,0.24
d_full_number_of_covid_words,0.01,1.0,0.99
d_rf_number_of_covid_words,0.0,0.74,0.74
mda_number_of_covid_words,0.02,22.39,22.37
N,2455.0,2455.0,0.0


Unnamed: 0,group,metric,form,full_number_of_sentences,full_number_of_covid_words,mda_number_of_sentences,rf_number_of_risk_factors,rf_number_of_covid_words,full_fog_index,d_full_number_of_covid_words,d_rf_number_of_covid_words,mda_number_of_covid_words
0,0,count,10-Q,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0
1,1,count,10-Q,2487.0,2487.0,2487.0,2487.0,2487.0,2487.0,2487.0,2487.0,2487.0


 0       - Coverage: 89.32% - Percentage 10-K 6.17%
 1       - Coverage: 88.40% - Percentage 10-K 6.03%

group_2019_vs_2020_q3 10-Q


Unnamed: 0,Q3 - 2019,Q3 - 2020,change
full_number_of_sentences,663.0,720.0,57.0
full_number_of_covid_words,0.07,41.09,41.02
mda_number_of_sentences,213.0,241.0,28.0
rf_number_of_risk_factors,9.44,10.41,0.97
rf_number_of_covid_words,0.01,10.55,10.54
full_fog_index,19.83,20.05,0.22
d_full_number_of_covid_words,0.02,1.0,0.98
d_rf_number_of_covid_words,0.0,0.66,0.66
mda_number_of_covid_words,0.03,21.4,21.37
N,2416.0,2416.0,0.0


Unnamed: 0,group,metric,form,full_number_of_sentences,full_number_of_covid_words,mda_number_of_sentences,rf_number_of_risk_factors,rf_number_of_covid_words,full_fog_index,d_full_number_of_covid_words,d_rf_number_of_covid_words,mda_number_of_covid_words
0,0,count,10-Q,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
1,1,count,10-Q,2395.0,2395.0,2395.0,2395.0,2395.0,2395.0,2395.0,2395.0,2395.0


 0       - Coverage: 90.46% - Percentage 10-K 81.09%
 1       - Coverage: 90.00% - Percentage 10-K 81.25%

group_2019_vs_2020_q4 10-K


Unnamed: 0,Q4 - 2019,Q4 - 2020,change
full_number_of_sentences,1777.0,1847.0,70.0
full_number_of_covid_words,5.09,51.01,45.92
mda_number_of_sentences,306.0,318.0,12.0
rf_number_of_risk_factors,37.98,38.73,0.75
rf_number_of_covid_words,2.98,16.7,13.72
full_fog_index,20.07,20.2,0.13
d_full_number_of_covid_words,0.59,1.0,0.41
d_rf_number_of_covid_words,0.54,0.97,0.43
mda_number_of_covid_words,0.76,17.32,16.56
N,2203.0,2203.0,0.0


Unnamed: 0,group,metric,form,full_number_of_sentences,full_number_of_covid_words,mda_number_of_sentences,rf_number_of_risk_factors,rf_number_of_covid_words,full_fog_index,d_full_number_of_covid_words,d_rf_number_of_covid_words,mda_number_of_covid_words
0,0,count,10-K,2203.0,2203.0,2203.0,2203.0,2203.0,2203.0,2203.0,2203.0,2203.0
1,1,count,10-K,2179.0,2179.0,2179.0,2179.0,2179.0,2179.0,2179.0,2179.0,2179.0


 0       - Coverage: 87.94% - Percentage 10-K 7.14%
 1       - Coverage: 87.89% - Percentage 10-K 7.01%

group_2019_vs_2021_q1 10-Q


Unnamed: 0,Q1 - 2019,Q1 - 2021,change
full_number_of_sentences,599.0,604.0,5.0
full_number_of_covid_words,0.04,25.38,25.34
mda_number_of_sentences,188.0,204.0,16.0
rf_number_of_risk_factors,7.49,7.23,-0.26
rf_number_of_covid_words,0.0,4.09,4.09
full_fog_index,19.64,19.85,0.21
d_full_number_of_covid_words,0.01,0.99,0.98
d_rf_number_of_covid_words,0.0,0.2,0.2
mda_number_of_covid_words,0.02,14.69,14.67
N,2391.0,2391.0,0.0


Unnamed: 0,group,metric,form,full_number_of_sentences,full_number_of_covid_words,mda_number_of_sentences,rf_number_of_risk_factors,rf_number_of_covid_words,full_fog_index,d_full_number_of_covid_words,d_rf_number_of_covid_words,mda_number_of_covid_words
0,0,count,10-Q,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0,2391.0
1,1,count,10-Q,2309.0,2309.0,2309.0,2309.0,2309.0,2309.0,2309.0,2309.0,2309.0


 0       - Coverage: 87.09% - Percentage 10-K 6.09%
 1       - Coverage: 87.09% - Percentage 10-K 6.19%

group_2019_vs_2021_q2 10-Q


Unnamed: 0,Q2 - 2019,Q2 - 2021,change
full_number_of_sentences,647.0,643.0,-4.0
full_number_of_covid_words,0.05,26.74,26.69
mda_number_of_sentences,211.0,224.0,13.0
rf_number_of_risk_factors,8.88,8.26,-0.62
rf_number_of_covid_words,0.01,4.64,4.63
full_fog_index,19.56,19.78,0.22
d_full_number_of_covid_words,0.01,0.99,0.98
d_rf_number_of_covid_words,0.0,0.24,0.24
mda_number_of_covid_words,0.02,15.54,15.52
N,2455.0,2455.0,0.0


Unnamed: 0,group,metric,form,full_number_of_sentences,full_number_of_covid_words,mda_number_of_sentences,rf_number_of_risk_factors,rf_number_of_covid_words,full_fog_index,d_full_number_of_covid_words,d_rf_number_of_covid_words,mda_number_of_covid_words
0,0,count,10-Q,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0
1,1,count,10-Q,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0,2455.0


## Combined table

In [37]:
rename_dict = {
    'full_number_of_sentences' : '# sentences in filing',
    'full_number_of_covid_words' : '# COVID mentions in filing',
    'rf_number_of_covid_words':'# COVID mentions in Risk Factor',
    'mda_number_of_sentences' : '# sentences in MD&A',
    'rf_number_of_risk_factors' : '# risk factors',
    'full_fog_index' : 'FOG readability score',
    'd_full_number_of_covid_words' : '% filings that mention COVID',
    'd_rf_number_of_covid_words' : '% filings that mention COVID in Risk Factor',
    'mda_number_of_covid_words' : '# COVID mentions in MD&A'
}
final_table = collection_df.fillna('').rename(columns=rename_dict)

In [38]:
final_table

Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A,N
Q1 - 2019,599.0,0.04,188.0,7.49,0.0,19.64,0.01,0.0,0.02,2391.0
Q1 - 2020,646.0,36.28,210.0,8.88,11.15,19.87,0.99,0.83,16.74,2391.0
change,47.0,36.24,22.0,1.39,11.15,0.23,0.98,0.83,16.72,0.0
,,,,,,,,,,
Q2 - 2019,647.0,0.05,211.0,8.88,0.01,19.56,0.01,0.0,0.02,2455.0
Q2 - 2020,702.0,42.81,239.0,9.92,11.18,19.8,1.0,0.74,22.39,2455.0
change,55.0,42.76,28.0,1.04,11.17,0.24,0.99,0.74,22.37,0.0
,,,,,,,,,,
Q3 - 2019,663.0,0.07,213.0,9.44,0.01,19.83,0.02,0.0,0.03,2416.0
Q3 - 2020,720.0,41.09,241.0,10.41,10.55,20.05,1.0,0.66,21.4,2416.0


In [39]:
final_table.to_excel(pipeline / 'out' / 'statistics.xslx')

## Show significance

In [40]:
for name, df in sig_df.groupby('comparison'):
    print(name)
    t_df = df.T
    t_df = t_df.rename(columns=t_df.loc['column'])
    t_df = t_df.drop(t_df.index[1])
    t_df = t_df.rename(columns = rename_dict)
    display(t_df)

group_2019_vs_2020_q1


Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A
comparison,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1,group_2019_vs_2020_q1
form,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q
pre_mean,599.66,0.04,188.50,7.49,0.01,19.64,0.01,0.00,0.03
post_mean,646.87,36.28,210.97,8.88,11.15,19.87,0.99,0.83,16.74
dif,47.20,36.24,22.48,1.40,11.15,0.23,0.98,0.83,16.72
pvalue,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00
statistic,-5.25,-82.49,-6.25,-2.60,-54.89,-7.65,-322.97,-108.04,-64.51


group_2019_vs_2020_q2


Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A
comparison,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2,group_2019_vs_2020_q2
form,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q
pre_mean,647.70,0.05,211.02,8.88,0.01,19.56,0.01,0.00,0.02
post_mean,702.98,42.81,239.52,9.92,11.18,19.80,1.00,0.74,22.39
dif,55.28,42.76,28.51,1.04,11.17,0.25,0.99,0.74,22.36
pvalue,0.00,0.00,0.00,0.07,0.00,0.00,0.00,0.00,0.00
statistic,-5.82,-83.33,-7.28,-1.83,-47.65,-7.95,-420.57,-82.45,-66.80


group_2019_vs_2020_q3


Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A
comparison,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3,group_2019_vs_2020_q3
form,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q
pre_mean,663.50,0.07,213.25,9.44,0.01,19.83,0.02,0.00,0.03
post_mean,720.67,41.09,241.53,10.41,10.55,20.05,1.00,0.66,21.40
dif,57.16,41.02,28.28,0.96,10.53,0.22,0.98,0.66,21.36
pvalue,0.00,0.00,0.00,0.11,0.00,0.00,0.00,0.00,0.00
statistic,-5.67,-77.68,-6.96,-1.61,-42.42,-7.05,-367.56,-68.05,-63.62


group_2019_vs_2020_q4


Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A
comparison,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4,group_2019_vs_2020_q4
form,10-K,10-K,10-K,10-K,10-K,10-K,10-K,10-K,10-K
pre_mean,1777.56,5.09,306.12,37.98,2.98,20.07,0.59,0.55,0.76
post_mean,1847.50,51.01,318.05,38.73,16.70,20.20,1.00,0.97,17.32
dif,69.94,45.92,11.94,0.75,13.73,0.13,0.41,0.43,16.57
pvalue,0.00,0.00,0.11,0.23,0.00,0.00,0.00,0.00,0.00
statistic,-3.83,-65.03,-1.61,-1.19,-48.56,-5.32,-39.19,-38.32,-52.55


group_2019_vs_2021_q1


Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A
comparison,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1,group_2019_vs_2021_q1
form,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q
pre_mean,599.66,0.04,188.50,7.49,0.01,19.64,0.01,0.00,0.03
post_mean,604.31,25.38,204.44,7.23,4.09,19.85,0.99,0.20,14.69
dif,4.64,25.34,15.95,-0.26,4.09,0.21,0.98,0.20,14.67
pvalue,0.61,0.00,0.00,0.64,0.00,0.00,0.00,0.00,0.00
statistic,-0.51,-63.21,-4.53,0.47,-18.19,-6.83,-345.34,-23.70,-60.38


group_2019_vs_2021_q2


Unnamed: 0,# sentences in filing,# COVID mentions in filing,# sentences in MD&A,# risk factors,# COVID mentions in Risk Factor,FOG readability score,% filings that mention COVID,% filings that mention COVID in Risk Factor,# COVID mentions in MD&A
comparison,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2,group_2019_vs_2021_q2
form,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q,10-Q
pre_mean,647.70,0.05,211.02,8.88,0.01,19.56,0.01,0.00,0.02
post_mean,643.51,26.74,224.93,8.26,4.64,19.78,0.99,0.24,15.54
dif,-4.19,26.69,13.91,-0.62,4.63,0.22,0.98,0.23,15.52
pvalue,0.66,0.00,0.00,0.28,0.00,0.00,0.00,0.00,0.00
statistic,0.45,-64.58,-3.75,1.09,-20.18,-7.00,-338.47,-27.13,-60.79
