# Frame Consolidation Analysis
This notebook investigates whether legislators within parties have become more consistent in their language use over the past 34 years.

In [1]:
%run Frame_analysis_functions.py
import joblib
import seaborn as sns
from scipy.spatial.distance import cosine
from plotnine import ggplot, aes, geoms

In [20]:
# Load the labelled speeches, keeping every row whose party is not 'I'
all_df = pd.read_csv('Results/All_speeches_labelled.csv').loc[
    lambda speeches: speeches.party_y != 'I'
]

# Per-term correlation scores from the frame polarization analysis
term_df = pd.read_csv('Results/term_correlations_frames.csv')

# Every (year, topic) pair to analyse: years 1983-2016 inclusive crossed
# with each distinct topic label found in the speeches
topics = all_df.dynamic_label.unique()
combinations = [(year, topic) for year in range(1983,2017) for topic in topics]

In [170]:
def run_pairwise_cos(mat):
    """
    Compares the term distribution of every legislator with that of every
    other legislator in the party, then calculates M,SD of the similarities.

    args:
        - mat: 2-D array of term distributions, one row per legislator
    output:
        - mean and sd of the cosine similarity over all unordered pairs of
          rows. Returns (nan, nan) when there are fewer than two rows,
          because there is no pair to compare. A row with zero norm makes
          every pair it takes part in nan, which propagates to the result.

    """

    mat = np.asarray(mat, dtype=float)
    n_rows = mat.shape[0]

    # No pair exists: answer nan directly instead of letting np.mean warn
    # about an empty slice.
    if n_rows < 2:
        return np.nan, np.nan

    # All pairwise dot products at once, divided by the product of the row
    # norms. 0/0 is left as nan on purpose (same outcome as comparing a
    # zero vector pair by pair), so the warnings are silenced locally.
    norms = np.sqrt(np.einsum('ij,ij->i', mat, mat))
    with np.errstate(divide='ignore', invalid='ignore'):
        cos_mat = (mat @ mat.T) / np.outer(norms, norms)

    # Guard against rounding pushing a similarity slightly outside [-1, 1]
    cos_mat = np.clip(cos_mat, -1.0, 1.0)

    # Each unordered pair appears exactly once above the diagonal
    upper_tri = cos_mat[np.triu_indices(n_rows, k=1)]
    mean = np.mean(upper_tri)
    std = np.std(upper_tri)
    return mean,std

In [171]:
def _party_term_matrix(DTM, party, party_terms):
    """Return one party's rows of the DTM, restricted to `party_terms`, as a numpy array.

    Rows whose counts sum to zero or less over the retained terms are removed.
    """
    party_dtm = DTM.loc[DTM.party_y == party]
    # Boolean column mask instead of DataFrame.drop(cols, 1): passing the
    # axis positionally is rejected by pandas >= 2.0.
    keep = party_dtm.columns.isin(party_terms)
    counts = party_dtm.loc[:, keep].to_numpy()
    return counts[counts.sum(axis=1) > 0]


def run_conslidation(year,topic):
    """
    Summarise within-party similarity of term use for one year/topic cell.

    Reads the module-level `all_df` (speeches) and `term_df` (term
    correlations). Builds a document-term matrix with `make_DTM`
    (presumably one row per legislator -- confirm against
    Frame_analysis_functions.py), keeps for party 'D' the terms with
    negative correlation and for party 'R' the terms with positive
    correlation, and runs `run_pairwise_cos` on each party's matrix.

    args:
        - year: value matched against `all_df.year_y` and `term_df.year`
        - topic: value matched against `all_df.dynamic_label` and `term_df.topic`
    output:
        - dict with keys year, topic, dem_mean, dem_std, rep_mean, rep_std,
          or None when no speech matches the year/topic cell
    """

    sub_df = all_df.loc[(all_df.year_y == year) & (all_df.dynamic_label == topic)]
    if len(sub_df) == 0:
        return None

    # make DTM
    DTM = make_DTM(sub_df,binary=False,remove_speaker=False)

    # get polarizing terms from the frame polarization analysis
    terms = term_df.loc[(term_df.topic == topic) & (term_df.year == year)]

    # subset terms by Dem/Rep partisanship score (e.g. party messages);
    # looked up once here rather than once per DTM column
    dem_terms = terms.loc[terms.correlation < 0,'term'].values
    rep_terms = terms.loc[terms.correlation > 0,'term'].values

    np_d = _party_term_matrix(DTM,'D',dem_terms)
    np_r = _party_term_matrix(DTM,'R',rep_terms)

    # run pairwise comparisons
    dem_mean,dem_sd = run_pairwise_cos(np_d)
    rep_mean,rep_sd = run_pairwise_cos(np_r)

    return {"year":year,
            'topic':topic,
            'dem_mean':dem_mean,
            'dem_std':dem_sd,
            'rep_mean':rep_mean,
            "rep_std":rep_sd}

In [135]:
# Run the consolidation analysis for every (year, topic) pair except the
# excluded topic labels, collecting each non-empty summary in Results.
Results = []
pbar = tqdm(combinations)
for year, topic in pbar:
    pbar.set_description("%s %s" % (year, topic))
    if topic in ['procedural','tribute','armenian_genocide']:
        continue
    summary = run_conslidation(year, topic)
    if summary:
        Results.append(summary)


2016 healthcare: 100%|██████████| 2142/2142 [16:13<00:00,  2.20it/s]               


In [136]:
# One row per (year, topic) summary dict collected in Results
consolidation_df = pd.DataFrame(data=Results)

In [154]:
# Persist the summaries; the default index is written as the first column
consolidation_df.to_csv(path_or_buf='Results/Frame_consolidation.csv')