In [None]:
# load packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# CREATE DATASETS

In [None]:
def _read_topic(path):
    """Read one tab-separated topic file and copy its row index into `nameparty_id`."""
    frame = pd.read_csv(path, sep='\t')
    # the row index doubles as the id column; it is the key used to join the members table
    # NOTE(review): assumes row position in each file corresponds to the members' nameparty_id - confirm
    frame['nameparty_id'] = frame.index
    return frame


# one NOMINATE frame per policy domain
topic_def = _read_topic('df_def.csv')
topic_env = _read_topic('df_env.csv')
topic_inf = _read_topic('df_inf.csv')
topic_con = _read_topic('df_con.csv')
topic_leg = _read_topic('df_leg.csv')
topic_soc = _read_topic('df_soc.csv')
topic_for = _read_topic('df_for.csv')

In [None]:
# columns kept from every topic frame: NOMINATE coordinate, join key, policy domain
topic_cols = ['NOM1D', 'nameparty_id', 'topic']


def _label_topic(frame, label):
    """Return a trimmed copy of a topic frame tagged with its policy domain.

    The `coord1D` column is renamed to `NOM1D`, a constant `topic` column
    holding `label` is added, and only the columns in `topic_cols` are kept.

    The rename happens before the column selection and ignores a missing
    `coord1D`, so calling this again on an already processed frame returns
    the same result instead of raising a KeyError. That keeps the cell
    safe to re-run. No frame is modified in place.
    """
    return (
        frame
        .rename(columns={'coord1D': 'NOM1D'})
        .assign(topic=label)
        .loc[:, topic_cols]
    )


# tag each frame with its policy domain and keep only the relevant columns
topic_def = _label_topic(topic_def, 'defense and military')
topic_env = _label_topic(topic_env, 'environmental and natural resources')
topic_con = _label_topic(topic_con, 'government budget and administration')
topic_inf = _label_topic(topic_inf, 'infrastructure and development')
topic_for = _label_topic(topic_for, 'international relations and government')
topic_leg = _label_topic(topic_leg, 'legislation and policy')
topic_soc = _label_topic(topic_soc, 'social services and public welfare')

In [None]:
# stack the seven per-topic frames on top of each other into a single dataset
topic_frames = [topic_def, topic_env, topic_con, topic_inf, topic_for, topic_leg, topic_soc]
df_topic = pd.concat(topic_frames)

In [None]:
# Read the members table and attach the per-topic NOMINATE rows to it.
# Left join on `nameparty_id`: every member row is kept, also when no topic row matches.
df_mem = (
    pd.read_csv('df_members_fv.tsv', sep='\t')
    .merge(df_topic, how='left', on=['nameparty_id'])
)

In [None]:
# narrow the merged table down to member identifiers, party, congress and the NOMINATE fields
cols = [
    'bioname',
    'party',
    'congress',
    'nameparty_id',
    'member_id',
    'NOM1D',
    'topic',
]
df_nom = df_mem.loc[:, cols]

In [None]:
# mean and standard deviation of NOM1D for every (congress, party, topic) group;
# named aggregation gives the result columns their final names directly
mean_values = (
    df_nom
    .groupby(['congress', 'party', 'topic'])['NOM1D']
    .agg(mean_nom1='mean', std_nom1='std')
    .reset_index()
)

# attach the group statistics to every member row (default inner join on the three keys)
df_nom = df_nom.merge(mean_values, on=['party', 'congress', 'topic'])

In [None]:
# display names for the policy domains (internal label -> label used in plots)
rename_dict = {
    'defense and military': 'Defense and Military',
    'environmental and natural resources': 'Environment and Natural Resources',
    'government budget and administration': 'Government Budget and Administration',
    'infrastructure and development': 'Infrastructure and Development',
    'international relations and government': 'International Relations and Government',
    'legislation and policy': 'Legislation and Policy',
    'social services and public welfare': 'Social Services and Public Welfare',
}

# swap the labels; values without an entry in the mapping are left as they are
display_topics = df_nom['topic'].replace(rename_dict)
df_nom = df_nom.assign(topic=display_topics)

# PLOTS

In [None]:
# topics in panel order (one subplot per topic)
topics = [
    'Environment and Natural Resources',
    'Infrastructure and Development',
    'Government Budget and Administration',
    'Defense and Military',
    'International Relations and Government',
    'Legislation and Policy',
    'Social Services and Public Welfare',
]

# color and marker of each plotted series
colors = {'Democratic Party': 'blue', 'Republican Party': 'red', 'diff': 'grey'}
markers = {'Democratic Party': 'o', 'Republican Party': '^', 'diff': 's'}

# (pivot column, std column, legend label) of the two party lines, in draw order
party_series = [
    ('Democratic Party', 'Democratic_std', 'D'),
    ('Republican Party', 'Republican_std', 'R'),
]

# the legend kept intersecting the lines for this topic, so the upper y-limit
# of its panel is raised manually; all other panels keep the automatic limits
ylim_top = {'International Relations and Government': 1.5}

# create subplot grid
n_rows, n_cols = 4, 2
fig, axs = plt.subplots(n_rows, n_cols, figsize=(10, 13))
axs = axs.flatten()

# one panel per topic
for ax, topic in zip(axs, topics):
    # rows of this topic only
    sub_df = df_nom[df_nom['topic'] == topic]

    # party mean / std of NOMINATE per congress. Both tables stay indexed by
    # congress until they are combined, so the std columns are matched to the
    # means by congress and not by row position (pivot_table drops rows that
    # are entirely NaN, so the two tables need not have the same rows).
    mean_pivot = sub_df.pivot_table(index='congress', columns='party', values='mean_nom1')
    std_pivot = sub_df.pivot_table(index='congress', columns='party', values='std_nom1')

    # distance between the party means (polarization)
    mean_pivot['diff'] = (mean_pivot['Republican Party'] - mean_pivot['Democratic Party']).abs()

    # add std to mean pivot table (aligned on the congress index)
    mean_pivot['Republican_std'] = std_pivot['Republican Party']
    mean_pivot['Democratic_std'] = std_pivot['Democratic Party']
    mean_pivot = mean_pivot.reset_index()

    for party, std_col, label in party_series:
        # mean position of the party per congress
        sns.lineplot(
            data=mean_pivot, x='congress', y=party, ax=ax,
            marker=markers[party], color=colors[party], label=label,
        )
        # shaded band of one standard deviation around the mean
        ax.fill_between(
            mean_pivot['congress'],
            mean_pivot[party] - mean_pivot[std_col],
            mean_pivot[party] + mean_pivot[std_col],
            color=colors[party], alpha=0.1,
        )

    # dashed line with the distance between the two party means
    sns.lineplot(
        data=mean_pivot, x='congress', y='diff', ax=ax,
        marker=markers['diff'], color=colors['diff'], linestyle='--', label='Diff',
    )

    # labels, title, legend and (where configured) the raised y-limit
    ax.set_xlabel('Congress')
    ax.set_ylabel('Mean NOMINATE')
    if topic in ylim_top:
        ax.set_ylim(top=ylim_top[topic])
    ax.set_title(f'{topic}')
    ax.legend(loc='upper left')

# remove the grid cells that did not receive a topic
for unused_ax in axs[len(topics):]:
    fig.delaxes(unused_ax)

plt.tight_layout()

# save figure
plt.savefig('polarization_nom_topics.png')

plt.show()

In [None]:
# create one pivot table with differences as values, congresses as rows and topics as columns

# topic (as named in df_nom) -> name of its difference column, in column order
topic_columns = {
    'Defense and Military': 'defense',
    'Environment and Natural Resources': 'environment',
    'Government Budget and Administration': 'government',
    'Infrastructure and Development': 'infrastructure',
    'International Relations and Government': 'international',
    'Legislation and Policy': 'legislation',
    'Social Services and Public Welfare': 'social services',
}

# the party means of the defense topic form the base table; its congress index
# is the one every difference column is aligned to
sub_df = df_nom[df_nom['topic'] == 'Defense and Military']
mean_nom_pivot = sub_df.pivot_table(index=['congress'], columns='party', values='mean_nom1')

# per topic: party means per congress, then the absolute distance between the
# Republican and Democratic mean is added as a new column
for topic_name, diff_column in topic_columns.items():
    pivot = df_nom[df_nom['topic'] == topic_name].pivot_table(
        index=['congress'], columns='party', values='mean_nom1'
    )
    mean_nom_pivot[diff_column] = (pivot['Republican Party'] - pivot['Democratic Party']).abs()

mean_nom_pivot

In [None]:
# Remove the two party-mean columns so only the per-topic difference columns
# remain, and label the column axis 'topic' (it was 'party' from the pivot).
pivot_diff = (
    mean_nom_pivot
    .drop(columns=['Democratic Party', 'Republican Party'])
    .rename_axis(columns='topic')
)

# turn the congress index back into an ordinary column
pivot_table = pivot_diff.reset_index()

In [None]:
# put the columns in presentation order, with congress first
new_order = [
    'congress',
    'environment',
    'infrastructure',
    'government',
    'defense',
    'international',
    'legislation',
    'social services',
]
pivot_table = pivot_table.loc[:, new_order]

In [None]:
# reshape the wide table (one column per topic) into long form:
# one row per congress and topic, with the value in 'difference'
df_melted = pivot_table.melt(id_vars='congress', var_name='topic', value_name='difference')

# seaborn's 'deep' palette supplies the line colors
palette = sns.color_palette('deep')

# one line per topic showing the party difference per congress
ax = sns.lineplot(data=df_melted, x='congress', y='difference', hue='topic', palette=palette, marker='o')

# legend without a title
legend = ax.legend()
legend.set_title(None)

# axis labels and grid
ax.set_xlabel('Congress')
ax.set_ylabel('Mean NOMINATE Difference')
ax.grid(True)

# write the figure to disk, then display it
plt.savefig('difference_topics.png')
plt.show()

In [None]:
# For every topic, print how far each party's mean position moved between the
# 93rd and the 118th congress (absolute difference of the two party means).
for topic in topics:
    party_means = (
        df_nom[df_nom['topic'] == topic]
        .pivot_table(index='congress', columns='party', values='mean_nom1')
        .reset_index()
    )
    is_93 = party_means['congress'] == 93
    is_118 = party_means['congress'] == 118

    # Democratic shift first, Republican shift second
    shifts = []
    for party in ('Democratic Party', 'Republican Party'):
        start = party_means.loc[is_93, party].values
        end = party_means.loc[is_118, party].values
        shifts.append(abs(start - end))

    print(topic)
    for shift in shifts:
        print(shift)

In [None]:
# Change in polarization between the first and the last row of `pivot_table`.
# NOTE(review): the rows are expected to run from the 93rd to the 118th
# congress (the table comes from a pivot indexed by congress) - confirm.
first_row = pivot_table.iloc[0]
last_row = pivot_table.iloc[-1]

# `congress` identifies the row and is not a polarization measure, so it is
# left out; previously it showed up in the result as the span of congress numbers
abs_diff = {
    column: abs(last_row[column] - first_row[column])
    for column in pivot_table.columns
    if column != 'congress'
}

print("Absolute differences:")
print(abs_diff)