# Topic disaggregation

### Repeat the same process on data previously disaggregated according to the categories of the vote

In [2]:
import pandas as pd
import networkx as nx
import numpy as np
import re

import glob, os
import itertools
from tqdm import tqdm

import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import seaborn as sns
from shapely.geometry import LineString
from matplotlib.lines import Line2D

In [3]:
import sys
# Prepend '../local/bin' to the module search path so modules found there take
# precedence on import (presumably where the `sygno` package imported further
# down lives -- confirm).
sys.path.insert(0, '../local/bin')

In [4]:
def create_members_df(members, party_codes):
    """Collapse the per-congress member table into one row per legislator.

    Parameters
    ----------
    members : pandas.DataFrame
        Must provide the columns ``icpsr``, ``congress``, ``party_code``,
        ``bioname`` and ``state_abbrev``.
    party_codes : pandas.DataFrame
        Must provide the columns ``party_code`` and ``party_name``.

    Returns
    -------
    pandas.DataFrame
        One row per ``icpsr`` with the columns ``icpsr``, ``congress`` (list of
        every congress value of that member), ``party_code`` (list of distinct
        codes), ``bioname``, ``state_abbrev`` and ``party_name`` (list of names
        matching ``party_code``).

    Raises
    ------
    KeyError
        If a member carries a party code that is absent from ``party_codes``.
    """
    by_member = members.groupby('icpsr', as_index=False)

    # Per member: every congress value, and the distinct party codes.
    served = by_member[['congress']].agg(lambda col: list(col))
    affiliations = by_member[['party_code']].agg(lambda col: list(set(col)))
    summary = served.merge(affiliations)

    # icpsr -> attribute lookups; when an icpsr repeats, the last row wins.
    name_by_member = members.set_index('icpsr')['bioname'].to_dict()
    state_by_member = members.set_index('icpsr')['state_abbrev'].to_dict()
    summary['bioname'] = summary['icpsr'].map(name_by_member)
    summary['state_abbrev'] = summary['icpsr'].map(state_by_member)

    # Translate each member's list of codes into the matching list of names.
    name_by_code = party_codes.set_index('party_code')['party_name'].to_dict()
    summary['party_name'] = summary['party_code'].apply(
        lambda codes: [name_by_code[code] for code in codes]
    )
    return summary

In [5]:
# Raw member and party tables.
members = pd.read_csv('../dataset/HSall_members.csv')
party_codes = pd.read_csv('../dataset/HSall_parties.csv')

# One row per legislator, with the party names attached.
members_info = create_members_df(members, party_codes)

# member_id (icpsr) -> list of party names
member_party_dict = dict(zip(members_info['icpsr'].tolist(), members_info['party_name'].tolist()))

In [6]:
def create_edgelist_from_congress(congress, members_party_dict):
    """Build the signed co-voting edgelist for a set of roll calls.

    For every roll call (distinct ``id``), each pair of members who both voted
    "Yea" or both voted "Nay" contributes one agreement (``weight == 1``) and
    each Yea/Nay pair contributes one disagreement (``weight == -1``).  Any
    other value in ``vote`` is ignored.  Pairs are undirected and stored with
    ``source < target``; a member paired with itself is dropped.

    Parameters
    ----------
    congress : pandas.DataFrame
        Needs the columns ``id`` (roll-call id), ``vote`` and ``icpsr``.
    members_party_dict : Mapping
        Member id -> party value.  An edge is labelled ``'in'`` when the two
        members' values compare equal and ``'out'`` otherwise.

    Returns
    -------
    pandas.DataFrame
        Columns ``source``, ``target``, ``weight``, ``count`` (number of roll
        calls with that relation), ``party`` (``'in'``/``'out'``),
        ``votes_togheter`` (total ``count`` of the pair over both weights) and
        ``perc`` (``count / votes_togheter``), sorted by source, target and
        weight.  An empty frame with these columns is returned when no pair
        can be formed.

    Raises
    ------
    KeyError
        If a member id in the edgelist is missing from ``members_party_dict``.
    """
    from collections import Counter

    columns = ['source', 'target', 'weight', 'count', 'party', 'votes_togheter', 'perc']

    # (source, target, weight) -> number of roll calls.  Counting in place keeps
    # memory bounded by the number of distinct pairs and avoids re-aggregating
    # the whole edgelist after every roll call.
    pair_counts = Counter()

    # One group per roll call; a single pass instead of one full scan per id.
    for _, ballot in tqdm(congress.groupby('id', sort=False)):
        yea = ballot.loc[ballot['vote'] == 'Yea', 'icpsr'].tolist()
        nay = ballot.loc[ballot['vote'] == 'Nay', 'icpsr'].tolist()

        same_side = itertools.chain(itertools.combinations(yea, 2),
                                    itertools.combinations(nay, 2))
        # min/max puts every pair in its canonical (source < target) orientation.
        pair_counts.update((min(a, b), max(a, b), 1) for a, b in same_side if a != b)
        pair_counts.update((min(a, b), max(a, b), -1)
                           for a, b in itertools.product(yea, nay) if a != b)

    if not pair_counts:
        return pd.DataFrame(columns=columns)

    edgelist = pd.DataFrame(
        [(source, target, weight, count) for (source, target, weight), count in pair_counts.items()],
        columns=['source', 'target', 'weight', 'count'],
    )
    edgelist = edgelist.sort_values(['source', 'target', 'weight']).reset_index(drop=True)

    # 'in' when both members map to the same party value, 'out' otherwise.
    edgelist['party'] = [
        'in' if members_party_dict[source] == members_party_dict[target] else 'out'
        for source, target in zip(edgelist['source'].tolist(), edgelist['target'].tolist())
    ]

    # Number of votes together for each pair, whatever the sign of the relation.
    edgelist['votes_togheter'] = edgelist.groupby(['source', 'target'])['count'].transform('sum')
    edgelist['perc'] = edgelist['count'] / edgelist['votes_togheter']

    return edgelist

In [7]:
# Glob patterns for the merged vote files; `congress_path` feeds the house
# output folders below and `congress_path_senate` the senate ones.
congress_path = '../dataset/download_votes_merged/*'
congress_path_senate = '../dataset/download_votes_merged_senate/*'

In [8]:
# Weighted bipartite graph: Clausen categories are tagged bipartite=0,
# Peltzman categories bipartite=1.
G = nx.Graph()

# Iterate through each CSV file
for csv_file in tqdm(glob.glob(congress_path)):
    # Only the two category columns are used in this cell, so skip parsing the rest.
    df = pd.read_csv(csv_file, usecols=["Clausen", "Peltzman"])

    # Number of rows for each (Clausen, Peltzman) pair in this file
    counts = df.groupby(["Clausen", "Peltzman"]).size()

    for (clausen_category, peltzman_category), count in counts.items():
        G.add_node(clausen_category, bipartite=0)
        G.add_node(peltzman_category, bipartite=1)

        # Accumulate across files: calling add_edge again with weight=count
        # would replace the weight with the count of the current file only.
        if G.has_edge(clausen_category, peltzman_category):
            G[clausen_category][peltzman_category]["weight"] += int(count)
        else:
            G.add_edge(clausen_category, peltzman_category, weight=int(count))

# G is now a weighted bipartite graph whose edges are the co-occurrences between
# Clausen and Peltzman categories, weighted by the total number of occurrences
# over all the files read above.

  df = pd.read_csv(csv_file)
100%|██████████| 41/41 [00:40<00:00,  1.01it/s]


In [9]:
# Split the graph nodes by the side they were tagged with, then report each side.
clausen_nodes = [node for node, attrs in G.nodes(data=True) if attrs['bipartite'] == 0]
peltzman_nodes = [node for node, attrs in G.nodes(data=True) if attrs['bipartite'] == 1]

print('Clausen categories: ', clausen_nodes)
print('Number of Clausen categories: ', len(clausen_nodes))

print('Peltzman categories: ', peltzman_nodes)
print('Number of Peltzman categories: ', len(peltzman_nodes))

Clausen categories:  ['Agriculture', 'Civil Liberties', 'Foreign and Defense Policy', 'Government Management', 'Miscellaneous Policy', 'Social Welfare']
Number of Clausen categories:  6
Peltzman categories:  ['Budget Special Interest', 'Regulation General Interest', 'Regulation Special Interest', 'Domestic Social Policy', 'Defense Policy Budget', 'Defense Policy Resolutions', 'Foreign Policy Budget', 'Foreign Policy Resolutions', 'Budget General Interest', 'D. C.', 'Government Organization', 'Indian Affairs', 'Internal Organization']
Number of Peltzman categories:  13


____________________________________________________

### Disaggregate the congresses

In [10]:
# Category names per side of the bipartite graph, in node insertion order.
clausen_cat_list = []
peltzman_cat_list = []
for node, attrs in G.nodes(data=True):
    side = attrs['bipartite']
    if side == 0:
        clausen_cat_list.append(node)
    elif side == 1:
        peltzman_cat_list.append(node)

In [11]:
# Category label -> folder name (the label with every space turned into '_').
clausen_cat_dic = {label: label.replace(' ', '_') for label in clausen_cat_list}
peltzman_cat_dic = {label: label.replace(' ', '_') for label in peltzman_cat_list}

In [12]:
# Root output folders: one per chamber and per classification scheme.
for vote_root in (
    '../dataset/votes_house_clausen',
    '../dataset/votes_senate_clausen',
    '../dataset/votes_house_peltzman',
    '../dataset/votes_senate_peltzman',
):
    os.makedirs(vote_root, exist_ok=True)

In [13]:
# One sub-folder per category, under both the house and the senate roots.
for clausen in clausen_cat_dic.values():
    for vote_root in ('../dataset/votes_house_clausen/', '../dataset/votes_senate_clausen/'):
        os.makedirs(vote_root + clausen, exist_ok=True)

for peltzman in peltzman_cat_dic.values():
    for vote_root in ('../dataset/votes_house_peltzman/', '../dataset/votes_senate_peltzman/'):
        os.makedirs(vote_root + peltzman, exist_ok=True)

In [14]:
# (column to filter on, category -> folder name, output root) per scheme.
house_targets = (
    ('Clausen', clausen_cat_dic, '../dataset/votes_house_clausen/'),
    ('Peltzman', peltzman_cat_dic, '../dataset/votes_house_peltzman/'),
)

for csv_path in tqdm(glob.glob(congress_path)):
    # The congress number is what remains of the file name once the
    # 'congress_' and '.csv' parts are stripped.
    n_congress = int(os.path.basename(csv_path).replace('congress_', '').replace('.csv', ''))

    df_congress = pd.read_csv(csv_path)

    for column, folder_names, out_root in house_targets:
        for category, folder_name in folder_names.items():
            filtered_df = df_congress[df_congress[column] == category]
            if filtered_df.empty:
                continue  # no file is written for a category absent from this congress
            filtered_df.to_csv(out_root + folder_name + '/congress_' + str(n_congress) + '.csv', index=False)

  df_congress = pd.read_csv(csv)
100%|██████████| 41/41 [03:00<00:00,  4.39s/it]


In [15]:
# (column to filter on, category -> folder name, output root) per scheme.
senate_targets = (
    ('Clausen', clausen_cat_dic, '../dataset/votes_senate_clausen/'),
    ('Peltzman', peltzman_cat_dic, '../dataset/votes_senate_peltzman/'),
)

for csv_path in tqdm(glob.glob(congress_path_senate)):
    # The congress number is what remains of the file name once the
    # 'congress_' and '.csv' parts are stripped.
    n_congress = int(os.path.basename(csv_path).replace('congress_', '').replace('.csv', ''))

    df_congress = pd.read_csv(csv_path)

    for column, folder_names, out_root in senate_targets:
        for category, folder_name in folder_names.items():
            filtered_df = df_congress[df_congress[column] == category]
            if filtered_df.empty:
                continue  # no file is written for a category absent from this congress
            filtered_df.to_csv(out_root + folder_name + '/congress_' + str(n_congress) + '.csv', index=False)

100%|██████████| 41/41 [00:35<00:00,  1.16it/s]


_________________________________________________________________________

### Create edgelists

Create folders

In [16]:
# Edgelist output tree: the common root first, then one folder per chamber and scheme.
for edge_root in (
    '../dataset/edgelists/',
    '../dataset/edgelists/votes_house_clausen_edges/',
    '../dataset/edgelists/votes_house_peltzman_edges/',
    '../dataset/edgelists/votes_senate_clausen_edges/',
    '../dataset/edgelists/votes_senate_peltzman_edges/',
):
    os.makedirs(edge_root, exist_ok=True)

In [17]:
# One edgelist sub-folder per category, for both chambers.
for clausen in clausen_cat_dic.values():
    for edge_root in ('../dataset/edgelists/votes_house_clausen_edges/',
                      '../dataset/edgelists/votes_senate_clausen_edges/'):
        os.makedirs(edge_root + clausen, exist_ok=True)

for peltzman in peltzman_cat_dic.values():
    for edge_root in ('../dataset/edgelists/votes_house_peltzman_edges/',
                      '../dataset/edgelists/votes_senate_peltzman_edges/'):
        os.makedirs(edge_root + peltzman, exist_ok=True)

Create House of Representatives edgelists

In [None]:
# House, Clausen scheme: one edgelist per category and congress file.
for clausen in clausen_cat_dic.values():
    votes_dir = '../dataset/votes_house_clausen/' + clausen
    edges_dir = '../dataset/edgelists/votes_house_clausen_edges/' + clausen
    for csv_path in glob.glob(votes_dir + '/*'):
        file_name = os.path.basename(csv_path)
        n_congress = int(file_name.replace('congress_', '').replace('.csv', ''))
        df_congress = pd.read_csv(csv_path)
        edgelist = create_edgelist_from_congress(df_congress, member_party_dict)
        edgelist.to_csv(edges_dir + '/congress_' + str(n_congress) + '.csv', index=False)

In [None]:
# House, Peltzman scheme: one edgelist per category and congress file.
for peltzman in peltzman_cat_dic.values():
    votes_dir = '../dataset/votes_house_peltzman/' + peltzman
    edges_dir = '../dataset/edgelists/votes_house_peltzman_edges/' + peltzman
    for csv_path in glob.glob(votes_dir + '/*'):
        file_name = os.path.basename(csv_path)
        n_congress = int(file_name.replace('congress_', '').replace('.csv', ''))
        df_congress = pd.read_csv(csv_path)
        edgelist = create_edgelist_from_congress(df_congress, member_party_dict)
        edgelist.to_csv(edges_dir + '/congress_' + str(n_congress) + '.csv', index=False)

Create Senate edgelists

In [18]:
# Senate, Clausen scheme: one edgelist per category and congress file.
for clausen in clausen_cat_dic.values():
    votes_dir = '../dataset/votes_senate_clausen/' + clausen
    edges_dir = '../dataset/edgelists/votes_senate_clausen_edges/' + clausen
    for csv_path in glob.glob(votes_dir + '/*'):
        file_name = os.path.basename(csv_path)
        n_congress = int(file_name.replace('congress_', '').replace('.csv', ''))
        df_congress = pd.read_csv(csv_path)
        edgelist = create_edgelist_from_congress(df_congress, member_party_dict)
        edgelist.to_csv(edges_dir + '/congress_' + str(n_congress) + '.csv', index=False)

  0%|          | 0/24 [00:00<?, ?it/s]

100%|██████████| 24/24 [00:00<00:00, 119.56it/s]
100%|██████████| 31/31 [00:00<00:00, 144.02it/s]
100%|██████████| 18/18 [00:00<00:00, 129.97it/s]
100%|██████████| 10/10 [00:00<00:00, 136.96it/s]
100%|██████████| 18/18 [00:00<00:00, 119.04it/s]
100%|██████████| 46/46 [00:00<00:00, 130.81it/s]
100%|██████████| 28/28 [00:00<00:00, 140.87it/s]
100%|██████████| 14/14 [00:00<00:00, 145.35it/s]
100%|██████████| 31/31 [00:00<00:00, 127.40it/s]
100%|██████████| 19/19 [00:00<00:00, 126.66it/s]
100%|██████████| 41/41 [00:00<00:00, 158.41it/s]
100%|██████████| 38/38 [00:00<00:00, 137.39it/s]
100%|██████████| 21/21 [00:00<00:00, 135.93it/s]
100%|██████████| 19/19 [00:00<00:00, 130.59it/s]
100%|██████████| 27/27 [00:00<00:00, 143.78it/s]
100%|██████████| 48/48 [00:00<00:00, 140.86it/s]
100%|██████████| 31/31 [00:00<00:00, 142.16it/s]
100%|██████████| 23/23 [00:00<00:00, 156.25it/s]
100%|██████████| 18/18 [00:00<00:00, 115.19it/s]
100%|██████████| 22/22 [00:00<00:00, 141.14it/s]
100%|██████████| 18/

In [19]:
# Senate, Peltzman scheme: one edgelist per category and congress file.
for peltzman in peltzman_cat_dic.values():
    votes_dir = '../dataset/votes_senate_peltzman/' + peltzman
    edges_dir = '../dataset/edgelists/votes_senate_peltzman_edges/' + peltzman
    for csv_path in glob.glob(votes_dir + '/*'):
        file_name = os.path.basename(csv_path)
        n_congress = int(file_name.replace('congress_', '').replace('.csv', ''))
        df_congress = pd.read_csv(csv_path)
        edgelist = create_edgelist_from_congress(df_congress, member_party_dict)
        edgelist.to_csv(edges_dir + '/congress_' + str(n_congress) + '.csv', index=False)

100%|██████████| 178/178 [00:01<00:00, 115.56it/s]
100%|██████████| 82/82 [00:00<00:00, 142.51it/s]
100%|██████████| 204/204 [00:01<00:00, 122.98it/s]
100%|██████████| 108/108 [00:00<00:00, 125.18it/s]
100%|██████████| 133/133 [00:01<00:00, 118.71it/s]
100%|██████████| 144/144 [00:01<00:00, 124.54it/s]
100%|██████████| 116/116 [00:00<00:00, 141.63it/s]
100%|██████████| 265/265 [00:02<00:00, 110.81it/s]
100%|██████████| 148/148 [00:01<00:00, 122.46it/s]
100%|██████████| 189/189 [00:01<00:00, 124.04it/s]
100%|██████████| 92/92 [00:00<00:00, 153.32it/s]
100%|██████████| 244/244 [00:02<00:00, 121.04it/s]
100%|██████████| 145/145 [00:01<00:00, 118.35it/s]
100%|██████████| 94/94 [00:00<00:00, 114.67it/s]
100%|██████████| 65/65 [00:00<00:00, 121.08it/s]
100%|██████████| 102/102 [00:00<00:00, 133.59it/s]
100%|██████████| 132/132 [00:01<00:00, 105.64it/s]
100%|██████████| 104/104 [00:00<00:00, 115.55it/s]
100%|██████████| 121/121 [00:01<00:00, 113.34it/s]
100%|██████████| 182/182 [00:01<00:00, 

_______________________________________________________________________________________

### Perform the analysis on the disaggregated data

In [22]:
import sygno
from sygno.signed_networkx import draw_signed_networkx

In [28]:
# Party and member tables for the analysis section. `members` is re-read from
# the same file as at the top of the notebook, so it is rebound to a fresh frame.
parties = pd.read_csv('../dataset/HSall_parties.csv')
members = pd.read_csv('../dataset/HSall_members.csv')

In [44]:
# define a dictionary where keys is the column 'party_code' and values is the column 'party_name' from the parties dataframe
party_codes_dic = parties[['party_name', 'party_code']].set_index('party_code').to_dict()['party_name']
party_codes_dic

{5000: 'Pro-Administration',
 4000: 'Anti-Administration',
 1: 'Federalist',
 13: 'Democrat-Republican',
 1346: 'Jackson Republican',
 6000: 'Crawford Federalist',
 7000: 'Jackson Federalist',
 7777: 'Crawford Republican',
 8000: 'Adams-Clay Federalist',
 8888: 'Adams-Clay Republican',
 22: 'Adams',
 555: 'Jackson',
 26: 'Anti Masonic',
 1275: 'Anti-Jackson',
 44: 'Nullifier',
 29: 'Whig',
 100: 'Democrat',
 328: 'Independent',
 112: 'Conservative',
 329: 'Ind. Democrat',
 403: 'Law and Order',
 603: 'Ind. Whig',
 310: 'American',
 1111: 'Liberty',
 300: 'Free Soil',
 46: 'States Rights',
 4444: 'Union',
 200: 'Republican',
 3333: 'Opposition',
 108: 'Anti-Lecompton Democrat',
 3334: 'Opposition',
 37: 'Constitutional Unionist',
 206: 'Unionist',
 203: 'Unconditional Unionist',
 331: 'Ind. Republican',
 1116: 'Conservative Republican',
 208: 'Liberal Republican',
 326: 'National Greenbacker',
 117: 'Readjuster Democrat',
 114: 'Readjuster',
 355: 'Union',
 356: 'Union Labor',
 340: 'Po

In [None]:
# One row per legislator: keep the first row of each icpsr and index by it.
# .copy() makes `pol` an independent frame, so the column assignments below
# do not act on a slice of `members`.
pol = members.drop_duplicates(subset='icpsr').copy()
pol.set_index('icpsr', inplace=True)

# The cells of this notebook read `members` straight from the CSV and never add
# a party_name column to it, so derive it from party_code when it is missing.
if 'party_name' not in pol.columns:
    pol['party_name'] = pol['party_code'].map(party_codes_dic)

# Node colour by party. The test is a substring match, with 'Republican'
# checked first, so a name containing both words is coloured as Republican.
# Missing (non-string) party names fall back to the default colour.
pol['color'] = pol['party_name'].apply(
    lambda name: 'yellow' if not isinstance(name, str)
    else 'firebrick' if 'Republican' in name
    else 'royalblue' if 'Democrat' in name
    else 'yellow'
)

# icpsr -> {'party_name': ...}
pol_dic = pol[['party_name']].to_dict(orient='index')
pol_dic

In [47]:
pol.head()
# something is off... to be reviewed by comparing with plot_compute_least_eigen.ipynb

Unnamed: 0_level_0,congress,chamber,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,bioguide_id,...,nominate_dim2,nominate_log_likelihood,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2,party_name,color
icpsr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
99869,1,President,99,0.0,USA,5000,,,"WASHINGTON, George",,...,,,,,,,,,Pro-Administration,yellow
379,1,House,44,2.0,GA,4000,0.0,1.0,"BALDWIN, Abraham",B000084,...,-0.373,-28.55029,0.758,103.0,12.0,,-0.429,-0.817,Anti-Administration,yellow
4854,1,House,44,1.0,GA,4000,0.0,1.0,"JACKSON, James",J000017,...,-0.181,-24.89986,0.776,98.0,9.0,,-0.559,-0.052,Anti-Administration,yellow
6071,1,House,44,3.0,GA,4000,0.0,1.0,"MATHEWS, George",M000234,...,-0.317,-12.62728,0.88,99.0,2.0,,-0.413,-0.232,Anti-Administration,yellow
1538,1,House,52,6.0,MD,5000,0.0,1.0,"CARROLL, Daniel",C000187,...,-0.74,-23.47008,0.783,96.0,11.0,,0.114,-0.779,Pro-Administration,yellow


In [45]:
def draw_graph(df, pol, congress_label):
    """Draw the signed co-voting network and annotate it with party counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Edgelist with the columns ``source``, ``target`` and ``weight``.
    pol : pandas.DataFrame
        Member table carrying ``icpsr`` (as a column or as the index) plus
        ``state_abbrev``, ``party_name``, ``color``, ``congress`` and
        ``bioname``.  The first row of each ``icpsr`` is used.
    congress_label :
        Currently unused; kept so existing callers keep working.

    Raises
    ------
    KeyError
        If a node of the graph has no row in ``pol``.

    Notes
    -----
    The figure is shown with ``plt.show()``; nothing is returned.
    NOTE(review): the count labels are drawn in "ghostwhite" while the theme
    passed to ``draw_signed_networkx`` is "white" -- check they are legible.
    """
    G = nx.from_pandas_edgelist(df, 'source', 'target', ['weight'])

    # Accept icpsr either as a column or as the index of `pol`.
    members_table = pol if 'icpsr' in pol.columns else pol.reset_index()
    first_rows = members_table.drop_duplicates(subset='icpsr').set_index('icpsr')

    node_attrs = first_rows[['state_abbrev', 'party_name', 'color', 'congress', 'bioname']]
    node_attrs.columns = ['state', 'party', 'color', 'congress', 'name']
    nodeData = node_attrs.to_dict(orient='index')
    # Entries for members that are not nodes of G are ignored by networkx.
    nx.set_node_attributes(G, nodeData)

    colors = [attrs['color'] for _, attrs in G.nodes(data=True)]

    fig, ax, pos, least_eigenvalue = draw_signed_networkx(G, node_color=colors, compact = True, theme="white", normalize=True, scale='log', jittering=0.01, margin=0.2, show_rotation=False, highlight_edges='frustrated', remove_n_outliers=3,sort_by='party')

    # Count Democrats and Republicans on each side of x = 0
    # (a node with x == 0 falls on the 'neg' side).
    positions_party = {'pos': {'Democrat': 0,'Republican':0}, 'neg': {'Democrat': 0,'Republican':0}}
    for p in pos.keys():
        side = 'pos' if pos[p].x > 0 else 'neg'
        party = nodeData[int(p)]['party']
        if party in positions_party[side]:
            positions_party[side][party] += 1

    # Counts of the negative side go bottom-left, those of the positive side bottom-right.
    for side, x_anchor in (('neg', 0), ('pos', 0.9)):
        plt.text(x_anchor, .04, "Rep: "+str(positions_party[side]['Republican']), fontsize=13, transform=ax.transAxes, color="ghostwhite")
        plt.text(x_anchor, .09, "Dem: "+str(positions_party[side]['Democrat']), fontsize=13, transform=ax.transAxes, color="ghostwhite")

    plt.show()

In [46]:
def read_edgelist(folder, threshold_pos, threshold_neg, congress_num, weight=1):
    """Draw a thresholded edgelist and return the in/out-party crossing point.

    The edgelist is filtered (agreement edges with ``perc > threshold_pos``,
    disagreement edges with ``perc > threshold_neg``), drawn with
    ``draw_graph`` using the module-level ``pol`` table, and then the ``perc``
    distributions of in-party and out-party edges of the requested ``weight``
    are compared through Gaussian kernel density estimates.

    Parameters
    ----------
    folder : str
        Path of the edgelist CSV file (passed to ``pandas.read_csv``).
    threshold_pos, threshold_neg : float
        Lower bounds (exclusive) on ``perc`` for edges of weight 1 and -1.
    congress_num :
        Forwarded to ``draw_graph`` as its label argument.
    weight : int, default 1
        Which edge sign to use for the density comparison (1 or -1).

    Returns
    -------
    float
        ``perc`` value of the crossing between the two densities that lies
        closest to the midpoint of their peaks.

    Raises
    ------
    ValueError
        If either group has fewer than two edges, or the densities never cross
        on the evaluation grid.
    """
    df = pd.read_csv(folder)
    df_pos = df.loc[(df['weight'] == 1) & (df['perc'] > threshold_pos)].copy()
    df_neg = df.loc[(df['weight'] == -1) & (df['perc'] > threshold_neg)].copy()
    df = pd.concat([df_pos, df_neg])

    draw_graph(df, pol, congress_num)

    def _midpoint(p1, p2):
        return {'x': (p1['x'] + p2['x']) / 2, 'y': (p1['y'] + p2['y']) / 2}

    def line_intersection(in_state, out_state, intersect_points):
        index_in = np.argmax(in_state[1])
        index_out = np.argmax(out_state[1])

        # peak (highest-density point) of each distribution
        point_in = {'x': in_state[0][index_in], 'y': in_state[1][index_in]}
        point_out = {'x': out_state[0][index_out], 'y': out_state[1][index_out]}

        # point halfway between the two peaks
        midpoint = _midpoint(point_in, point_out)

        # index of the intersection closest to that midpoint
        index_closer = np.argmin([np.sqrt((p[0] - midpoint['x'])**2 + (p[1] - midpoint['y'])**2) for p in intersect_points])

        # x value of the closest intersection
        return intersect_points[index_closer][0]

    # The in/out label is read from 'state' when present; the edgelists written
    # by create_edgelist_from_congress store it in 'party'.
    group_col = 'state' if 'state' in df.columns else 'party'
    x0 = df.loc[(df[group_col] == 'in') & (df['weight'] == weight), 'perc']
    x1 = df.loc[(df[group_col] == 'out') & (df['weight'] == weight), 'perc']

    if len(x0) < 2 or len(x1) < 2:
        raise ValueError(
            "need at least two 'in' and two 'out' edges with weight "
            + str(weight) + " to estimate the densities"
        )

    # Bandwidth factor n ** (-1/6), computed separately for each sample.
    kde0 = gaussian_kde(x0, bw_method=len(x0) ** (-1. / (2 + 4)))
    kde1 = gaussian_kde(x1, bw_method=len(x1) ** (-1. / (2 + 4)))

    xmin = min(x0.min(), x1.min())
    xmax = max(x0.max(), x1.max())
    dx = 0.2 * (xmax - xmin)  # add a 20% margin, as the kde is wider than the data
    xmin -= dx
    xmax += dx

    grid = np.linspace(xmin, xmax, 500)
    kde0_x = kde0(grid)
    kde1_x = kde1(grid)

    # Grid indices where the sign of (kde0 - kde1) changes, i.e. the curves cross.
    idx = np.argwhere(np.diff(np.sign(kde0_x - kde1_x))).flatten()
    if idx.size == 0:
        raise ValueError('the in-party and out-party densities do not intersect')

    intersect_points = [[px, py] for px, py in zip(grid[idx], kde1_x[idx])]
    threshold = line_intersection([grid, kde0_x], [grid, kde1_x], intersect_points)

    return threshold