In [None]:
import pandas as pd

# Data from https://www.opensecrets.org/orgs/all-profiles
# Maps the short company label (used as the node id downstream) to its CSV file.
company_files = {
    'AstraZeneca': '../Data/AstraZeneca.csv',
    'BristolMyers': '../Data/Bristol-Myers.csv',
    'EliLilly': '../Data/Eli_Lilly.csv',
    'GlaxoSmith': '../Data/GlaxoSmithKline.csv',
    'Merck': '../Data/Merck&Co.csv',
    'Novartis': '../Data/Novartis_AG.csv',
    'NovoNordisk': '../Data/Novo_Nordisk.csv',
    'Pfizer': '../Data/Pfizer_Inc.csv',
    'PharmaResearch': '../Data/Pharmaceutical_ResearchManufacturers.csv',
    'Takeda': '../Data/Takeda_Pharmaceutical.csv',
}

# Keep only the two columns used downstream and tag every row with its company label.
# `.assign` returns a new frame, so the Company column is never written onto a
# slice of the raw frame (that pattern triggers pandas' SettingWithCopyWarning).
data_dict = {
    company: pd.read_csv(path)[['Recipient', 'Total']].assign(Company=company)
    for company, path in company_files.items()
}

# Stack the per-company frames into one long table with a fresh 0..n-1 index.
all_data = pd.concat(data_dict.values(), ignore_index=True)


In [None]:
from cosmograph import cosmo

# Number of distinct companies that contributed to each recipient.
# A single groupby pass replaces re-filtering the whole table once per recipient.
# Rows with a missing Recipient are skipped by groupby; they could never reach
# the "all companies" threshold below anyway.
all_nodes = (
    all_data.groupby('Recipient', sort=False)['Company']
    .nunique()
    .to_dict()
)

# Company labels in first-seen order; their count is the threshold for a
# recipient to be considered "shared by every company".
company_names = all_data['Company'].unique().tolist()
n_companies = len(company_names)

# Graph nodes: recipients funded by every company, followed by the companies themselves.
nodes = [name for name, n in all_nodes.items() if n == n_companies] + company_names
dat = all_data[all_data['Recipient'].isin(nodes)]

sources = dat['Company']
targets = dat['Recipient']

# Every node is either a company label or a recipient name, so one set lookup
# per node is enough to classify it.
company_set = set(company_names)
categories = ['Company' if node in company_set else 'Recipient' for node in nodes]

points = pd.DataFrame({
    'id': nodes,
    'label': nodes,
    'category': categories,
})

links = pd.DataFrame({
    'source': sources,
    'target': targets,
})

# NOTE(review): `points` and `links` have no 'value' column, yet both
# *_include_columns arguments name one — confirm how cosmograph treats a
# missing column, or add/remove it as intended.
widget = cosmo(
  points=points,
  links=links,
  point_id_by='id',
  link_source_by='source',
  link_target_by='target',
  point_color_by='category',
  simulation_decay = 100,
  simulation_repulsion = 100,
  simulation_link_dist_random_variation_range = [1,1],
  simulation_friction = 1,
  point_include_columns=['value'],
  point_label_by='label',
  link_include_columns=['value'],
  link_arrows = False,
  render_hovered_point_ring = False,
  enable_drag = True,
  show_dynamic_labels=False,
  show_top_labels= False,
)
widget




In [None]:
# Edge list: the 'source' and 'target' columns of `links`, copied into a new frame.
main_dat = links[['source', 'target']].copy()

# Node list: the distinct targets followed by the distinct sources.
unique_targets = set(targets.tolist())
unique_sources = set(sources.tolist())
nodes = [*unique_targets, *unique_sources]

# value is 1 when the node id appears among the link sources, otherwise 0.
metadata = pd.DataFrame({'id': nodes}).assign(
    value=lambda frame: frame['id'].isin(sources).astype(int)
)

# Write both tables without the index column.
main_dat.to_csv('../Data/Processed_Data/main_dat.csv', index=False)
metadata.to_csv('../Data/Processed_Data/metadata.csv', index=False)


In [None]:
# Full graph: every company -> recipient row in `all_data` becomes a link.
total_sources = all_data['Company']
total_targets = all_data['Recipient']

# Distinct node ids in first-seen order (companies first, then recipients).
# Unlike iterating a set, this order is the same on every run, so anything
# exported from `total_nodes` is reproducible.
total_nodes = pd.concat([total_sources, total_targets]).drop_duplicates().tolist()

# Classify each node with a set lookup instead of rebuilding the column lists
# for every node. Anything that is not a company label came from the
# Recipient column.
company_set = set(total_sources)
categories = ['Company' if node in company_set else 'Recipient' for node in total_nodes]

total_points = pd.DataFrame({
    'id': total_nodes,
    'label': total_nodes,
    'category': categories,
})

total_links = pd.DataFrame({
    'source': total_sources,
    'target': total_targets,
})

# NOTE(review): `total_points` and `total_links` have no 'value' column, yet
# both *_include_columns arguments name one — confirm how cosmograph treats a
# missing column, or add/remove it as intended.
widget = cosmo(
  points=total_points,
  links=total_links,
  point_id_by='id',
  link_source_by='source',
  link_target_by='target',
  point_color_by='category',
  simulation_decay = 100,
  simulation_repulsion = 100,
  simulation_link_dist_random_variation_range = [1,1],
  simulation_friction = 1,
  point_include_columns=['value'],
  point_label_by='label',
  link_include_columns=['value'],
  link_arrows = False,
  render_hovered_point_ring = False,
  enable_drag = True,
  show_dynamic_labels=False,
  show_top_labels= False,
)
widget

In [37]:
# Edge list: the 'source' and 'target' columns of `total_links`, copied into a new frame.
all_main_dat = total_links[['source', 'target']].copy()

# One row per node id; value is 1 when the id appears in `total_sources`, otherwise 0.
all_metadata = pd.DataFrame({'id': total_nodes}).assign(
    value=lambda frame: frame['id'].isin(total_sources).astype(int)
)

# Write both tables without the index column.
all_main_dat.to_csv('../Data/Processed_Data/all_main_dat.csv', index=False)
all_metadata.to_csv('../Data/Processed_Data/all_metadata.csv', index=False)

In [None]:
# Recipients that received contributions from every company in the data set.
# The threshold is derived from the data rather than hard-coded, so it stays
# correct if a company file is added or removed.
n_companies = all_data['Company'].nunique()
recipients = [name for name, n in all_nodes.items() if n == n_companies]

if not recipients:
    # Without this guard the average below would divide by zero.
    raise ValueError('No recipient received contributions from every company.')

# Sum of 'Total' per shared recipient, computed in one groupby pass instead of
# re-filtering the whole table for each recipient.
shared_rows = all_data[all_data['Recipient'].isin(recipients)]
tst = shared_rows.groupby('Recipient', sort=False)['Total'].sum().to_dict()

# float() keeps the result a plain Python number so round() below yields an int.
avg_contribution = float(sum(tst.values())) / len(tst)

# NOTE(review): these constants look like ~161M voters x 46% turnout, spread
# over the 435 U.S. House seats and split between 2 candidates per seat —
# confirm the figures and their source.
ELECTORATE = 161000000
TURNOUT = 0.46
SEATS = 435
CANDIDATES_PER_SEAT = 2
avg_votes_per_seat = ((ELECTORATE * TURNOUT) / SEATS) / CANDIDATES_PER_SEAT

avg_contribution_per_vote = avg_contribution / avg_votes_per_seat

# Reported as the ratio times 100, suffixed with '%'.
print(round(avg_contribution_per_vote*100),'%',sep='')