In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Display the current working directory: the relative 'data/...' paths used
# in this notebook are resolved against it.
os.getcwd()

In [None]:
# Create the output folder (including missing parent folders).
# exist_ok=True makes this a no-op when the folder already exists.
os.makedirs('data/Gephi_files', exist_ok=True)

In [None]:
# Convert every edge-list CSV found in `input_dir` into two CSV files written
# to `output_dir`: `nodelist_<filename>` (one row per distinct buyer/bidder id)
# and `edgelist_<filename>` (one row per input row).

# Set input and output directory paths
input_dir = 'data/edgelists/'
output_dir = 'data/Gephi_files/'

# Node ids that receive a dedicated label when the input file name contains
# 'beton' or 'roads' respectively.
BETON_SUPPLIERS = ['S_895', 'S_1534', 'S_825', 'S_499', 'S_7759', 'S_1533',
                   'S_7583', 'S_824', 'S_857', 'S_856', 'S_826', 'S_896',
                   'S_827', 'S_500']
ROADS_BUYERS = ['B_1191', 'B_1481', 'B_21']
ROADS_SUPPLIERS = ['S_11839']

# Replacement for an `edge_cri` of exactly 0, so that the derived `Weight`
# column is never zero (presumably zero-weight edges are a problem for the
# downstream tool -- confirm).
ZERO_CRI_WEIGHT = 0.0000001


def iter_edgelist_files(directory):
    """Yield, in sorted order, the names of the files in *directory* to convert.

    Entries whose name contains 'og' are skipped (and 'Ignore' is printed for
    each of them). Entries that are not regular files, e.g. a
    `.ipynb_checkpoints` folder, are skipped as well because `pd.read_csv`
    cannot read them.
    """
    for name in sorted(os.listdir(directory)):
        if 'og' in name:
            print('Ignore')
            continue
        if not os.path.isfile(os.path.join(directory, name)):
            continue
        yield name


def _side_nodes(df, prefix, label):
    """Return the `<prefix>_*` node columns of *df* under common names.

    The result is a new frame with columns `Id`, `contract_number`, `cri`,
    `tot_value` and a constant `Label` column; *df* itself is not modified.
    """
    columns = {f'{prefix}_id': 'Id',
               f'{prefix}_contracts': 'contract_number',
               f'{prefix}_cri': 'cri',
               f'{prefix}_tot_value': 'tot_value'}
    # rename() + assign() both return new frames, so no value is ever set on
    # a slice of `df` (which is what triggers SettingWithCopyWarning).
    return df[list(columns)].rename(columns=columns).assign(Label=label)


def build_node_list(df, filename):
    """Build the node table for one edge-list frame.

    Buyers (label 'buyer') and bidders (label 'supplier') are stacked and
    de-duplicated on `Id`, keeping the first occurrence. When *filename*
    contains 'beton' or 'roads', the ids listed in the module constants get a
    sector-specific label instead. The `tag` column is a copy of `Label`.
    """
    node_list = pd.concat([_side_nodes(df, 'buyer', 'buyer'),
                           _side_nodes(df, 'bidder', 'supplier')],
                          ignore_index=True)
    node_list = node_list.drop_duplicates(subset=['Id'])

    if 'beton' in filename:
        print('In Beton')
        node_list.loc[node_list['Id'].isin(BETON_SUPPLIERS), 'Label'] = 'supplier_beton'
    elif 'roads' in filename:
        print('In Roads')
        node_list.loc[node_list['Id'].isin(ROADS_BUYERS), 'Label'] = 'buyer_roads'
        node_list.loc[node_list['Id'].isin(ROADS_SUPPLIERS), 'Label'] = 'supplier_roads'

    node_list['tag'] = node_list['Label']
    return node_list


def build_edge_list(df):
    """Build the edge table for one edge-list frame.

    Columns, in order: `Source` (buyer id), `Target` (bidder id),
    `edge_contracts`, `edge_cri`, `tot_value`, `Type` (always 'undirected')
    and `Weight` (equal to `edge_cri`). An `edge_cri` of exactly 0 is replaced
    by `ZERO_CRI_WEIGHT` in both `edge_cri` and `Weight`.
    """
    edges = df[['buyer_id', 'bidder_id', 'edge_contracts', 'edge_cri', 'edge_tot_value']]
    edges = edges.rename(columns={'buyer_id': 'Source',
                                  'bidder_id': 'Target',
                                  'edge_tot_value': 'tot_value'})
    edges['Type'] = 'undirected'
    edges['edge_cri'] = np.where(edges['edge_cri'] == 0, ZERO_CRI_WEIGHT, edges['edge_cri'])
    edges['Weight'] = edges['edge_cri']
    return edges


# A notebook cell or a direct script run has __name__ == '__main__', so the
# conversion still runs there; the guard only prevents it from running when
# the helpers above are imported from elsewhere.
if __name__ == '__main__':
    # Loop over files in input directory
    for filename in iter_edgelist_files(input_dir):
        # Load input file
        df = pd.read_csv(os.path.join(input_dir, filename), encoding="utf-8")

        # Process data and generate node and edge lists
        node_list = build_node_list(df, filename)
        edges = build_edge_list(df)

        # Write output files
        node_list.to_csv(os.path.join(output_dir, f'nodelist_{filename}'), index=False)
        edges.to_csv(os.path.join(output_dir, f'edgelist_{filename}'), index=False)