In [1]:
import json
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import refinitiv.data as rd
from refinitiv.data.content import symbol_conversion, search

## Step 0: Open Refinitiv API session

In [2]:
rd.open_session()

<refinitiv.data.session.Definition object at 0x14222bb50 {name='workspace'}>

## Step 1: Get RIC Codes for a batch of firms

In [3]:
def get_ric_codes(company_names):
    ric_dict = {}
    for company in company_names:
        try:
            company_df = rd.discovery.search(
                view=search.Views.ORGANISATIONS,
                filter=f"CommonName xeq '{company}'",
                select="PrimaryRIC"
            )
            if not company_df.empty:
                ric_dict[company] = company_df.iloc[0]['PrimaryRIC']
        except Exception as e:
            print(f"Error fetching RIC for {company}: {e}")
    return ric_dict

## Step 2: Fetch Firm Data (Nodes)

In [4]:
def fetch_firm_data(ric_list):
    fields = ['TR.CommonName', 
              'TR.RIC', 
              'TR.HeadquartersCountry',
              'TR.CompanyMarketCapitalization(Curn=USD)',
              'TR.F.RevGoodsSrvc5YrAvg(Period=FY0,Curn=USD)'
             ]
    firm_data = rd.get_data(universe=ric_list, fields=fields)
    
    return firm_data

## Step 3: Fetch Supplier-Buyer Relationships (Edges)

In [15]:
def fetch_supplier_buyer_relationships(ric_list):
    fields = ['TR.SCRelationship.ScorgIDOut',
              'TR.SCRelationship', 
              'TR.SCRelationship.instrument', 
              'TR.SCRelationshipConfidenceScore'
             ]
    ties_data = rd.get_data(universe=ric_list, fields=fields)
    ties_df = ties_data[ties_data['Value Chains Relationship Confidence Score'] >= 0.5]
    ties_df = ties_df.iloc[:, 1:]
    print(ties_df)
    return ties_df

## Step 4: Match Supplier-Buyer Names

In [6]:
def match_supplier_buyer_names(df_relationships, df_firms):
    name_dict = df_firms.set_index('TR.RIC')['TR.CommonName'].to_dict()
    df_relationships['SupplierName'] = df_relationships['TR.SCRelationship.instrument'].map(name_dict)
    df_relationships['BuyerName'] = df_relationships['TR.SCRelationship.ScorgIDOut'].map(name_dict)
    
    return df_relationships

## Step 5: Expand the search if new firms are found

In [7]:
def expand_search(ric_list, df_relationships):
    new_firms = set(df_relationships['TR.SCRelationship.ScorgIDOut']).union(set(df_relationships['TR.SCRelationship.instrument']))
    new_firms = list(new_firms.difference(ric_list))
    
    return new_firms

## Step 6: Export Data to CSV

In [8]:
def save_data(df_firms, df_relationships):
    df_firms.to_csv('firm_data.csv', index=False)
    df_relationships.to_csv('relationship_data.csv', index=False)

## Step 7: Visualize Network

In [9]:
def visualize_network(df_relationships):
    G = nx.Graph()
    for _, row in df_relationships.iterrows():
        G.add_edge(row['SupplierName'], row['BuyerName'])
    plt.figure(figsize=(10, 6))
    nx.draw(G, with_labels=True, node_size=50, font_size=8)
    plt.show()

## Step 8: Convert for R

In [10]:
def convert_to_r_format(df_relationships):
    adj_matrix = nx.to_numpy_array(nx.from_pandas_edgelist(df_relationships, 'SupplierName', 'BuyerName'))
    pd.DataFrame(adj_matrix).to_csv('network_matrix.csv', index=False)

## Debugging section

In [27]:
'''
company_names = ["Intel Corp"]
ric_dict = get_ric_codes(company_names)
'''

'\ncompany_names = ["Intel Corp"]\nric_dict = get_ric_codes(company_names)\n'

## Main execution code body

In [18]:
#company_names = ["Intel Corp", 
#                 "Texas Instruments Inc", 
#                 "SK Hynix Inc", 
#                 "Taiwan Semiconductor Manufacturing Co Ltd"
#                ]

ric_dict = get_ric_codes(company_names)
ric_list = list(ric_dict.values())

df_firms = fetch_firm_data(ric_list)
df_relationships = fetch_supplier_buyer_relationships(ric_list)
df_relationships = match_supplier_buyer_names(df_relationships, df_firms)

new_firms = expand_search(ric_list, df_relationships)
while new_firms and len(ric_list) < 1000:  # Limit expansion to 1000 firms
    ric_new_dict = get_ric_codes(new_firms)
    new_ric_list = list(ric_new_dict.values())
    ric_list.extend(new_ric_list)
    
    df_new_firms = fetch_firm_data(new_ric_list)
    df_new_relationships = fetch_supplier_buyer_relationships(new_ric_list)
    df_new_relationships = match_supplier_buyer_names(df_new_relationships, df_new_firms)
    
    df_firms = pd.concat([df_firms, df_new_firms], ignore_index=True)
    df_relationships = pd.concat([df_relationships, df_new_relationships], ignore_index=True)
    
    new_firms = expand_search(ric_list, df_new_relationships)

save_data(df_firms, df_relationships)
visualize_network(df_relationships)
convert_to_r_format(df_relationships)

    Instrument Related OrganizationID Instrument  \
0       INTC.O             4298009357     INTC.O   
1       INTC.O             4295894443     INTC.O   
2       INTC.O             4297314987     INTC.O   
3       INTC.O             4295864818     INTC.O   
4       INTC.O             4295906779     INTC.O   
..         ...                    ...        ...   
426    2330.TW             4295905573    2330.TW   
427    2330.TW             4295905573    2330.TW   
428    2330.TW             4295884869    2330.TW   
429    2330.TW             4295914405    2330.TW   
430    2330.TW             4295914405    2330.TW   

     Value Chains Relationship Confidence Score  
0                                      0.262778  
1                                      0.263386  
2                                      0.263613  
3                                      0.263768  
4                                      0.264153  
..                                          ...  
426                      

KeyError: "None of ['TR.RIC'] are in the columns"