In [1]:
#Set up
import seaborn as sns
import pandas as pd
from datetime import datetime
import ast
import os
import noteql
import matplotlib.pyplot as plt
from time import sleep

# Organisation-Identifier Networking

> Organisations publishing IATI data declare participating organisations, which have various roles in their activities. By using Organisation-Identifiers, participating organisations can be uniquely identified, allowing us to create a network.

> This application visualises the network of shared organisation references in IATI data, based on the participating-org/@ref element. You can create your own visualisation based on type of reporting organisation, specific countries, or using a list of specific organisations. 

Example network:

In [2]:
# Render the example Flourish chord diagram inline via its embed snippet.
from IPython.core.display import HTML
HTML('<div class="flourish-embed flourish-chord" data-src="visualisation/17570004" data-width="28%"><script src="https://public.flourish.studio/resources/embed.js"></script></div>')

<hr>

In [3]:
# function to add flexibility to change width and viz_id and also error handling

from IPython.core.display import HTML

def show_flourish(viz_id="17570004", width="28%"):  # default values
    """Build the Flourish chord-diagram embed for display in the notebook.

    Args:
        viz_id: Flourish visualisation id placed in the ``data-src`` attribute.
        width: Value placed in the ``data-width`` attribute.

    Returns:
        An ``HTML`` display object wrapping the embed markup, or ``None`` if
        building it raised (the error is printed).
    """
    embed = None
    try:
        markup = (
            '<div class="flourish-embed flourish-chord" '
            f'data-src="visualisation/{viz_id}" data-width="{width}">'
            '<script src="https://public.flourish.studio/resources/embed.js"></script></div>'
        )
        embed = HTML(markup)
    except Exception as err:
        print(f"Failed to load visualization: {err}")
    return embed


# The returned display object is the cell's last expression, so it renders inline.
show_flourish()  # Uses defaults (17570004, 28%)


## Running the application:

1. Select the type of visualisation you want to create from the dropdown menu: A. Reporting organisation type, B. Country, or C. A list of reporting organisations.

In [4]:
# Selected visualisation mode; later cells branch on 'Organisations by Type',
# 'Country' or 'Specific Organisations'.
vis_type = 'Organisations by Type'

2A. If you are interested in specific organisation types, select them using the dropdown menu. Selecting a large number of organisation types may cause this application to run slowly or fail.

In [5]:
# Reporting-organisation type codes to query when vis_type is
# 'Organisations by Type' (codes are listed in the table below).
rep_org_type = ['10']

**Organisation Types:**

| **Code** | **Name** |
|---------|---------------------------------|
| 10 | Government |
| 11 | Local Government |
| 15 | Other Public Sector |
| 21 | International NGO |
| 22 | National NGO |
| 23 | Regional NGO |
| 24 | Partner Country based NGO | 
| 30 | Public Private Partnership |	
| 40 | Multilateral |
| 60 | Foundation |
| 70 | Private Sector |
| 71 | Private Sector in Provider Country |
| 72 | Private Sector in Aid Recipient Country |
| 73 | Private Sector in Third Country |
| 80 | Academic, Training and Research | 	
| 90 | Other |


Get the list of countries for the country selector dropdown.

In [6]:
#download latest registry info
# wget stores the publisher list in a local file named "csv"; it is read into
# `registry` and the file is then removed.
!wget https://iatiregistry.org/publisher/download/csv
registry = pd.read_csv("csv")
!rm -rf "csv" 

# Sorted unique values of the registry's HQ country/region column.
countries = sorted(registry['HQ Country or Region'].unique())

In [7]:
# Same download as the previous cell, with error reporting and guaranteed
# clean-up of the temporary "csv" file.
# NOTE(review): if reading fails here, `registry` and `countries` keep whatever
# value the previous cell gave them; a failed `!wget` may not raise by itself,
# so the error would surface at pd.read_csv - confirm.
try:
    !wget https://iatiregistry.org/publisher/download/csv
    registry = pd.read_csv("csv")
    countries = sorted(registry['HQ Country or Region'].unique())
except Exception as e:
    print(f"Error downloading or processing registry: {e}")
finally:
    !rm -rf "csv"  # Clean up regardless of success/failure

2B.  If you are interested in reporting organisations based in a specific country or countries, select them using the dropdown menu. You can find out where an organisation is based using the registry. Selecting a large number of countries may cause this application to run slowly or fail.

In [8]:
# Country/region names to select; matched against the registry's
# 'HQ Country or Region' column in later cells.
country_query = ['(No country assigned)']

2C.  If you are interested in a list of specific reporting organisations, enter the Organisation Identifiers of the IATI Publishers you want to select into the text box, copying this formatting exactly: 'AU-5', '44000'.  You can find these identifiers using the registry.

In [9]:
# Raw text of quoted, comma-separated organisation identifiers; it is parsed
# with ast.literal_eval in a later cell.
pub_org_id = ''

Reporting organisation identifiers:

In [10]:
# Show publisher name, organisation identifier and HQ country from the registry.
registry[['Publisher', 'IATI Organisation Identifier','HQ Country or Region']]

3. Click the "Run" button.

In [11]:
# Report which selection this run is based on, depending on the chosen
# visualisation type. Nothing is printed for an unrecognised vis_type.
if vis_type == 'Organisations by Type':
    print("Last run on organisation types:", *rep_org_type)
elif vis_type == 'Country':
    print("Last run on organisations with headquarters in:", *country_query)
elif vis_type == 'Specific Organisations':
    # pub_org_id is one string of identifiers; unpacking it with * would print
    # it character by character, so it is printed as a whole.
    print("Last run on reporting organisations:", pub_org_id)

In [12]:
def print_status(vis_type, data):
    """Print a one-line summary of the selection used for this run.

    Args:
        vis_type: One of 'Organisations by Type', 'Country' or
            'Specific Organisations'. Any other value prints an
            "invalid type" message.
        data: The selected values. An iterable is printed item by item,
            separated by spaces. A single string is printed as one item
            (it is not split into characters). ``None`` prints no values.
    """
    if vis_type == 'Organisations by Type':
        label = "Last run on organisation types:"
    elif vis_type == 'Country':
        label = "Last run on organisations with headquarters in:"
    elif vis_type == 'Specific Organisations':
        label = "Last run on reporting organisations:"
    else:
        print("Invalid visualization type selected")
        return

    # Normalise the values only once the type is known to be valid.
    if data is None:
        items = []
    elif isinstance(data, str):
        # A str is iterable; unpacking it would print one character at a time.
        items = [data]
    else:
        items = list(data)

    print(label, *items)


# print_status(vis_type, rep_org_type)  # for organization types
# print_status(vis_type, country_query)  # for countries
# print_status(vis_type, pub_org_id)     # for specific organizations

In [13]:
# Pass the selection that belongs to the chosen visualisation type, so the
# printed label and the printed values always agree.
_status_inputs = {
    'Organisations by Type': rep_org_type,
    'Country': country_query,
    'Specific Organisations': pub_org_id,
}
print_status(vis_type, _status_inputs.get(vis_type, []))

# Download data

## Setup

Using https://iati-tables.opendata.coop/. The following cell sets up the basic libraries needed for analysis and makes a connection to the database. 

In [14]:
# Use seaborn's "notebook" plotting context.
sns.set_context('notebook')

#start noteql session
session = noteql.local_db_session()
# Restart postgres to make sure any existing connections get dropped
!sudo service postgresql restart
# `session` is rebound here to a Session configured with the datasette URL;
# the local session created above is no longer referenced by this name.
session = noteql.Session(datasette_url='https://datasette.codeforiati.org/iati.json', connect_args={'connect_timeout': 1000})

In [15]:
def setup_database_connection(timeout=1000):
    """Restart PostgreSQL and open a noteql session against the IATI datasette.

    Args:
        timeout: Value passed as ``connect_timeout`` in the session's
            ``connect_args``.

    Returns:
        The new ``noteql.Session``, or ``None`` if creating it failed (the
        error is printed). A failed PostgreSQL restart is reported as a
        warning and does not stop the session from being created.
    """
    # Local import: subprocess is used only here.
    import subprocess

    try:
        sns.set_context('notebook')

        print("Initializing database connection...")

        # Restart PostgreSQL so existing connections are dropped. A `!command`
        # shell escape does not raise on a non-zero exit status, so the warning
        # below could never fire; check=True makes a failed restart raise.
        try:
            completed = subprocess.run(
                ["sudo", "service", "postgresql", "restart"],
                check=True,
                capture_output=True,
                text=True,
            )
            if completed.stdout:
                print(completed.stdout, end="")
            sleep(2)  # Giving PostgreSQL time to restart
            print("PostgreSQL restarted successfully")
        except Exception as e:
            print(f"Warning: PostgreSQL restart failed: {e}")

        # Creating new session
        session = noteql.Session(
            datasette_url='https://datasette.codeforiati.org/iati.json',
            connect_args={'connect_timeout': timeout}
        )

        print("Database connection established successfully")
        return session

    except Exception as e:
        print(f"Error setting up database connection: {e}")
        return None

In [16]:
# Rebinds `session`; the value is None if the helper reported an error.
session = setup_database_connection()

## IATI-tables Queries

Downloading all participating organisations reported by selected reporting org type, countries, or ids.

Prepare country filter - based on ISO code as much as possible.

In [17]:
#find country iso codes
# Only empty cells are treated as missing: with pandas' default NA handling a
# literal code such as "NA" (the ISO 3166 code for Namibia) would be read as NaN.
iso_codes = pd.read_csv("Country.csv", keep_default_na=False, na_values=[""])
# Keep the rows whose name is one of the selected countries.
iso_codes = iso_codes[iso_codes['name'].isin(country_query)]
iso_codes['code']

In [18]:
def get_country_iso_codes(country_query, csv_path="Country.csv"):
    """Look up the codes of the selected countries in a code-list CSV.

    Args:
        country_query: Iterable of country names, matched against the
            ``name`` column of the CSV.
        csv_path: Path to a CSV file with ``name`` and ``code`` columns.

    Returns:
        The ``code`` column (a pandas Series) of the matching rows, or
        ``None`` when nothing matches, the file does not exist, or any other
        error occurs. Every problem is reported with ``print``.
    """
    try:
        # Reading country codes
        print(f"Loading country codes from {csv_path}...")
        # Only empty cells are treated as missing. With the default NA handling
        # pandas would read a literal code such as "NA" (the ISO 3166 code for
        # Namibia) as NaN and the code would be lost.
        iso_codes = pd.read_csv(csv_path, keep_default_na=False, na_values=[""])

        # Filtering based on country query
        filtered_codes = iso_codes[iso_codes['name'].isin(country_query)]

        if filtered_codes.empty:
            print(f"Warning: No ISO codes found for countries: {', '.join(country_query)}")
            return None

        print(f"Found ISO codes for {len(filtered_codes)} countries")
        return filtered_codes['code']

    except FileNotFoundError:
        print(f"Error: Country code file '{csv_path}' not found")
        return None
    except Exception as e:
        print(f"Error processing country codes: {e}")
        return None

In [19]:
# Rebinds `iso_codes` to the helper's result (a Series of codes, or None).
iso_codes = get_country_iso_codes(country_query)

In [20]:
# Display the codes found for the selected countries.
iso_codes

In [21]:
# Select the registry publishers whose headquarters are in the countries of
# interest, and append the two parts of each organisation identifier.
# Open question kept from the original: should org-ids be filtered by prefix too?
in_selected_countries = registry['HQ Country or Region'].isin(country_query)
filt_reg = registry[in_selected_countries]
# Split each identifier once, at its first '-', into separate columns.
split_id = filt_reg['IATI Organisation Identifier'].str.split('-', n=1, expand=True)
country_orgs = pd.concat([filt_reg, split_id], axis=1)
country_orgs

In [22]:
def process_country_organizations(registry, country_query):
    """Select registry publishers by HQ country and split their identifiers.

    Args:
        registry: DataFrame with 'HQ Country or Region' and
            'IATI Organisation Identifier' columns.
        country_query: List of country/region names to keep.

    Returns:
        The matching registry rows with extra columns holding the parts of
        each identifier split once at its first '-', or ``None`` when no row
        matches or an error occurs (both cases are printed).
    """
    try:
        print(f"Filtering organizations for countries: {', '.join(country_query)}")

        # Boolean mask of publishers headquartered in a requested country.
        hq_matches = registry['HQ Country or Region'].isin(country_query)
        selected = registry[hq_matches]
        if selected.empty:
            print("No organizations found for specified countries")
            return None

        # Identifier parts become extra columns next to the registry columns.
        id_parts = selected['IATI Organisation Identifier'].str.split('-', n=1, expand=True)
        combined = pd.concat([selected, id_parts], axis=1)

        print(f"Found {len(combined)} organizations")
        return combined
    except Exception as err:
        print(f"Error processing organizations: {err}")
        return None

In [23]:
# Rebinds `country_orgs`; the value is None when no publisher matched or an error occurred.
country_orgs = process_country_organizations(registry, country_query)

In [24]:
# Display the selected publishers.
country_orgs

Format input text

In [25]:
# Parse the identifier text into a list of clean identifiers. An empty text
# box yields an empty list instead of a SyntaxError from literal_eval.
_parsed_ids = ast.literal_eval(pub_org_id) if pub_org_id.strip() else []
# A single quoted identifier evaluates to a plain str; wrap it so the loop
# below does not split it into characters.
if isinstance(_parsed_ids, str):
    _parsed_ids = [_parsed_ids]
pub_orgs = [n.strip() for n in _parsed_ids]
pub_orgs

In [26]:
def process_org_ids(pub_org_id):
    """Parse the identifier text box into a list of organisation identifiers.

    Args:
        pub_org_id: Text such as ``"'AU-5', '44000'"``, a Python literal
            holding one identifier or a comma-separated sequence of them.

    Returns:
        A list of identifiers with surrounding whitespace removed. An empty
        or whitespace-only string gives ``[]``. A single identifier gives a
        one-item list. ``None`` is returned when the text cannot be parsed
        (the problem is printed).
    """
    try:
        # Nothing entered: not an error, just no identifiers.
        if isinstance(pub_org_id, str) and not pub_org_id.strip():
            print("Processed 0 organization IDs")
            return []

        org_list = ast.literal_eval(pub_org_id)

        # One identifier on its own evaluates to a scalar. A str would otherwise
        # be iterated character by character; an int is not iterable at all.
        if isinstance(org_list, (str, int)):
            org_list = [org_list]

        # str() lets unquoted numeric identifiers through unchanged.
        cleaned_orgs = [str(org).strip() for org in org_list]

        print(f"Processed {len(cleaned_orgs)} organization IDs")
        return cleaned_orgs

    except ValueError as ve:
        print(f"Invalid format for organization IDs: {ve}")
        return None
    except SyntaxError as se:
        print("Invalid syntax in organization IDs string")
        return None
    except Exception as e:
        print(f"Error processing organization IDs: {e}")
        return None

In [27]:
# Rebinds `pub_orgs` to the helper's result (None when parsing fails).
pub_orgs = process_org_ids(pub_org_id)

In [28]:
# Display the parsed identifiers.
pub_orgs 

In [29]:
outputs = {}

if vis_type in ['Organisations by Type']:
    for i in rep_org_type:
        #tables query 
        df = %nql SELECT DISTINCT a.prefix, a.reportingorg_ref, b.reportingorg_type, b.reportingorg_typename, a.ref, a.type, a.typename, a.narrative FROM participatingorg as a LEFT JOIN activity as b on (b._link = a._link_activity) WHERE (b.reportingorg_type = {{i}}) ORDER BY a.narrative
        outputs[i] = df
elif vis_type in ['Country']:
    for i in country_orgs['IATI Organisation Identifier']:
        #tables query 
        df = %nql SELECT DISTINCT a.prefix, a.reportingorg_ref, a.ref, a.type, a.typename, a.narrative FROM participatingorg as a WHERE (a.reportingorg_ref = {{i}}) 
        outputs[i] = df
elif vis_type in ['Specific Organisations']:
    for i in pub_org_id :
        #tables query 
        df = %nql SELECT DISTINCT a.prefix, a.reportingorg_ref, a.ref, a.type, a.typename, a.narrative FROM participatingorg as a WHERE (a.reportingorg_ref = {{i}})
        outputs[i] = df

In [30]:
# Stack every query result into one frame with a fresh 0..n-1 index.
part_orgs =  pd.concat(outputs.values(), ignore_index=True)
part_orgs

In [31]:
def run_iati_queries(vis_type, rep_org_type=None, country_orgs=None, pub_org_id=None):
    """Run one participating-organisation query per selected value.

    Args:
        vis_type: 'Organisations by Type', 'Country' or
            'Specific Organisations'; any other value runs no query.
        rep_org_type: Iterable of reporting-organisation type codes, used for
            'Organisations by Type'.
        country_orgs: Object indexed with 'IATI Organisation Identifier' to
            get the identifiers to query, used for 'Country'.
        pub_org_id: Iterable of identifiers, used for
            'Specific Organisations'. A plain string is iterated character by
            character, so pass a list.

    Returns:
        A dict mapping each queried value to its query result, or ``None`` if
        an error occurs outside the per-item handlers. A failure of a single
        query is printed and that item is skipped.
    """
    try:
        outputs = {}
        
        if vis_type in ['Organisations by Type']:
            print(f"Querying data for {len(rep_org_type)} organization types...")
            for i in rep_org_type:
                try:
                    # NOTE(review): {{i}} is substituted by the nql magic; confirm it can
                    # see this function-local `i` and not only notebook-level variables.
                    df = %nql SELECT DISTINCT a.prefix, a.reportingorg_ref, b.reportingorg_type, b.reportingorg_typename, a.ref, a.type, a.typename, a.narrative FROM participatingorg as a LEFT JOIN activity as b on (b._link = a._link_activity) WHERE (b.reportingorg_type = {{i}}) ORDER BY a.narrative
                    outputs[i] = df
                    print(f"Processed organization type: {i}")
                except Exception as e:
                    print(f"Error processing org type {i}: {e}")
                    
        elif vis_type in ['Country']:
            print(f"Querying data for {len(country_orgs)} country organizations...")
            for i in country_orgs['IATI Organisation Identifier']:
                try:
                    df = %nql SELECT DISTINCT a.prefix, a.reportingorg_ref, a.ref, a.type, a.typename, a.narrative FROM participatingorg as a WHERE (a.reportingorg_ref = {{i}})
                    outputs[i] = df
                    print(f"Processed organization: {i}")
                except Exception as e:
                    print(f"Error processing org {i}: {e}")
                    
        elif vis_type in ['Specific Organisations']:
            print(f"Querying data for {len(pub_org_id)} specific organizations...")
            for i in pub_org_id:
                try:
                    df = %nql SELECT DISTINCT a.prefix, a.reportingorg_ref, a.ref, a.type, a.typename, a.narrative FROM participatingorg as a WHERE (a.reportingorg_ref = {{i}})
                    outputs[i] = df
                    print(f"Processed organization: {i}")
                except Exception as e:
                    print(f"Error processing org {i}: {e}")
        
        # Counts only the queries that succeeded.
        print(f"Successfully processed {len(outputs)} queries")
        return outputs
        
    except Exception as e:
        print(f"Error in query execution: {e}")
        return None

In [32]:
# Uses the notebook-level `outputs` dict; run_iati_queries is defined above
# but is not called in the cells shown here.
part_orgs =  pd.concat(outputs.values(), ignore_index=True)
part_orgs

In [33]:
# Remove any earlier export, then write the combined results to a single
# sheet named "part_orgs".
!rm -rf part_orgs.xlsx
part_orgs.to_excel("part_orgs.xlsx",sheet_name="part_orgs", index=False)

# Clean data

## Setup

In [34]:
session2 = noteql.local_db_session()
# Restart postgres to make sure any existing connections get dropped
!sudo service postgresql restart
# We can't get the noteql session to do these, because it wraps sql in a transaction, which isn't allowed for DROP/CREATE
!sudo -u postgres psql -c "DROP DATABASE IF EXISTS pa"
!sudo -u postgres psql -c "CREATE DATABASE pa"
# Rebind session2 to the freshly created "pa" database.
session2 = noteql.Session("postgresql+psycopg2://root@/pa", "public")

In [35]:
def setup_clean_database(db_name="iati_clean"):
    """Recreate an empty PostgreSQL database and open a noteql session on it.

    The database is dropped and created through ``psql`` rather than through
    a session, as these statements are issued outside the session.

    Args:
        db_name: Name of the database to recreate. It must be a plain SQL
            identifier (letters, digits, ``_`` or ``$``, not starting with a
            digit), because it is placed unquoted into the SQL statements.

    Returns:
        A ``noteql.Session`` connected to the new database, or ``None`` if
        the name is invalid or any step fails (the error is printed).
    """
    # Local imports: both modules are used only here.
    import re
    import subprocess

    try:
        # Validate first: the name ends up inside SQL run as the postgres user.
        if not isinstance(db_name, str) or not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_$]*", db_name):
            raise ValueError(f"invalid database name: {db_name!r}")

        # check=True makes a failed command raise. A `!command` shell escape
        # would let execution continue as if the step had succeeded.
        print("Restarting PostgreSQL service...")
        subprocess.run(["sudo", "service", "postgresql", "restart"], check=True)

        print(f"Dropping database '{db_name}' if it exists...")
        subprocess.run(
            ["sudo", "-u", "postgres", "psql", "-c", f"DROP DATABASE IF EXISTS {db_name}"],
            check=True,
        )

        print(f"Creating new database '{db_name}'...")
        subprocess.run(
            ["sudo", "-u", "postgres", "psql", "-c", f"CREATE DATABASE {db_name}"],
            check=True,
        )

        # Creating new session with the new database
        session2 = noteql.Session(f"postgresql+psycopg2://root@/{db_name}", "public")
        print("Clean database setup completed successfully.")
        return session2

    except Exception as e:
        print(f"Error setting up clean database: {e}")
        return None

In [36]:
# Rebinds session2 to a session on the helper's default database ("iati_clean"),
# not the "pa" database created in the earlier cell.
session2 = setup_clean_database()

## Load data

In [37]:
# sheet_name=None makes read_excel return a dict of sheet name -> DataFrame,
# so `part_orgs` is a dict from here on, not a single frame.
part_orgs = pd.read_excel('part_orgs.xlsx',sheet_name=None)

for key in part_orgs.keys():
    # Table name: the sheet name without spaces, in lower case.
    table_name = key.replace(' ','')
    table_name = table_name.lower()
    print(table_name)
    # NOTE(review): confirm which session this %nql line runs against; the
    # frame is loaded through session2 below.
    %nql DROP TABLE IF EXISTS {{table_name | ident}}; SELECT ''
    # Convert to a DataFrame and render
    df = pd.DataFrame.from_dict(part_orgs[key])
    session2.load_dataframe(df, table_name, index=False)

In [38]:
def load_excel_data(excel_file='part_orgs.xlsx'):
    """Load every sheet of an Excel workbook into the database as a table.

    Each sheet is loaded through the notebook-level ``session2`` into a table
    named after the sheet (spaces removed, lower-cased). Errors are printed
    and not raised; a failing sheet does not stop the remaining sheets.

    Args:
        excel_file: Path of the workbook to read.

    Returns:
        None.
    """
    try:
        print(f"Reading Excel file: {excel_file}")
        # sheet_name=None returns a dict of sheet name -> DataFrame.
        part_orgs = pd.read_excel(excel_file, sheet_name=None)
        
        for key in part_orgs.keys():
            try:
                # Creating table name
                table_name = key.replace(' ','').lower()
                print(f"Processing sheet: {key} as table: {table_name}")
                
                # Dropping existing table and create new one
                # NOTE(review): {{table_name | ident}} is substituted by the nql magic;
                # confirm it can see this function-local variable.
                %nql DROP TABLE IF EXISTS {{table_name | ident}}; SELECT ''
                
                # Converting to DataFrame and loading
                df = pd.DataFrame.from_dict(part_orgs[key])
                session2.load_dataframe(df, table_name, index=False)
                print(f"Loaded {len(df)} rows into {table_name}")
                
            except Exception as e:
                print(f"Error processing sheet {key}: {e}")
                
    except Exception as e:
        print(f"Error reading Excel file: {e}")

In [39]:
# Download the two IATI code lists into the working directory.
!wget https://iatistandard.org/reference_downloads/203/codelists/downloads/clv3/csv/en/OrganisationRegistrationAgency.csv
!wget https://iatistandard.org/reference_downloads/203/codelists/downloads/clv3/csv/en/OrganisationIdentifier.csv 

reg_agency = pd.read_csv("OrganisationRegistrationAgency.csv")
old_orgid = pd.read_csv("OrganisationIdentifier.csv")
# This file is not downloaded here; it must already exist next to the notebook.
refs = pd.read_csv("Generic_XM-DACs_Jul24.csv")

# Load the registry and the three reference frames as tables via session2.
session2.load_dataframe(registry, "registry", index=False)
session2.load_dataframe(reg_agency, "reg_agency", index=False)
session2.load_dataframe(old_orgid, "old_orgid", index=False)
session2.load_dataframe(refs,"refs",index=False)

# Remove only the two downloaded files.
!rm -rf "OrganisationRegistrationAgency.csv" "OrganisationIdentifier.csv" 

In [40]:
def load_reference_data():
    """Download the IATI code lists and load the reference tables.

    Reads the two downloaded code lists plus the local
    "Generic_XM-DACs_Jul24.csv" file, then loads them and the notebook-level
    ``registry`` frame into the database through the notebook-level
    ``session2``. Errors are printed and not raised; a table that fails to
    load does not stop the remaining tables.

    Returns:
        None. The frames read here are local to the function.
    """
    try:
        # Downloading files
        print("Downloading reference files...")
        !wget https://iatistandard.org/reference_downloads/203/codelists/downloads/clv3/csv/en/OrganisationRegistrationAgency.csv
        !wget https://iatistandard.org/reference_downloads/203/codelists/downloads/clv3/csv/en/OrganisationIdentifier.csv
        
        # Reading CSV files
        print("Reading CSV files...")
        reg_agency = pd.read_csv("OrganisationRegistrationAgency.csv")
        old_orgid = pd.read_csv("OrganisationIdentifier.csv")
        refs = pd.read_csv("Generic_XM-DACs_Jul24.csv")
        
        # Loading DataFrames to database
        print("Loading data to database...")
        # Maps table name -> frame to load under that name.
        data_frames = {
            "registry": registry,
            "reg_agency": reg_agency,
            "old_orgid": old_orgid,
            "refs": refs
        }
        
        for table_name, df in data_frames.items():
            try:
                session2.load_dataframe(df, table_name, index=False)
                print(f"Loaded {len(df)} rows into {table_name}")
            except Exception as e:
                print(f"Error loading {table_name}: {e}")
        
        # Cleaning up
        print("Cleaning up downloaded files...")
        !rm -rf "OrganisationRegistrationAgency.csv" "OrganisationIdentifier.csv"
        
        print("Reference data loading completed")
        
    except Exception as e:
        print(f"Error in reference data loading: {e}")


In [41]:
# NOTE(review): the two preceding script cells load the same sheets and
# reference tables; confirm that loading them a second time is intended.
load_excel_data()
load_reference_data()

- Remove obvious issues such as blank references 

- Remove all/part of the data from reporting orgs with known issues which mean a participating organisation could not be identified. 

- Trim leading/trailing whitespace from narratives.

- Remove generic XM-DAC codes.

Other issues will be caught by the filter data section, which checks for valid organisation-ids with a format of {RegistrationAgency}-{RegistrationNumber}.

In [42]:
# Modifies `refs` in place: codes become strings, and a second column holds
# each code with the "XM-DAC-" prefix.
refs['code'] = refs['code'].astype(str) 
refs['XMDAC'] = "XM-DAC-"+refs['code']
refs

In [43]:
def clean_organization_data(refs_df):
    """Return a cleaned copy of the generic-reference table.

    Rows without a code are removed, codes are converted to strings, an
    ``XMDAC`` column holding ``"XM-DAC-" + code`` is added, and every column
    whose name contains "narrative" has its values stripped of surrounding
    whitespace. Missing narratives stay missing.

    Args:
        refs_df: DataFrame with a ``code`` column. It is not modified.

    Returns:
        The cleaned DataFrame, or ``None`` if cleaning fails (for example
        when the ``code`` column is absent). The error is printed.
    """
    try:
        print("Starting data cleaning process...")

        # Drop blank references before the string conversion: astype(str)
        # turns a missing value into the text "nan", which dropna cannot see.
        # .copy() keeps the caller's frame untouched.
        cleaned = refs_df.dropna(subset=['code']).copy()

        codes = cleaned['code']
        # A numeric column that contained blanks is read as float; convert
        # whole numbers back to int so 41122.0 becomes "41122", not "41122.0".
        if pd.api.types.is_float_dtype(codes) and (codes % 1 == 0).all():
            codes = codes.astype('int64')

        # Converting codes to string and create XMDAC format
        cleaned['code'] = codes.astype(str)
        cleaned['XMDAC'] = "XM-DAC-" + cleaned['code']

        # Trimming whitespace from narratives if they exist
        narrative_columns = [col for col in cleaned.columns if 'narrative' in str(col).lower()]
        for col in narrative_columns:
            text = cleaned[col]
            # Keep missing values missing; strip everything else.
            cleaned[col] = text.where(text.isna(), text.astype(str).str.strip())

        print(f"Cleaned {len(cleaned)} organization references")
        return cleaned

    except Exception as e:
        print(f"Error during data cleaning: {e}")
        return None

In [44]:
# Rebinds `refs` to the helper's result (None if cleaning failed).
refs = clean_organization_data(refs)

In [45]:
# Display the cleaned reference table.
refs

In [46]:
%%nql SHOW CREATE cleaned_part_orgs

SELECT DISTINCT
    prefix,
    reportingorg_ref,
    ref,
    type,
    typename,
    -- reg_agency is the first two dash-separated parts of ref, or empty when the second part is empty
    CASE
        WHEN
            SPLIT_PART(ref,'-',2) = '' THEN ''
            ELSE CONCAT(SPLIT_PART(ref,'-',1),'-',SPLIT_PART(ref,'-',2))
    END as "reg_agency",
    -- the g flag removes every newline; without it only the first one is replaced
    TRIM(REGEXP_REPLACE(narrative, E'\n', '', 'g')) as "narrative"
FROM part_orgs
-- drop blank, placeholder and redacted narratives
WHERE NOT narrative in ('',' ','EN: ','Odefinierat','EN: IP not published','Not applicable',
                        'Confidential',
                        'USAID redacted this field in accordance with the exceptions outlined in the Foreign Aid Transparency and Accountability Act of 2016.')
    AND NOT POSITION('Private donors' IN narrative) > 0
    AND NOT POSITION('Övrigt' IN narrative) > 0
    AND NOT POSITION('Övriga' IN narrative) > 0
    -- for the unicef prefix only, drop refs that contain the internal XM-DAC-41122-VN- pattern
    AND CASE
        WHEN prefix='unicef' THEN NOT POSITION('XM-DAC-41122-VN-' IN ref) > 0 
        ELSE true
    END
    AND NOT ref in (' ','Not registered in IATI','0')
    -- drop generic codes, with and without the XM-DAC- prefix
    AND NOT ref in {{ refs['code'] | inclause }}
    AND NOT ref in {{ refs['XMDAC'] | inclause }}

## Add other data sources

- Join to registry IDs, registration agency, and old IATI codelist. 

- Add name of reporting-org from registry. If the reporting org reference does not match the registry, it will be removed. 

In [47]:
%%nql SHOW CREATE joined_part_orgs 
SELECT 
    e."Publisher" as "reportingorg_name",
    a.*,
    b."Publisher" as "registry_narrative",
    c.name as "reg_agency_narrative",
    d.name as "v1_orgid"
FROM cleaned_part_orgs as a
    -- b matches the participating org ref against registry identifiers
    LEFT JOIN registry as b ON a.ref = b."IATI Organisation Identifier"
    LEFT JOIN reg_agency as c ON a.reg_agency = c.code
    LEFT JOIN old_orgid as d ON a.ref = d.code
    -- e matches the reporting org ref against registry identifiers
    LEFT JOIN registry as e ON a.reportingorg_ref = e."IATI Organisation Identifier"
-- rows whose reporting org has no registry match have a NULL name and do not pass this test
WHERE NOT e."Publisher" = 'None'
ORDER BY a.prefix

## Filter data

Check that the participating organisation references:

- Are a valid reporting org ID from the registry OR

- Have a valid prefix from org-id UNLESS

- They are a v1 org id

In [48]:
%%nql SHOW CREATE filtered_orgs output=DF
SELECT
    *
FROM joined_part_orgs 
-- a row is kept when at least one of the three lookups has a value other than the text None
-- a NULL lookup makes its own test unknown, so it cannot keep the row by itself
WHERE NOT reg_agency_narrative = 'None'
    OR NOT registry_narrative = 'None'
    OR NOT v1_orgid = 'None'
ORDER BY prefix

In [49]:
# Replace any earlier export with the filtered data in a sheet named "output".
!rm -rf output.xlsx
output.to_excel("output.xlsx",sheet_name="output", index=False)

# Build the download link; this raises KeyError when DEEPNOTE_PROJECT_ID is
# not set in the environment.
output_file_url = f"https://deepnote.com/publish/{os.environ['DEEPNOTE_PROJECT_ID']}/file?path=output.xlsx"
print(output_file_url)

# Summarise

<hr>

## Chord counts - to download

Code source: https://stackoverflow.com/questions/64689296/counting-shared-elements-within-groups-with-condition?rq=3 

Chord output for use with bokeh:

In [50]:
#Get unique reporting org name and participating org reference combinations
df = output[["reportingorg_name","ref"]].drop_duplicates()

result = df.merge(df, on='ref').groupby(['reportingorg_name_x','reportingorg_name_y']).count().reset_index()
result.columns = ['source', 'target', 'value']
mask = result.source!=result.target

chord_df = result[mask].reset_index(drop='True')
chord_df  = chord_df.sort_values(by="value", ascending=False)
chord_df

In [51]:
def create_chord_data(output_df):
    """Count the participating-org references shared by each pair of reporting orgs.

    Args:
        output_df: DataFrame with ``reportingorg_name`` and ``ref`` columns.

    Returns:
        A DataFrame with ``source``, ``target`` and ``value`` columns, one
        row per ordered pair of different organisations, sorted by ``value``
        in descending order. ``None`` is returned if an error occurs (the
        error is printed).
    """
    try:
        # One row per distinct (reporting org, reference) combination.
        pairs = output_df[["reportingorg_name", "ref"]].drop_duplicates()

        # Joining the table to itself on ref lines up every two organisations
        # that use the same reference; counting per pair gives the overlap.
        shared = (
            pairs.merge(pairs, on='ref')
            .groupby(['reportingorg_name_x', 'reportingorg_name_y'])
            .count()
            .reset_index()
        )
        shared.columns = ['source', 'target', 'value']

        # Remove the pairs that link an organisation to itself, then sort.
        is_cross_org = shared.source != shared.target
        chord_df = (
            shared[is_cross_org]
            .reset_index(drop=True)
            .sort_values(by="value", ascending=False)
        )

        print(f"Created chord diagram data with {len(chord_df)} connections")
        return chord_df
    except Exception as err:
        print(f"Error creating chord data: {err}")
        return None

In [52]:
# Rebinds chord_df to the helper's result (None if it reported an error).
chord_df = create_chord_data(output)
chord_df

## Networking Visualisation

Use Bokeh to create a chord diagram

In [53]:
import holoviews as hv
import numpy as np
from holoviews import opts, dim
from bokeh.plotting import figure, output_file, save, show
from IPython.display import IFrame
from IPython.core.display import display, HTML
import tempfile

Workaround to use Bokeh in Deepnote

In [54]:
def bokeh_deepnote_show(plot):
    """Render a Bokeh plot inline by saving it to HTML and displaying that HTML.

    A later cell defines a function with the same name, which replaces this
    one once that cell has run.

    Args:
        plot: The Bokeh object to save and display.
    """
    # Write into a temporary directory so the HTML file is always removed,
    # and read it through a context manager so the handle is closed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_output_filename = os.path.join(tmp_dir, "plot.html")
        output_file(tmp_output_filename)
        save(plot)

        with open(tmp_output_filename, "r") as f:
            display(HTML(f.read()))

In [55]:
def bokeh_deepnote_show(plot):
    """Render a Bokeh plot inline by saving it to HTML and displaying that HTML.

    Errors are printed and not raised. The temporary file is removed whether
    or not rendering succeeds.

    Args:
        plot: The Bokeh object to save and display.
    """
    tmp_path = None
    try:
        # Create the temporary file only to reserve a name, and close the
        # handle straight away so it is not left open.
        with tempfile.NamedTemporaryFile(suffix='.html', delete=False) as tmp_file:
            tmp_path = tmp_file.name

        # Saving plot to temporary file
        output_file(tmp_path)
        save(plot)

        # Reading and display the plot
        with open(tmp_path, "r") as f:
            display(HTML(f.read()))

    except Exception as e:
        print(f"Error displaying plot: {e}")
    finally:
        # Cleaning up, also when saving or displaying failed.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)

In [56]:
#set first variable to true for first run through
# Read by the `if first or chord:` guards in the cells below.
first = True

In [68]:
#set first variable to true for first run through
first = True

def initialize_visualization():
    """Set the module-level ``first`` flag to True and print a usage hint.

    Returns:
        True on success, False if an error occurred (the error is printed).
    """
    global first
    try:
        first = True
        print("Visualization initialized. Use slider to adjust minimum shared references.")
    except Exception as err:
        print(f"Error initializing: {err}")
        return False
    return True

In [69]:
# Sets `first` to True again and shows the helper's return value.
initialize_visualization()

Adjust the minimum number of shared participating organisation references by using the slider. Use the "visualise" button to refresh the diagram.

In [58]:
# Minimum number of shared references a pair needs to be drawn.
# Note: this name shadows Python's built-in filter() for the rest of the notebook.
filter = 0

# Starts as False; a later cell rebinds this name to the chord diagram object.
chord = False

In [60]:
if first or chord:
    # Keep each unordered pair once (source < target) with at least one
    # shared reference.
    mask = np.logical_and(result.source<result.target, result.value>0)
    # drop takes a boolean; the string 'True' only worked because it is truthy.
    bokeh_df = result[mask].reset_index(drop=True)
    bokeh_df  = bokeh_df.sort_values(by="source")
    
bokeh_df

In [61]:
if first or chord:
    hv.extension('bokeh')

#filter out orgs with few in common
# `filter` here is the notebook variable holding the minimum value, not the built-in.
filtered_data = bokeh_df[bokeh_df['value'] >= filter]
    
#Create chord labels
# Every organisation that appears as a source or a target, without duplicates.
orgs = list(set(filtered_data ["source"].unique().tolist() + filtered_data ["target"].unique().tolist()))
orgs_dataset = hv.Dataset(pd.DataFrame(orgs, columns=["Organisation"]))
# Rebinds `chord` from the False set earlier to the diagram object.
chord = hv.Chord((filtered_data,orgs_dataset))

chord.opts(cmap = 'Category20',  
         edge_cmap = 'Category20',  
         labels = 'Organisation',
         node_color = hv.dim('Organisation').str(),
         #edge_color = hv.dim('source').str(), 
          title = 'Shared Participating Organisation References',
         width = 900,
         height = 900)

# Convert the diagram with hv.render and display it inline.
bokeh_deepnote_show(hv.render(chord))

# Output

In [62]:
# Remove any earlier export, then write the chord data to a sheet named "chord_df".
if first or chord:
    !rm -rf chord_df.xlsx
chord_df.to_excel("chord_df.xlsx",sheet_name="chord_df", index=False)

# Build the download link; this raises KeyError when DEEPNOTE_PROJECT_ID is
# not set in the environment.
chord_file_url = f"https://deepnote.com/publish/{os.environ['DEEPNOTE_PROJECT_ID']}/file?path=chord_df.xlsx"
print(chord_file_url)

<hr>

# Further visualisation

To create and edit your own diagram, you may wish to use the data visualisation tool "Flourish". 

You can download the chord data produced by this application here: Download.

Example flourish chord diagram:

In [63]:
# Render the example Flourish chord diagram inline via its embed snippet.
from IPython.core.display import HTML
HTML('<div class="flourish-embed flourish-chord" data-src="visualisation/17570004" data-width="29%"><script src="https://public.flourish.studio/resources/embed.js"></script></div>')

<hr>

## How is the data cleaned?

Organisation identifiers with known issues are removed - these include redacted or blank narratives, and internal or generic references, as they cannot be used for the networking of IATI data. This is not an exhaustive list and is subject to change.  

Blank/internal/generic references

 Blank/redacted narratives

| **Organisation** | **Redaction message/Issue**     |
|---------|---------------------------------|
| iom     | Confidential                    |
| sida | Övriga/Övrigt ... (Other) |
| unicef  | EN: IP not published            |
| unhcr   | Private donors in...             |
|usaid | USAID redacted this field in accordance with the exceptions outlined in the Foreign Aid Transparency and Accountability Act of 2016.|

| **Organisation** | **Redaction message/Issue**     |
|------------------|---------------------------------|
| unicef            | Internal ID eg XM-DAC-41122-VN-242363 |
| Multiple orgs | Generic XM-DAC- codes used |

TO DO - compare DAC codes to https://github.com/pwyf/aid-transparency-tracker/blob/main/beta/excluded_xm_dac.py

Participating organisation identifiers must have a valid format of {RegistrationAgency}-{RegistrationNumber}, or match a known organisation-identifier as recorded in the registry.

You can download the cleaned data here: Download

<hr>

## Data Snapshots

Number of reporting organisations referencing each participating organisation identifier :

In [64]:
%%nql SHOW CREATE ref_count ref_count=DF

SELECT 
    ref as "Participating Organisation Identifier",
    -- number of distinct reporting orgs that use this identifier
    COUNT(DISTINCT(reportingorg_ref)) as "Count" 
FROM filtered_orgs 
GROUP BY ref
ORDER BY "Count" DESC

Most commonly referenced participating organisation identifiers - Top 25

In [65]:
# Horizontal bar chart of the rows labelled 0 to 24 of ref_count
# (a .loc label slice includes both ends).
plt.barh("Participating Organisation Identifier", "Count", data = ref_count.loc[0:24])
plt.xlabel('Count of reporting organisations')
# Axis label spelling corrected ("Organisation").
plt.ylabel('Participating Organisation Reference')
plt.show()

Count of unique valid participating organisation identifiers referenced by each reporting organisation:

In [66]:
%%nql SHOW CREATE reporg_count reporg_count=DF

SELECT
    reportingorg_name as "Reporting Organisation",
    -- number of distinct participating org identifiers used by this reporting org
    COUNT(DISTINCT(ref)) as "Count of Participating Organisation Identifiers"
FROM filtered_orgs 
GROUP BY reportingorg_name
ORDER BY "Count of Participating Organisation Identifiers" DESC

Reporting organisations with most valid participating organisation identifiers - Top 25

In [67]:
# Horizontal bar chart of the rows labelled 0 to 24 of reporg_count
# (a .loc label slice includes both ends).
plt.barh('Reporting Organisation', 'Count of Participating Organisation Identifiers', 
            data = reporg_count.loc[0:24])
plt.xlabel('Count of Participating Organisation References')
# Axis label spelling corrected ("Organisation").
plt.ylabel('Reporting Organisation')
plt.show()

<hr>

### Footnote

The code behind this application is hidden to improve usability. You are welcome to view this code at: Notebook.

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=240fcfd1-8557-41e6-8271-b13ecef554c3' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>