# Project and service provision timelines
Upgrade plotly, as we require the `timeline` function from plotly express.

In [None]:
%pip install plotly>=4.14.3

## INDIGO database API endpoint
Set up the INDIGO database API endpoint and a helper method for getting individual items from the API. The helper can be used with the `project`, `fund`, `organisation` and `assessment_resource` endpoints.

In [None]:
import requests
import textwrap

from datetime import datetime

import pandas as pd
import plotly.express as px


# Base URL of the INDIGO API; api_get_item appends '/<endpoint>/<public_id>' to it.
INDIGO_DATABASE_API = 'https://golab-indigo-data-store.herokuapp.com/app/api1'


def api_get_item(endpoint, public_id, timeout=30):
    """
    Get individual item details from the API

    E.g. 
    item = api_get_item('project', 'INDIGO-POJ-0158')

    `endpoint` and `public_id` are appended to INDIGO_DATABASE_API to form
    the request URL. `timeout` is the number of seconds to wait for the
    server before giving up (without it a stalled connection would block
    forever).

    Returns the decoded JSON body on success. On a network failure, an HTTP
    error status or an undecodable body, a message is printed and False is
    returned.
    """
    url = f'{INDIGO_DATABASE_API}/{endpoint}/{public_id}'
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures rather than parsing an error
        # payload as if it were the requested item.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors raised by older requests versions.
        print(f'\nFailed to retrieve {endpoint} "{public_id}".\nError: {e}')
        return False


## Helper methods
Set up a method to simplify extracting values from the nested data dictionary. We also set up various helper methods for generating plot data for the visualisation. Most of these methods deal with calculating start/end dates.

In [None]:
# Date layouts tried, in this order, by _convert_date_string_to_timestamp.
ACCEPTED_DATE_FORMATS = ['%Y-%m-%d', '%Y-%m', '%Y']


def extract_value(data, keys, default):
    """
    Walk `keys` through the nested dictionary `data` and return what is found.

    Any missing or falsy value along the path is replaced by `default`, and
    the walk stops as soon as the current value equals `default`. If a lookup
    raises (e.g. an intermediate value has no `.get`), the error is printed
    and `default` is returned.
    """
    try:
        current = data
        for key in keys:
            # Nothing further to descend into once we have fallen back.
            if current == default:
                break
            found = current.get(key)
            current = found if found else default
        return current

    except Exception as e:
        print(f'Error: {e}')
        return default


def _convert_date_string_to_timestamp(time_string):
    """
    Convert a date string to a Unix timestamp (seconds), read as UTC.

    Each layout in ACCEPTED_DATE_FORMATS is tried in order and the first one
    that parses wins. Returns None when no layout matches.
    """
    epoch = datetime(1970, 1, 1)
    for date_format in ACCEPTED_DATE_FORMATS:
        try:
            parsed = datetime.strptime(time_string, date_format)
        except ValueError:
            continue
        # A naive datetime's .timestamp() is interpreted in the machine's
        # local timezone, but the timeline code reads these values back with
        # datetime.utcfromtimestamp(). Measuring from the epoch directly keeps
        # both directions in UTC so dates do not shift by a day on hosts that
        # are not running in UTC.
        return (parsed - epoch).total_seconds()
    return None


def _get_project_provision_start_end_dates(project_data):
    """
    Work out the start/end timestamps of a project and of its service provision.

    Dates are read from `project_data['dates'][<field>]['value']`, trimmed and
    cut to their first seven characters before being converted.

    Returns a tuple `(project_start, project_end, provision_start,
    provision_end)`; each entry is a timestamp, or None when no usable date
    was found.
    """

    def _first_timestamp(date_fields):
        # The first field holding a non-blank string decides the result, even
        # if that string then fails to convert (later fields are not tried).
        for date_field in date_fields:
            raw = extract_value(project_data, ['dates', date_field, 'value'], '')
            if isinstance(raw, str) and raw.strip():
                return _convert_date_string_to_timestamp(raw.strip()[0:7])
        return None

    project_start_unix = _first_timestamp(['outcomes_contract_signed'])
    provision_start_unix = _first_timestamp(['start_date_of_service_provision'])

    # For end dates the actual date is preferred over the anticipated one.
    project_end_unix = _first_timestamp([
        'actual_completion_date',
        'anticipated_completion_date',
    ])
    provision_end_unix = _first_timestamp([
        'actual_end_date_of_service_provision',
        'anticipated_end_date_of_service_provision',
    ])

    return project_start_unix, project_end_unix, provision_start_unix, provision_end_unix

## Plot helper methods


In [None]:
def _xaxis_formatting(subplot):
    """
    Build the layout update that styles one subplot's x-axis.

    `subplot` is the suffix appended to 'xaxis' to form the layout key.
    Grid and axis lines both use LINE_COLOUR.
    """
    axis_key = f'xaxis{subplot}'
    axis_style = dict(
        showgrid=True,
        gridcolor=LINE_COLOUR,
        showline=True,
        linewidth=3,
        linecolor=LINE_COLOUR,
        mirror=True,
    )
    return {axis_key: axis_style}


def _yaxis_formatting(subplot):
    """
    Build the layout update that styles one subplot's y-axis.

    `subplot` is the suffix appended to 'yaxis' to form the layout key.
    Grid and axis lines both use LINE_COLOUR; tick labels get a two-space
    suffix.
    """
    axis_key = f'yaxis{subplot}'
    axis_style = dict(
        showgrid=True,
        gridcolor=LINE_COLOUR,
        showline=True,
        ticksuffix='  ',
        linewidth=1,
        linecolor=LINE_COLOUR,
        mirror=True,
    )
    return {axis_key: axis_style}


def _build_timeline_dataframe(public_ids, data):
    """
    Build the dataframe that feeds the timeline plot.

    For every id in `public_ids`, the project record is read from
    `data[public_id]['project']['data']`. A project contributes two rows
    ('Project' and 'Service Provision') only when all four of its start/end
    timestamps are available; otherwise it is left out. Projects whose record
    cannot be processed are skipped with a printed message.

    Returns a DataFrame with the columns 'Project', 'Start', 'Finish', 'Type'
    and 'Policy Sector'; dates are 'YYYY-MM-DD' strings.
    """
    columns = ['Project', 'Start', 'Finish', 'Type', 'Policy Sector']
    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for public_id in public_ids:
        try:
            project_data = data[public_id]['project']['data']
            project_title = project_data['name']['value']

            timestamps = _get_project_provision_start_end_dates(project_data)
            if any(timestamp is None for timestamp in timestamps):
                # Incomplete timelines cannot be drawn.
                continue

            project_start_date, project_end_date, provision_start_date, provision_end_date = (
                datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%d')
                for timestamp in timestamps
            )

            policy_sector = extract_value(
                project_data,
                ['purpose_and_classifications', 'policy_sector', 'value'],
                'Unknown',
            )

            # Long titles are wrapped at 40 characters with HTML line breaks.
            project_label = '  <br>'.join(textwrap.wrap(project_title, 40))

            rows.extend([
                {
                    'Project': project_label,
                    'Type': 'Project',
                    'Start': project_start_date,
                    'Finish': project_end_date,
                    'Policy Sector': policy_sector,
                },
                {
                    'Project': project_label,
                    'Type': 'Service Provision',
                    'Start': provision_start_date,
                    'Finish': provision_end_date,
                    'Policy Sector': policy_sector,
                },
            ])

        except Exception as e:
            # Best effort: one bad record (e.g. a failed download stored as
            # False, or a missing key) must not abort the whole plot, but say
            # which project was dropped instead of hiding it.
            print(f'\nSkipping project "{public_id}".\nError: {type(e).__name__}: {e}')
            continue

    return pd.DataFrame(rows, columns=columns)


def _calculate_extents(figure_data):
    """
    Compute a y-axis extent for each facet, proportional to its project count.

    Only traces named 'Service Provision' are considered. Each one is given
    `[0, share]`, where `share` is its number of y entries as a fraction of
    the total across all such traces. Every axis other than the first
    ('yaxis') has PLOTLY_FACET_SPACING subtracted from its share.

    Returns a dict keyed by layout axis name ('yaxis', 'yaxis2', ...).
    """
    provision_total = 0
    for trace in figure_data:
        if trace.name == 'Service Provision':
            provision_total += len(trace.y)

    extents = {}
    for trace in figure_data:
        if trace.name != 'Service Provision':
            continue

        # A one-character axis reference has no numeric suffix; otherwise keep
        # only the digits of the reference.
        if len(trace.yaxis) == 1:
            axis_suffix = ''
        else:
            axis_suffix = ''.join(char for char in trace.yaxis if char.isdigit())

        share = (1 / (provision_total / len(trace.y)))
        if axis_suffix != '':
            share = share - PLOTLY_FACET_SPACING
        extents[f'yaxis{axis_suffix}'] = [0, share]

    return extents

## Get project data
Call the INDIGO API 'project' endpoint and retrieve the data for each of the projects of interest.

In [None]:
# Call the API and pull down the data for each project
# and store in a dictionary for use later.
# `data` maps each public id to whatever api_get_item returns: the decoded
# response, or False when the request failed.
public_ids = [
    'INDIGO-POJ-0167', 'INDIGO-POJ-0168', 'INDIGO-POJ-0169', 'INDIGO-POJ-0170', 'INDIGO-POJ-0171', 'INDIGO-POJ-0172',
    'INDIGO-POJ-0173', 'INDIGO-POJ-0174', 'INDIGO-POJ-0175', 'INDIGO-POJ-0176', 'INDIGO-POJ-0177', 'INDIGO-POJ-0178',
    'INDIGO-POJ-0179', 'INDIGO-POJ-0180', 'INDIGO-POJ-0181', 'INDIGO-POJ-0182', 'INDIGO-POJ-0183', 'INDIGO-POJ-0184',
    'INDIGO-POJ-0188', 'INDIGO-POJ-0189', 'INDIGO-POJ-0190', 'INDIGO-POJ-0192', 'INDIGO-POJ-0193', 'INDIGO-POJ-0194',
    'INDIGO-POJ-0195', 'INDIGO-POJ-0198', 'INDIGO-POJ-0199', 'INDIGO-POJ-0200', 'INDIGO-POJ-0201',
]

data = {}
endpoint = 'project'

# One request per project, issued sequentially.
for public_id in public_ids:
    data[public_id] = api_get_item(endpoint, public_id)

## Build the figure
Generate the plot data and build the figure

In [None]:
# Gap kept between stacked facet rows, as a fraction of the figure height;
# read by _calculate_extents and by the domain loop below.
# NOTE(review): presumably chosen to match plotly express's default row spacing - confirm.
PLOTLY_FACET_SPACING = 0.03
# Colour used for grid and axis lines by _xaxis_formatting/_yaxis_formatting.
LINE_COLOUR = '#cccccc'

df = _build_timeline_dataframe(public_ids, data)
df_sorted = df.sort_values(by='Start')

# One facet row per policy sector, with two bars (Project / Service Provision)
# per project.
fig = px.timeline(
    df_sorted, 
    x_start='Start', 
    x_end='Finish', 
    y='Project', 
    color='Type',
    facet_row='Policy Sector', 
    color_discrete_map={'Project': '#00629b', 'Service Provision': '#d45087'},
)

fig.update_yaxes(autorange='reversed', automargin=True, dtick=1, title='', matches=None)
fig.update_xaxes(tickangle=45)

# Place the legend below the plot; the offset shrinks as the number of rows
# grows and is clamped at -2.
# NOTE(review): df.shape[0] is 0 when no project had complete dates, which
# makes this division raise ZeroDivisionError.
legend_y = (-3 / df.shape[0])
if legend_y < -2:
    legend_y = -2

# Figure height scales with the number of dataframe rows.
fig.layout.update(height=(32 * df.shape[0]) + 220, legend_y=legend_y)
fig.update_traces(textposition='inside', width=0.25, selector=dict(type='bar'))

# Shift the 'Project' bars by their own width so the two bars of a project sit
# side by side instead of overlapping.
for shape in fig['data']:
    shape['offset'] = -0.25 if shape['legendgroup'] == 'Project' else 0

new_extents = _calculate_extents(fig.data)

# Stack the facets from the bottom up: each y-axis domain starts where the
# previous one ended plus the facet spacing. Axis names follow plotly's
# 'yaxis', 'yaxis2', 'yaxis3', ... pattern.
new_domains = {}
previous_extent = 0
for i in range(len(new_extents)):
    n = '' if i == 0 else (i + 1)
    
    new_domains[f'yaxis{n}'] = {'domain': [previous_extent, previous_extent + new_extents[f'yaxis{n}'][1]]}
    previous_extent = (previous_extent + new_extents[f'yaxis{n}'][1]) + PLOTLY_FACET_SPACING

fig.layout.update(new_domains)

# Re-position each facet label as a horizontal, centred title just above the
# top of its y-axis domain, and apply the shared axis styling.
# NOTE(review): assumes annotation i belongs to the i-th y-axis in the same
# bottom-up order - confirm against plotly's facet annotation ordering.
for i, annotation in enumerate(fig.layout.annotations):
    n = '' if i == 0 else (i + 1)
    
    yaxis_position = fig.layout[f'yaxis{n}']['domain'][1]
    annotation['textangle'] = 0
    annotation['font'] = {'size': 14}
    annotation['xanchor'] = 'center'
    annotation['yanchor'] = 'top'
    annotation['x'] = 0.5
    annotation['y'] = (yaxis_position) + 0.015
    fig.layout.update(_xaxis_formatting(n))
    fig.layout.update(_yaxis_formatting(n))

fig.update_layout(
    title='Project and service provision timelines',
    showlegend=True,
    legend_orientation='h',
    legend_yanchor='bottom',
)

fig.show()

## Important Notice and Disclaimer on INDIGO Data
<sub><sup>
INDIGO data are shared for research and policy analysis purposes. INDIGO data can be used to support a range of insights, for example, to understand the social outcomes that projects aim to improve, the network of organisations across projects, trends, scales, timelines and summary information. The collaborative system by which we collect, process, and share data is designed to advance data-sharing norms, harmonise data definitions and improve data use. These data are NOT shared for auditing, investment, or legal purposes. Please independently verify any data that you might use in decision making. We provide no guarantees or assurances as to the quality of these data. Data may be inaccurate, incomplete, inconsistent, and/or not current for various reasons: INDIGO is a collaborative and iterative initiative that mostly relies on projects all over the world volunteering to share their data. We have a system for processing information and try to attribute data to named sources, but we do not audit, cross-check, or verify all information provided to us. It takes time and resources to share data, which may not have been included in a project’s budget. Many of the projects are ongoing and timely updates may not be available. Different people may have different interpretations of data items and definitions. Even when data are high quality, interpretation or generalisation to different contexts may not be possible and/or requires additional information and/or expertise. Help us improve our data quality: email us at indigo@bsg.ox.ac.uk if you have data on new projects, changes or performance updates on current projects, clarifications or corrections on our data, and/or confidentiality or sensitivity notices. Please also give input via the INDIGO Data Definitions Improvement Tool and INDIGO Feedback Questionnaire.
</sup></sub>
