# Fund - Timelines

## Libs and setup


In [None]:
!pip install --upgrade plotly
!pip install kaleido

In [None]:
import requests
import plotly.express as px
import pandas as pd
from datetime import datetime

INDIGO_DATABASE_API = 'https://golab-indigo-data-store.herokuapp.com/app/api1'


def api_get_item(endpoint, public_id=None, timeout=60):
    """
    Get data from the INDIGO API.

    With a ``public_id`` the request goes to ``<api>/<endpoint>/<public_id>``;
    without one it goes to ``<api>/<endpoint>``.

    E.g.
    item = api_get_item('project', 'INDIGO-POJ-0158')

    Args:
        endpoint: API endpoint name, e.g. 'project' or 'fund'.
        public_id: Optional public ID of a single item.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait forever on an unresponsive server.

    Returns:
        The decoded JSON body of the response, or ``False`` if the request
        or the JSON decoding raised (the error is printed).
    """
    url = f'{INDIGO_DATABASE_API}/{endpoint}'
    if public_id:
        url = f'{url}/{public_id}'
    try:
        return requests.get(url, timeout=timeout).json()
    except Exception as e:
        # Deliberately broad: this is a best-effort helper and callers only
        # distinguish "got data" from False.
        print(f'\nFailed to retrieve {endpoint} "{public_id}".\nError: {e}')
        return False

# Date fields to try when choosing a start or end date. Each list is in
# priority order: the date lookups return the first key that holds a value.
PROJECT_DATE_START_KEYS = [
    'contract_signed',
    'contract_all_parties_signed',
    'start_date',
]
PROJECT_DATE_END_KEYS = [
    'actual_completion',
    'anticipated_completion',
    'actual_end',
    'anticipated_end',
]
FUND_DATE_START_KEYS = [
    'expression_of_interest',
    'launch_date',
]
FUND_DATE_END_KEYS = [
    'actual_end_date',
    'anticipated_end_date',
]
# strptime patterns tried in order, from most to least precise.
ACCEPTED_DATE_FORMATS = ['%Y-%m-%d', '%Y-%m', '%Y']


def _convert_date_string_to_timestamp(time_string):
    """
    Parse a date value into a POSIX timestamp.

    The value is converted to ``str`` and matched against each pattern in
    ACCEPTED_DATE_FORMATS in turn; the first pattern that parses is used.

    Returns:
        The timestamp as a float, or None when no pattern matches.
    """
    text = str(time_string)
    for date_format in ACCEPTED_DATE_FORMATS:
        try:
            timestamp = datetime.strptime(text, date_format).timestamp()
        except ValueError:
            # Not this pattern; try the next, less precise one.
            continue
        return timestamp
    return None

def get_date_from_project_data(data, keys):
    """
    Pick the first available date for a project.

    Args:
        data: Mapping of date-field name to date string for one project.
        keys: Field names to try, in priority order.

    Returns:
        ``(date_string, timestamp)`` for the first key holding a non-empty
        value, where ``timestamp`` is the result of
        ``_convert_date_string_to_timestamp`` for that string; or
        ``(None, None)`` when no key holds a value.
    """
    for key in keys:
        # .get() so a field that is absent from the mapping is treated like
        # an empty one, consistent with get_date_from_fund_data.
        value = data.get(key)
        if value:
            return value, _convert_date_string_to_timestamp(value)
    return None, None

def get_date_from_fund_data(data, keys):
    """
    Pick the first available date for a fund.

    Dates are read from ``data['dates'][<key>]['value']``.

    Args:
        data: Fund data mapping as returned by the API.
        keys: Date names to try, in priority order.

    Returns:
        ``(date_string, timestamp)`` for the first key holding a non-empty
        value, where ``timestamp`` is the result of
        ``_convert_date_string_to_timestamp`` for that string; or
        ``(None, None)`` when no key holds a value.
    """
    # "or {}" rather than a .get() default: a key that is present but null
    # would otherwise yield None and break the chained lookup.
    dates = (data or {}).get('dates') or {}
    for key in keys:
        value = (dates.get(key) or {}).get('value')
        if value:
            return value, _convert_date_string_to_timestamp(value)
    return None, None

def get_default_end_date_from_start_date(start_date):
    """
    Build a placeholder end date for something whose real end is unknown.

    The placeholder is the month following the start month.

    Args:
        start_date: Start date as 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'
            (a bare integer year is accepted as well).

    Returns:
        The placeholder end date as a 'YYYY-MM' string.
    """
    year, _, remainder = str(start_date).partition('-')
    if not remainder:
        # A year-only date parses as 1 January, so 'YYYY-01' would equal the
        # start and leave a zero-length span; use February instead.
        return f'{year}-02'
    month = int(remainder.split('-')[0])
    if month == 12:
        # December rolls over into January of the next year.
        return f'{int(year) + 1}-01'
    return f'{year}-{month + 1:02d}'

def _is_earlier(candidate_timestamp, current_timestamp):
    """Return True if a known candidate timestamp should become the new minimum."""
    if candidate_timestamp is None:
        return False
    return current_timestamp is None or candidate_timestamp < current_timestamp


def _is_later(candidate_timestamp, current_timestamp):
    """Return True if a known candidate timestamp should become the new maximum."""
    if candidate_timestamp is None:
        return False
    return current_timestamp is None or candidate_timestamp > current_timestamp


def viz_for_fund(fund_id, sankey_height=500):
    """
    Draw a timeline chart for one fund and its projects.

    Reads the module-level ``fund_data``, ``fund_has_projects`` and
    ``project_dates``. The fund bar starts from the fund's own start/end
    dates and is widened to cover every project plotted. Projects without a
    start date are left out; projects with a start but no end date get a
    placeholder end and are labelled "Project with unknown end".

    If there is anything to plot, the figure is shown and also written to
    'fun-and-project-timelines-<fund_id>.png' in the working directory.

    Args:
        fund_id: ID of the fund to plot.
        sankey_height: Figure height passed to the plotly layout.
    """
    # Fund min/max
    min_fund_date, min_fund_timestamp = get_date_from_fund_data(fund_data[fund_id], FUND_DATE_START_KEYS)
    max_fund_date, max_fund_timestamp = get_date_from_fund_data(fund_data[fund_id], FUND_DATE_END_KEYS)

    # Get projects data
    # (Check fund min/max at same time)
    projects_data = []
    for project_id in fund_has_projects[fund_id]:
        dates = project_dates[project_id]
        project_start_date, project_start_timestamp = get_date_from_project_data(dates, PROJECT_DATE_START_KEYS)
        project_end_date, project_end_timestamp = get_date_from_project_data(dates, PROJECT_DATE_END_KEYS)
        if not project_start_date:
            # Without a start date there is nothing to draw.
            continue

        if project_end_date:
            finish = project_end_date
            resource = "Project"
            range_end_date, range_end_timestamp = project_end_date, project_end_timestamp
        else:
            finish = get_default_end_date_from_start_date(project_start_date)
            resource = "Project with unknown end"
            # The placeholder end is not real data, so only the start date
            # is allowed to extend the fund's range.
            range_end_date, range_end_timestamp = project_start_date, project_start_timestamp

        projects_data.append(dict(
            Object=project_id,
            Start=project_start_date,
            Finish=finish,
            Resource=resource,
        ))

        # A date string matching none of the accepted formats has a None
        # timestamp; the helpers ignore it, because comparing None with a
        # float raises TypeError.
        if _is_earlier(project_start_timestamp, min_fund_timestamp):
            min_fund_date = project_start_date
            min_fund_timestamp = project_start_timestamp
        if _is_later(range_end_timestamp, max_fund_timestamp):
            max_fund_date = range_end_date
            max_fund_timestamp = range_end_timestamp

    # Get data for viz
    data = []
    if min_fund_date and max_fund_date:
        data.append(dict(
            Object=fund_id,
            Start=min_fund_date,
            Finish=max_fund_date,
            Resource="Fund",
        ))
    data.extend(projects_data)

    if data:
        fig = px.timeline(
            pd.DataFrame(data),
            x_start="Start",
            x_end="Finish",
            y="Object",
            color="Resource",
        )
        fig.update_yaxes(autorange="reversed")  # otherwise tasks are listed from the bottom up
        fig.update_layout(
            height=sankey_height,
            yaxis=dict(tickmode='linear', title="Fund and Projects")
        )
        fig.show()
        fig.write_image('fun-and-project-timelines-'+fund_id+'.png')


## Get Fund data
Call the INDIGO API 'fund' endpoint and retrieve the data.


In [None]:
public_ids = []
endpoint = 'fund'

# api_get_item returns False when the request fails (after printing the
# error), so fall back to an empty listing instead of calling .get() on False.
fund_listing = api_get_item(endpoint) or {}
for fund_summary in fund_listing.get('funds') or []:
    if fund_summary.get('public'):
        public_ids.append(fund_summary.get('id'))

# Full data for each public fund, keyed by fund ID.
fund_data = {}

for fund_id in public_ids:
    print("Getting Fund " + fund_id)
    fund_response = api_get_item(endpoint, fund_id)
    if fund_response is False:
        # The failure has already been reported; carry on with the other funds.
        continue
    fund_data[fund_id] = fund_response.get('fund',{}).get('data',{})

## Get Project Data

In [None]:
!wget https://golab-indigo-data-store.herokuapp.com/app/all_public_data_file_per_data_type_csv.zip
!unzip -o all_public_data_file_per_data_type_csv.zip

In [None]:
import csv
from collections import defaultdict

# Map each fund ID to the IDs of the projects listed against it.
fund_has_projects = defaultdict(list)
with open('projects_outcome_funds.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        fund_has_projects[row['Id']].append(row['Project ID'])

# Short date key -> column of projects.csv that holds that date.
PROJECT_DATE_COLUMNS = {
    'contract_signed': 'Dates - Date outcomes contract signed - (Value)',
    'contract_all_parties_signed': 'Dates - Date contracts between all parties signed - (Value)',
    'anticipated_completion': 'Dates - Anticipated completion date - (Value)',
    'actual_completion': 'Dates - Actual completion date - (Value)',
    'start_date': 'Dates - Start date of service provision - (Value)',
    'anticipated_end': 'Dates - Anticipated end date of service provision - (Value)',
    'actual_end': 'Dates - Actual end date of service provision - (Value)',
}

# Map each project ID to its dates, keyed by the short date keys above.
project_dates = {}
with open('projects.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        project_dates[row['ID']] = {
            key: row[column] for key, column in PROJECT_DATE_COLUMNS.items()
        }


## Make Visualisation!

In [None]:
# Draw one timeline chart per fund that was fetched.
for fund_id in fund_data:
    heading = "Fund " + fund_id
    print(heading)
    viz_for_fund(fund_id)
