In [None]:
import json
import math
import re

import pandas as pd
import numpy as np

In [None]:
# Load the 2017 export from the Excel workbook into a DataFrame,
# then display its (rows, columns) shape as a quick sanity check.
fts_bgue_2017 = pd.read_excel('data/fts/input/export_2017_en.xlsx')
fts_bgue_2017.shape

In [None]:
# List the raw column headers so they can be mapped to shorter names below.
fts_bgue_2017.columns

In [None]:
# Replace the verbose export headers with short snake_case names.
# NOTE(review): `index=str` also converts every row label to a string;
# nothing visible here relies on that — confirm it is intentional.
fts_bgue_2017 = fts_bgue_2017.rename(
    index=str,
    columns={
        'Country / Territory': 'country',
        'Reference of the Legal Commitment (LC)': 'reference_of_lc',
        'Commitment position key': 'commitment_position_key',
        'Name of beneficiary': 'name_of_beneficiary',
        'VAT Number of beneficiary': 'beneficiary_vat',
        'Postal code': 'postcode',
        'Source of (estimated) detailed amount': 'source_of_amount',
        'Geographical Zone': 'geographical_zone',
        'Expense Type': 'expense_type',
        'Total amount': 'total_amount',
        'Subject of grant or contract': 'subject_of_grant_or_contract',
        'Responsible Department': 'responsible_department',
        'Budget line name and number': 'budget_line_name_and_number',
        'Action Type': 'action_type',
        'Funding Type': 'funding_type',
        'LE Acct Group Code': 'le_acct_group_code',
        'LE Acct Group Desc': 'le_acct_group_desc',
    },
)

In [None]:
# Check whether the legal-commitment reference column holds any real values.
fts_bgue_2017['reference_of_lc'].unique()

Interesting: this column is used in the 2017 data, whereas it wasn't in the 2016 data.

In [None]:
# Forward-fill the descriptive columns: merged cells in the original 2017
# workbook are read as NaN on every row after the first one of each merged
# group, so each gap is filled with the last value seen above it.
# (`Amount` itself is deliberately not in this list.)
# `.ffill()` replaces the deprecated `fillna(method='ffill')` spelling.
ffill_columns = [
    'commitment_position_key',
    'Year',
    'expense_type',
    'subject_of_grant_or_contract',
    'responsible_department',
    'budget_line_name_and_number',
    'action_type',
    'funding_type',
]
fts_bgue_2017[ffill_columns] = fts_bgue_2017[ffill_columns].ffill()


In [None]:
# Keep only the rows whose country is the United Kingdom, then show the new shape.
fts_bgue_2017 = fts_bgue_2017.loc[fts_bgue_2017['country'] == 'United Kingdom']
fts_bgue_2017.shape

In [None]:
# Preview the first rows of fts_bgue_2017.
fts_bgue_2017.head()

In [None]:
# Discard the columns that are not needed for the rest of the analysis.
fts_bgue_2017 = fts_bgue_2017.drop(
    columns=[
        'Address',
        'City',
        'reference_of_lc',
        'commitment_position_key',
        'beneficiary_vat',
        'geographical_zone',
    ]
)


In [None]:
# Per-column count of non-null values among rows with a present, non-zero Amount.
fts_bgue_2017[
    fts_bgue_2017['Amount'].notnull() & fts_bgue_2017['Amount'].ne(0.0)
].count()

In [None]:
# Independent copy of the rows that carry a present, non-zero Amount.
fts_bgue_2017_amounts = fts_bgue_2017.loc[
    fts_bgue_2017['Amount'].notnull() & fts_bgue_2017['Amount'].ne(0.0)
].copy()

In [None]:
# Normalise beneficiary postcodes: trim surrounding whitespace, then remove
# the spaces inside the code so both sides of the later join use one format.
fts_bgue_2017_amounts['postcode'] = (
    fts_bgue_2017_amounts['postcode']
    .str.strip()
    .str.replace(' ', '')
)

In [None]:
# Load the postcode lookup table from CSV.
# NOTE(review): presumably this supplies the latitude/longitude columns used
# further down — confirm against the CSV header.
postcodes = pd.read_csv('data/postcodes/input/ukpostcodes.csv')

In [None]:
# Apply the same normalisation to the lookup table: trim, then drop inner spaces.
postcodes['postcode'] = (
    postcodes['postcode']
    .str.strip()
    .str.replace(' ', '')
)

In [None]:
# Inner-join the funding rows with the postcode table on the normalised postcode.
# Rows whose postcode has no match in the lookup table are dropped by the join.
fts_bgue_2017_amounts = fts_bgue_2017_amounts.merge(
    postcodes,
    how='inner',
    left_on=['postcode'],
    right_on=['postcode'],
)


In [None]:
# Show the (rows, columns) shape of fts_bgue_2017_amounts at this point.
fts_bgue_2017_amounts.shape

In [None]:
# Spot-check: show every row belonging to one specific beneficiary.
fts_bgue_2017_amounts.loc[
    fts_bgue_2017_amounts['name_of_beneficiary'] == 'THE UBELE INITIATIVE'
]

In [None]:
def make_fts2017_data_geo_json(coordis_data):
    """Build a GeoJSON FeatureCollection with one Point feature per row.

    Parameters
    ----------
    coordis_data : pandas.DataFrame
        Frame providing 'longitude' and 'latitude' columns plus the property
        columns 'name_of_beneficiary', 'Amount',
        'budget_line_name_and_number' and 'Year'.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}`` where every
        feature is a Point at ``[longitude, latitude]``. A property is left
        out of a feature when its value is missing for that row. An empty
        frame yields an empty ``features`` list.

    Raises
    ------
    KeyError
        If a non-empty frame lacks one of the columns listed above.
    """
    property_columns = ['name_of_beneficiary', 'Amount', 'budget_line_name_and_number', 'Year']

    def make_feature(row):
        # Test for missing values with pandas rather than by comparing the
        # string form to 'nan', so a genuine string 'nan' is not discarded.
        properties = {
            column: row[column]
            for column in property_columns
            if pd.notnull(row[column])
        }

        return {
            'type': 'Feature',
            'geometry': {
                "type": "Point",
                "coordinates": [row['longitude'], row['latitude']]
            },
            'properties': properties
        }

    # Iterate over plain row dicts instead of DataFrame.apply(axis=1): on an
    # empty frame, apply returns a DataFrame, and list() of that is the
    # column names rather than an empty list of features.
    features = [make_feature(row) for row in coordis_data.to_dict('records')]
    return { 'type': 'FeatureCollection', 'features': features }
# Write the GeoJSON output file, using only the rows that have a latitude.
with open('data/fts/output/fts2017_data.geo.json', 'w') as file:
    located_rows = fts_bgue_2017_amounts[fts_bgue_2017_amounts['latitude'].notnull()]
    geo_json = make_fts2017_data_geo_json(located_rows)
    json.dump(geo_json, file, sort_keys=True)