# Import Interreg NWE Data 
### http://www.nweurope.eu/media/4533/list-of-beneficiaries-for-website.xlsx

In [24]:
import json

import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 100)

In [25]:
all_benefs = pd.read_excel('input/list-of-beneficiaries-for-website.xlsx')
all_benefs.shape

(536, 12)

## Load and Restrict Data to United Kingdom

In [26]:
#all_benefs.columns

In [27]:
all_benefs.rename({
    'No\nNo\nNr.\nNr.': 'Id',
    'Beneficiary name\nNom du bénéfictiaire\nName des Begünstigten \nNaam van de begunstigde ': 'beneficiary',
    "Operation name\nNom de l'opération\nBezeichnung des Vorhabens\nNaam van de concrete actie": 'project',
    "Operation summary\nRésumé de l'opération\nZusammenfassung des Vorhabens\nSamenvatting van de concrete actie": 'project_summary',
    "Operation start date\nDate de début de l'opération\nDatum des Beginns des Vorhabens\nBegindatum van de concrete actie": 'start_date',
    "Operation end date\nDate de fin de l'opération \nDatum des Endes des Vorhabens \nEinddatum van de concrete actie": 'end_date',
    'Total eligible expenditure allocated to the beneficiary\nTotal des dépenses éligibles attribué au bénéficiaire\nGesamtbetrag der förderfähigen Ausgaben an Begünstigte\nTotale subsidiabele uitgaven toegewezen aan begunstigde': 'funding',
    "Union co-financing rate\nTaux de cofinancement par l'Union \nUnions-Kofinanzierungssatz pro Prioritätsachse\nMedefinancieringspercentage van de Unie (per prioritaire as)": 'union_cofinancing',
    "Operation post code\nCode postal de l\'opération\nPostleitzahl des Vorhabens\nPostcode van de concrete actie": 'raw_postcode',
    'Country\nPays\nLand\nLand': 'country',
    "Name of category of intervention for the operation in accordance with point (b) (vi) of the first subparagraph of Article 96(2)\nDénomination de la catégorie d'intervention dont relève l'opération conformément à l'article 96, paragraphe 2,\npremier alinéa, point b) vi)\nBezeichnung der Interventionskategorie für das Vorhaben gemäß Artikel 96 Absatz 2 Unterabsatz 1 Buchstabe b Ziffer vi;\nNaam van de categorie steunverlening voor de concrete actie, overeenkomstig artikel 96, lid 2, eerste alinea,\nonder b), vi)": 'category',
    'Date of last update of the list of operations\nDate de la dernière mise à jour de la liste des opérations\nDatum der letzten Aktualisierung der Liste der Vorhaben\nDatum van de laatste bijwerking van de lijst van concrete acties': 'last_update'
}, axis=1, inplace=True)


In [28]:
all_benefs.columns

Index(['Id', 'beneficiary', 'project', 'project_summary', 'start_date',
       'end_date', 'funding', 'union_cofinancing', 'raw_postcode', 'country',
       'category', 'last_update'],
      dtype='object')

In [29]:
all_benefs.country.sort_values().unique()

array(['Austria', 'Belgium', 'Brussels', 'Denmark', 'France', 'Germany',
       'Ireland', 'Luxembourg', 'Spain', 'Switzerland', 'The Netherlands',
       'United Kingdom'], dtype=object)

In [30]:
benefs = all_benefs[all_benefs.country=='United Kingdom'].copy()

In [31]:
#benefs.head()

## Columns

In [32]:
benefs.count()

Id                   95
beneficiary          95
project              95
project_summary      43
start_date           43
end_date             43
funding              95
union_cofinancing    95
raw_postcode         95
country              95
category             95
last_update          95
dtype: int64

In [33]:
benefs.Id.unique()

array([ 5,  9,  1,  4,  2,  3,  6,  8,  7, 11, 10, 12])

#### Id column appears useless - not unique and doesn't list members of particular projects

In [34]:
#benefs.beneficiary.unique()

In [35]:
#benefs.category.unique()

In [36]:
benefs.category.unique().shape

(18,)

In [37]:
#benefs.groupby('category').size().reset_index(name='size').sort_values('size', ascending=False).head(18)

# Categories need cleaning up -do below

In [38]:
#Do clean up here

## Import postcode data and check imported data

In [39]:
ukpostcodes = pd.read_csv('../postcodes/input/ukpostcodes.csv.gz')

In [40]:
ukpostcodes.shape

(1762397, 4)

In [41]:
benefs.raw_postcode.isin(ukpostcodes.postcode).sum()

69

In [42]:
benefs['postcode'] = benefs.raw_postcode.\
    str.upper().\
    str.strip().\
    str.replace(r'[^A-Z0-9]', '').\
    str.replace(r'^(\S+)([0-9][A-Z]{2})$', r'\1 \2').\
    str.replace('UK ', '')

In [43]:
benefs.postcode.isin(ukpostcodes.postcode).sum()

88

In [44]:
benefs.raw_postcode[~benefs.postcode.isin(ukpostcodes.postcode)].unique()

array(['Y010 5DG', 'PO Box 322', 'BY9 7BL', 'EH10 6YK', 'ME4 4RB',
       'TN34 4UY ', 'UK ME19 6BJ'], dtype=object)

In [45]:
benefs[benefs in benefs.postcode.isin(ukpostcodes.postcode)

SyntaxError: unexpected EOF while parsing (<ipython-input-45-c233060a3a3a>, line 1)

In [46]:
benefs.count()

Id                   95
beneficiary          95
project              95
project_summary      43
start_date           43
end_date             43
funding              95
union_cofinancing    95
raw_postcode         95
country              95
category             95
last_update          95
postcode             95
dtype: int64

### Typos, deprecated postcodes, one mistake, and one prefixed with UK - can't seem to get rid of it.

## Import exchange rate.

In [47]:
eur_gbp = pd.read_pickle('../exchange_rates/output/exchange_rates.pkl.gz')
eur_gbp.tail()

Unnamed: 0,month_start,rate
84,2025-08-01,0.870336
85,2025-09-01,0.870336
86,2025-10-01,0.870336
87,2025-11-01,0.870336
88,2025-12-01,0.870336


In [48]:
eur_gbp.count

<bound method DataFrame.count of     month_start      rate
235  1999-01-01  0.705455
234  1999-02-01  0.692800
233  1999-03-01  0.688800
232  1999-04-01  0.663800
231  1999-05-01  0.658400
230  1999-06-01  0.654600
229  1999-07-01  0.654400
228  1999-08-01  0.666500
227  1999-09-01  0.658200
226  1999-10-01  0.643000
225  1999-11-01  0.641500
224  1999-12-01  0.629700
223  2000-01-01  0.622600
222  2000-02-01  0.605100
221  2000-03-01  0.607200
220  2000-04-01  0.601000
219  2000-05-01  0.582700
218  2000-06-01  0.622600
217  2000-07-01  0.625700
216  2000-08-01  0.615000
215  2000-09-01  0.614800
214  2000-10-01  0.602500
213  2000-11-01  0.581200
212  2000-12-01  0.606900
211  2001-01-01  0.623800
210  2001-02-01  0.629800
209  2001-03-01  0.634900
208  2001-04-01  0.614800
207  2001-05-01  0.625400
206  2001-06-01  0.602400
..          ...       ...
59   2023-07-01  0.870336
60   2023-08-01  0.870336
61   2023-09-01  0.870336
62   2023-10-01  0.870336
63   2023-11-01  0.870336
64   

In [49]:
eur_gbp.rate.isna().sum()

0

## Check that everything has start and end dates

In [50]:
benefs.start_date.isna().sum()

52

In [51]:
benefs.start_date.fillna(value='01.01.2015', inplace=True)

In [52]:
benefs.head()

Unnamed: 0,Id,beneficiary,project,project_summary,start_date,end_date,funding,union_cofinancing,raw_postcode,country,category,last_update,postcode
4,5,Aberdeen City Council,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,"Although local authorities (LAs) are tackling climate change, a majority of buildings in NWE are...",15.09.2016,14.03.2020,404388.2,0.6,AB10 1AQ,United Kingdom,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,AB10 1AQ
8,9,Changeworks Resources for Life Ltd.,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,"Although local authorities (LAs) are tackling climate change, a majority of buildings in NWE are...",15.09.2016,14.03.2020,117504.16,0.6,EH6 5PY,United Kingdom,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,EH6 5PY
9,1,University of Liverpool,AFTB - Towards adhesive free timber buildings,The project addresses the wasteful and harmful use of toxic adhesives in the manufacturing of En...,15.09.2016,14.03.2020,1486849.64,0.6,L69 7ZX,United Kingdom,069 - Support to environmentally-friendly production processes and resource efficiency in SMEs,23.10.2018,L69 7ZX
15,1,Swansea University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,,01.01.2015,,1091750.49,0.6,SA28PP,United Kingdom,"19 - Commercial, industrial or hazardous waste management",23.10.2018,SA2 8PP
18,4,Birmingham City University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,,01.01.2015,,336378.96,0.6,B4 7XG,United Kingdom,"19 - Commercial, industrial or hazardous waste management",23.10.2018,B4 7XG


In [53]:
benefs.end_date.isna().sum()

52

In [54]:
benefs.end_date.fillna(value='31.01.2015', inplace=True)

In [55]:
benefs.start_date.unique()

array(['15.09.2016', '01.01.2015', '25.02.2016', '16.03.2017',
       '25.05.2016'], dtype=object)

In [56]:
benefs.isna().count()

Id                   95
beneficiary          95
project              95
project_summary      95
start_date           95
end_date             95
funding              95
union_cofinancing    95
raw_postcode         95
country              95
category             95
last_update          95
postcode             95
dtype: int64

In [57]:
benefs.head()

Unnamed: 0,Id,beneficiary,project,project_summary,start_date,end_date,funding,union_cofinancing,raw_postcode,country,category,last_update,postcode
4,5,Aberdeen City Council,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,"Although local authorities (LAs) are tackling climate change, a majority of buildings in NWE are...",15.09.2016,14.03.2020,404388.2,0.6,AB10 1AQ,United Kingdom,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,AB10 1AQ
8,9,Changeworks Resources for Life Ltd.,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,"Although local authorities (LAs) are tackling climate change, a majority of buildings in NWE are...",15.09.2016,14.03.2020,117504.16,0.6,EH6 5PY,United Kingdom,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,EH6 5PY
9,1,University of Liverpool,AFTB - Towards adhesive free timber buildings,The project addresses the wasteful and harmful use of toxic adhesives in the manufacturing of En...,15.09.2016,14.03.2020,1486849.64,0.6,L69 7ZX,United Kingdom,069 - Support to environmentally-friendly production processes and resource efficiency in SMEs,23.10.2018,L69 7ZX
15,1,Swansea University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,,01.01.2015,31.01.2015,1091750.49,0.6,SA28PP,United Kingdom,"19 - Commercial, industrial or hazardous waste management",23.10.2018,SA2 8PP
18,4,Birmingham City University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,,01.01.2015,31.01.2015,336378.96,0.6,B4 7XG,United Kingdom,"19 - Commercial, industrial or hazardous waste management",23.10.2018,B4 7XG


In [58]:
benefs.end_date.unique()

array(['14.03.2020', '31.01.2015', '31.07.2019', '31.05.2020',
       '25.02.2019', '15.03.2021', '25.10.2019', '24.05.2019',
       '30.11.2019', '14.09.2019', '13.12.2019', '14.07.2020',
       '15.03.2020', '14.09.2020', '14.12.2019', '15.09.2020'],
      dtype=object)

#### They don't! What to do? We're setting the start date as 01/01/2015 under advice from John Lees-Miller

## Strip € symbol from funding?

In [59]:
benefs.funding.unique()

array([ 404388.2  ,  117504.16 , 1486849.64 , 1091750.49 ,  336378.96 ,
         72617.93 ,  360531.51 ,  361038.43 ,  842148.6  ,  601712.47 ,
        307410.04 ,  222167.27 ,  428022.74 ,  133300.362,  256813.644,
       1218956.87 , 1280527.6  ,  120000.   ,  198420.   ,  711514.65 ,
        116670.   ,  643449.648,  918838.782,   97341.   ,  411549.216,
         53534.676,  799828.06 , 1082892.   ,  272988.   ,  544270.   ,
        213734.616,  408786.834,  275035.2  ,   79628.74 ,  660549.4  ,
       4604665.01 ,  207253.32 ,  207253.34 ,  231999.99 ,  121799.99 ,
       1043630.95 , 3449249.25 ,  642883.9  ,  601320.7  ,  526240.   ,
        407145.   ,  231707.724,  147689.37 , 1742335.59 , 1014142.5  ,
       1624435.668,  317034.792, 2509350.   ,  862168.23 ,  508450.302,
        236575.   ,  189743.838,  166620.186,  419912.178,  702142.98 ,
        300197.4  ,  403318.76 ,  896454.53 ,  460400.22 ,  339424.74 ,
        165222.6  ,   58265.79 ,  371370.45 ,  537451.05 ,  3340

#### No need imported as floats 

In [60]:
eur_gbp.columns

Index(['month_start', 'rate'], dtype='object')

In [61]:
eur_gbp.dtypes

month_start    datetime64[ns]
rate                  float64
dtype: object

In [62]:
benefs.dtypes

Id                     int64
beneficiary           object
project               object
project_summary       object
start_date            object
end_date              object
funding              float64
union_cofinancing    float64
raw_postcode          object
country               object
category              object
last_update           object
postcode              object
dtype: object

In [64]:
def find_average_eur_gbp_rate(row):
     #df = eur_gbp[(eur_gbp['month_start'] > pd.to_datetime(row['start_date'], 'yyyy-mm-dd')) & (eur_gbp['month_start'] <= pd.to_datetime(row['end_date'], 'yyyy-mm-dd'))]
    #df=eur_gbp[eur_gbp['month_start'] > '20150101000000']
    df=eur_gbp[(eur_gbp['month_start'] >= pd.to_datetime(row['start_date'] , format='%d.%m.%Y')) & \
               (eur_gbp['month_start'] <= pd.to_datetime(row['end_date'] , format='%d.%m.%Y'))]
    return df.rate.mean() 
    
benefs['eur_gbp'] = benefs.apply(find_average_eur_gbp_rate, axis=1)

benefs.head()

Unnamed: 0,Id,beneficiary,project,project_summary,start_date,end_date,funding,union_cofinancing,raw_postcode,country,category,last_update,postcode,eur_gbp
4,5,Aberdeen City Council,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,"Although local authorities (LAs) are tackling climate change, a majority of buildings in NWE are...",15.09.2016,14.03.2020,404388.2,0.6,AB10 1AQ,United Kingdom,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,AB10 1AQ,0.872518
8,9,Changeworks Resources for Life Ltd.,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,"Although local authorities (LAs) are tackling climate change, a majority of buildings in NWE are...",15.09.2016,14.03.2020,117504.16,0.6,EH6 5PY,United Kingdom,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,EH6 5PY,0.872518
9,1,University of Liverpool,AFTB - Towards adhesive free timber buildings,The project addresses the wasteful and harmful use of toxic adhesives in the manufacturing of En...,15.09.2016,14.03.2020,1486849.64,0.6,L69 7ZX,United Kingdom,069 - Support to environmentally-friendly production processes and resource efficiency in SMEs,23.10.2018,L69 7ZX,0.872518
15,1,Swansea University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,,01.01.2015,31.01.2015,1091750.49,0.6,SA28PP,United Kingdom,"19 - Commercial, industrial or hazardous waste management",23.10.2018,SA2 8PP,0.7823
18,4,Birmingham City University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,,01.01.2015,31.01.2015,336378.96,0.6,B4 7XG,United Kingdom,"19 - Commercial, industrial or hazardous waste management",23.10.2018,B4 7XG,0.7823


## Drop uninteresting columns

In [65]:
clean_benefs = benefs.drop(['Id','country','project_summary','raw_postcode'], axis=1)

In [66]:
clean_benefs.head()

Unnamed: 0,beneficiary,project,start_date,end_date,funding,union_cofinancing,category,last_update,postcode,eur_gbp
4,Aberdeen City Council,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,15.09.2016,14.03.2020,404388.2,0.6,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,AB10 1AQ,0.872518
8,Changeworks Resources for Life Ltd.,ACE-Retrofitting - Accelerating Condominium Energy Retrofitting,15.09.2016,14.03.2020,117504.16,0.6,"013 - Energy efficiency renovation of public infrastructure, emonstration projects and supportin...",23.10.2018,EH6 5PY,0.872518
9,University of Liverpool,AFTB - Towards adhesive free timber buildings,15.09.2016,14.03.2020,1486849.64,0.6,069 - Support to environmentally-friendly production processes and resource efficiency in SMEs,23.10.2018,L69 7ZX,0.872518
15,Swansea University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,01.01.2015,31.01.2015,1091750.49,0.6,"19 - Commercial, industrial or hazardous waste management",23.10.2018,SA2 8PP,0.7823
18,Birmingham City University,ALG-AD - Creating value from waste nutrients by integrating algal and anaerobic digestion techno...,01.01.2015,31.01.2015,336378.96,0.6,"19 - Commercial, industrial or hazardous waste management",23.10.2018,B4 7XG,0.7823


In [67]:
#clean_benefs.Id.unique()

In [68]:
clean_benefs.beneficiary.unique()

array(['Aberdeen City Council', 'Changeworks Resources for Life Ltd.',
       'University of Liverpool', 'Swansea University',
       'Birmingham City University', 'Langage AD',
       'GM Business Support Ltd', 'Kent County Council',
       'Glasgow City Council', 'The National Non-Food Crop Centre',
       'University of York', 'The Electrospinning Company Limited',
       'Fianium Ltd', 'Amicus Horizon', 'Plymouth City Council',
       'The Rivers Trust', 'Durham County Council', 'Sustrans',
       'Centre of Excellence for Low Carbon and Fuel Cell Technologies',
       'Transport for London', 'Nottingham City Council',
       'University of Leicester', 'The University of Lincoln',
       'Greater Lincolnshire Local Enterprise Partnership',
       'The National Energy Foundation', 'Moat Homes Limited',
       'Clarion Housing Group',
       'The Green Valleys (Wales) Community Interest Company',
       'Mental Health Foundation', 'Royal Borough of Greenwich',
       'University of G

In [69]:
clean_benefs.columns[0]

'beneficiary'

In [70]:
clean_benefs.shape

(95, 10)

In [72]:
output_benefs = clean_benefs[
    clean_benefs.postcode.isin(ukpostcodes.postcode)
].copy()

In [73]:
output_benefs.count()

beneficiary          88
project              88
start_date           88
end_date             88
funding              88
union_cofinancing    88
category             88
last_update          88
postcode             88
eur_gbp              88
dtype: int64

In [74]:
output_benefs['nwreg_benefs_id'] = 'nwreg_benefs_' + output_benefs.index.map(str)

In [75]:
output_benefs.nwreg_benefs_id.head()

4      nwreg_benefs_4
8      nwreg_benefs_8
9      nwreg_benefs_9
15    nwreg_benefs_15
18    nwreg_benefs_18
Name: nwreg_benefs_id, dtype: object

In [76]:
output_benefs.to_pickle('output/interreg_beneficiaries.pkl.gz')