In [None]:
import geopandas as gpd
import pandas as pd
import json
import requests
from bs4 import BeautifulSoup
import urllib.request
from datetime import datetime

In [None]:
# Endpoint queried for the full list of Trussell Trust food banks.
FOODBANK_SEARCH_URL = "https://www.trusselltrust.org/get-help/find-a-foodbank/foodbank-search/?foodbank_s=all&callback=?"

# A timeout stops the cell from hanging indefinitely if the server never answers.
with urllib.request.urlopen(FOODBANK_SEARCH_URL, timeout=60) as response:
    payload = response.read().decode()

# The first two and last two characters are not part of the JSON document
# (presumably a JSONP-style wrapper caused by `callback=?` -- TODO confirm),
# so they are dropped before parsing.
data = json.loads(payload[2:-2])

The Trussell Trust interactive map uses a JSON file with a list of dictionaries. Each dictionary is associated with a small region and can consist of multiple foodbank centres. So each dictionary in the list has an additional nested dictionary with centre-specific information. Below is an attempt to extract some important features and create a simpler list of dictionaries corresponding to each unique foodbank distribution centre.

The centre geolocation and opening time information is contained in a third nested dictionary which should be extracted appropriately.

The Trussell Trust doesn't provide stock quantities for foodbanks but it does provide opening and closing times. My idea is to calculate the weekly time spent open for each food bank, with more time spent open indicating higher risk of food insecurity for that region.

In [None]:
# Opening time information consists of dictionaries for each day, containing the opening and closing time for that day.
# Inspect the second centre of the first entry in the feed as an example.
data[0]['foodbank_centre'][1]['opening_time']

In [None]:
# By looping through, it is possible to pair up opening and closing times,
# priming the data for a time difference calculation.
# For every centre this prints each opening/closing pair, the seconds between
# them, and the summed seconds across all of that centre's entries.
for region in data:
    centres = region['foodbank_centre']
    # The feed appears to store False instead of a list when a region has no
    # centres; a truthiness check also skips None or an empty list safely.
    if not centres:
        continue
    for centre in centres:
        try:
            total_seconds_open = 0
            for slot in centre['opening_time']:
                # Parse each time string once and reuse the result.
                opened = datetime.strptime(slot['opening_time'], '%H:%M')
                print(opened)
                closed = datetime.strptime(slot['closing_time'], '%H:%M')
                print(closed)
                seconds_open = (closed - opened).total_seconds()
                print('difference:', seconds_open)
                total_seconds_open += seconds_open
            print('total differences:', total_seconds_open)
            print('\n')
        except KeyError:
            # Some centre dictionaries lack the expected keys; skip them.
            continue

In [None]:
# Flatten the nested feed into one record per distribution centre. Each record
# carries the centre's own fields plus the parent food bank's website, the
# centre's latitude/longitude and the total opening time in seconds.
fbanklist = []
for dictionary in data:
    # The feed appears to store False instead of a list when a region has no
    # centres; a truthiness check also skips None or an empty list safely.
    if not dictionary['foodbank_centre']:
        continue
    for fbank in dictionary['foodbank_centre']:
        try:  # KeyError occurs because some dictionaries are empty
            # Build a new dictionary instead of writing into `fbank`, so the
            # raw `data` stays untouched and a KeyError part-way through
            # cannot leave a centre half-modified.
            record = {
                **fbank,
                'website': dictionary['foodbank_information']['website'],
                'lat': fbank['centre_geolocation']['lat'],
                'lng': fbank['centre_geolocation']['lng'],
                'total_time_open': sum(
                    (datetime.strptime(slot['closing_time'], '%H:%M')
                     - datetime.strptime(slot['opening_time'], '%H:%M')).total_seconds()
                    for slot in fbank['opening_time']
                ),
            }
        except KeyError:
            continue
        else:
            fbanklist.append(record)

In [None]:
# Build a DataFrame from the flattened list: one row per dictionary in fbanklist.
foodbanks = pd.DataFrame(fbanklist)
foodbanks

There are two address strings for each row in the dataframe, one of which still has `\r`, `\n` and `<br \>` elements.

In [None]:
# Regional stats for Trussell Trust food banks. 6 Tables corresponding to 6 years.
# pd.read_html returns a list of DataFrames, one per HTML table parsed from the page.
reg_stats = pd.read_html('https://www.trusselltrust.org/news-and-blog/latest-stats/end-year-stats')

In [None]:
# Preview the first of the parsed tables.
reg_stats[0]

Now we have regional statistics and a postcode associated with every food bank. We could use data relating postcodes to regions.

In [None]:
# Lookup table relating postcode prefixes to UK regions; only the first table parsed from the page is kept.
codes_regions = pd.read_html('https://www.robertsharp.co.uk/2017/08/09/a-table-that-shows-the-uk-region-for-all-postcode-districts/')[0]

In [None]:
# Show only the region and postcode-prefix columns of the lookup table.
codes_regions[['UK region', 'Postcode prefix']]