### What: Improve understanding of waste and recycling material flows in the UK, with a view to reducing waste and improving recycling rates.
### How: We aim to identify target human behaviours and areas of strategy focus which would be most effective.
### How: Analysing recycling data gathered by local authorities, combining it with any additional sources, and quantifying and comparing tonnes of waste flows across boroughs and recycling facilities.


## Resources

*  https://www.environment.gov.scot/media/2214/20180314_2017_wds_workshop_outcomes.pdf

### Annual recycling London vs England and London Boroughs dataset + opinion poll

* https://data.london.gov.uk/gla-poll-results/
* https://data.london.gov.uk/download/local-authority-collected-waste-management-london/4d88d88c-31a7-468b-95e9-49fba78ad20e/la-collected-waste.xls
* https://data.london.gov.uk/download/household-waste-recycling-rates-borough/15ddc38a-0a37-4f69-98b5-e69e549b39d3/Household%20recycling.csv

### Waste dataflow (zip xlsx format - be patient with the UI)

* http://www.wastedataflow.org/login.aspx?logoff=true
* http://www.wastedataflow.org/reports/default.aspx

### local authority district shapefiles:

* http://geoportal.statistics.gov.uk/datasets/ae90afc385c04d869bc8cf8890bd1bcd_2
* http://geoportal.statistics.gov.uk/datasets/ae90afc385c04d869bc8cf8890bd1bcd_3

* https://github.com/martinjc/UK-GeoJSON/tree/master/json
* https://martinjc.github.io/UK-GeoJSON/

### cambridge guy's paper

In [None]:
import pandas as pd
import numpy as np
import json
import folium
#from jinja2 import Template

## London Borough Recycling Rates

In [None]:
# Load the borough-level household recycling rates: the header row is the
# second row of the sheet (skiprows=1) and only the first 33 data rows are read.
df_recycling_rates = pd.read_excel(
    './datasets/household-recycling-borough.xls',
    sheet_name='Household Recycling Rates',
    skiprows=1,
    header=0,
    nrows=33,
)

# Replace the sheet's headers with identifier-friendly names: three id columns
# followed by one column per financial year, 1998/99 through 2016/17.
# Generating the year labels keeps them uniform (the hand-written list had
# "y_2010-11" with a hyphen where every other label uses an underscore).
year_columns = [
    "y_{}_{:02d}".format(start_year, (start_year + 1) % 100)
    for start_year in range(1998, 2017)
]
df_recycling_rates.columns = ["New_Code", "Code", "Area"] + year_columns

In [None]:
# Preview the first five rows of the recycling-rates table.
df_recycling_rates.head(5)

In [None]:
# Show borough codes next to borough names so they can be checked against
# the codes used in the shapefiles.
df_recycling_rates.loc[:, ['New_Code', 'Area']].head(35)

## Import and filter local authority shapefile

In [None]:
# Filter the local-authority polygons down to the ones we have aggregated
# recycling data for, and write them out as a new GeoJSON FeatureCollection.

# Codes of the LAs present in the recycling table. The set is built once so
# each feature is checked with a hash lookup instead of an array scan.
LA_list = df_recycling_rates['New_Code'].values
la_codes = set(LA_list)

# Load the full GeoJSON; the `with` block closes the file, so no explicit
# close() is needed.
LAGeoJson = './lad2.json'
with open(LAGeoJson, encoding="utf8") as j:
    la_geojson = json.load(j)

# Keep a shallow copy of every feature whose LAD13CD code is in the table.
buffer = [
    dict(record)
    for record in la_geojson['features']
    if record['properties']['LAD13CD'] in la_codes
]

# Write the filtered json.
# NOTE(review): ".datasets/" (no slash after the dot) looks like a typo for
# "./datasets/", but the map cell reads back exactly this path, so it is
# left unchanged here -- confirm and fix both places together.
with open(".datasets/lad_selected.json", "w", encoding="utf8") as f:
    json.dump(
        {
            "type": "FeatureCollection",
            "crs": {
                "type": "name",
                "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"},
            },
            "features": buffer,
        },
        f,
        indent=2,
    )
    f.write("\n")

# Plot London recycling rates on map

In [None]:
# Input: the filtered GeoJSON. Output: a standalone HTML copy of the map.
LAGeoJson = '.datasets/lad_selected.json'
LAMap3 = "./charts/lad_selected.html"

# Base map at latitude 51.5, longitude -0.1.
m = folium.Map(zoom_start=11, location=[51.5, -0.1])

# Choropleth settings: polygons are joined to the table on the LA code and
# shaded by the 2016/17 column.
# NOTE(review): `label` is not among Choropleth's documented parameters --
# confirm it has any effect.
choropleth_options = dict(
    geo_data=LAGeoJson,
    data=df_recycling_rates[['New_Code','y_2016_17']],
    columns=['New_Code', 'y_2016_17'],
    key_on='feature.properties.LAD13CD',
    legend_name='Local Authority Recycling rates',
    fill_color='RdYlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    label='feature.properties.LAD13NM',
)
folium.Choropleth(**choropleth_options).add_to(m)

folium.LayerControl().add_to(m)

# Save the map to disk.
m.save(outfile=LAMap3)

# Last expression of the cell: renders the map inline in the notebook.
m

## Waste Dataflow dataset. Recycling by material type. All Local Authorities, quarterly

In [None]:
# Load sheet 'Q100' of the WasteDataFlow raw download; column names come from
# the second row of the sheet (header=1).
# Excel column ranges in `usecols` must be written with a colon: the original
# "AB-AD" is not a valid range and is now "AB:AD".
df = pd.read_excel(
    './datasets/Raw_Data_Download_London_quarter.xlsx',
    sheet_name='Q100',
    skiprows=0,
    header=1,
    usecols="A:N,S:Y,AB:AD,AI",
)

In [None]:
# Preview the first four rows of the raw download.
df.iloc[:4]

In [None]:
# List the columns of the freshly loaded frame. `df_stream` is only created
# in a later cell, so inspecting it here fails when the notebook is run top
# to bottom.
df.columns

In [None]:
# 'year' is the second space-separated token of the 'Period' string.
period_tokens = df['Period'].str.split(' ')
df['year'] = period_tokens.str[1]

In [None]:
# Optional: remove rows if only interested in %recycled by material type:
# FacilityType = "Materials recovery facility" which may stand for an intermediary, total tonnage - consider adding later
# FacilityType = "Final Destination" e.g. composting
# NOTE(review): presumably FacilityTypeId 8 and 22 are the two facility types
# named above -- confirm against the WasteDataFlow lookup.
# .copy() makes df_stream an independent frame, so the column added to it in
# a later cell does not trigger pandas' chained-assignment warning.
df_stream = df[
    ~df['FacilityTypeId'].isin([8, 22])
    & (df['WasteProcessorOutputId'] != 0)
].copy()  # & (SenderWasteProcessorOutputId != 0)

In [None]:
# Preview the first ten rows of the columns that describe each waste flow.
preview_columns = [
    'Authority',
    'WasteStreamType',
    'FacilityName',
    'TonnesByMaterial',
    'OutputProcessType',
    'Material',
]
df_stream[preview_columns].head(10)

In [None]:
# Build a combined "<OutputProcessType>_<Material>" label for every row;
# missing values contribute an empty string.
process_and_material = df_stream[['OutputProcessType', 'Material']].fillna('')
df_stream['OutputProcessMaterial'] = [
    '_'.join(pair)
    for pair in process_and_material.itertuples(index=False, name=None)
]

In [None]:
# Keep only the columns the Sankey chart needs.
sankey_columns = [
    'Authority',
    'FacilityName',
    'WasteStreamType',
    'Material',
    'TonnesByMaterial',
]
df_flows = df_stream.loc[:, sankey_columns]

# Sankey Chart

In [None]:
# Create a 'dimensions' table. Can add extra LA and Processing Facility characteristics here.
# One row per distinct authority, each tagged with type 'authority'. The
# unique names are computed once and the constant 'type' value is broadcast
# by pandas instead of being repeated through a range(len(...)) comprehension.
authority_names = np.unique(df_flows['Authority'].values)
authorities = pd.DataFrame({'authority': authority_names, 'type': 'authority'})

In [None]:
# Display the authorities dimension table built in the previous cell.
authorities

In [None]:
# https://sankeyview.readthedocs.io/en/latest/tutorials/dimension-tables.html

# Setup
# The star import is kept on purpose: the following cells use floweaver names
# (ProcessGroup, Partition, SankeyDefinition, weave, ...) unqualified.
from floweaver import *

# Set the default size to fit the documentation better.
size = dict(width=570, height=300)

# `flows` and `processes` were referenced here but never defined in the
# notebook; build them from the tables prepared in the earlier cells.

# Flow table in the column layout used by the floweaver tutorials:
# source -> target, with a 'type' and a 'value' per flow.
# Rows without an authority or a facility cannot be drawn and are dropped.
flows = (
    df_flows
    .dropna(subset=['Authority', 'FacilityName'])
    .rename(columns={
        'Authority': 'source',
        'FacilityName': 'target',
        'Material': 'type',
        'TonnesByMaterial': 'value',
    })
)

# Process dimension table, indexed by process id, with a 'type' column that
# tags each process as 'authority' or 'facility'.
facility_names = flows['target'].unique()
processes = pd.concat([
    authorities.set_index('authority')[['type']],
    pd.DataFrame({'type': 'facility'}, index=facility_names),
])
processes.index.name = 'id'
# NOTE(review): if a name occurs both as an authority and as a facility, only
# the authority row is kept so the index stays unique -- confirm that such
# collisions do not occur in the data.
processes = processes[~processes.index.duplicated(keep='first')]

dataset = Dataset(flows, dim_process=processes)



In [None]:
# Two process groups, each selected by a query on the 'type' column.
nodes = dict(
    authorities=ProcessGroup('type == "authority"'),
    facilities=ProcessGroup('type == "facility"'),
)

# Layer order: "authorities" on the left, "facilities" on the right.
ordering = [['authorities'], ['facilities']]


In [None]:
# `bundles` was used below without ever being defined: a single bundle
# connects the "authorities" group to the "facilities" group.
bundles = [Bundle('authorities', 'facilities')]

# Another partition -- but this time the dimension is the "type"
# column of the flows table
# NOTE(review): 'apples' / 'bananas' are placeholder labels carried over from
# the floweaver tutorial -- replace them (and the palette keys) with the
# values actually present in the flows' 'type' column.
fruits_by_type = Partition.Simple('type', ['apples', 'bananas'])

# Set the colours for the labels in the partition.
palette = {'apples': 'yellowgreen', 'bananas': 'gold'}

# New SDD with the flow_partition set
sdd = SankeyDefinition(nodes, bundles, ordering,
                       flow_partition=fruits_by_type)

# Weave against `dataset`, not the bare flows table: the process groups are
# selected by the 'type' column of the process dimension table, which only
# the Dataset carries.
weave(sdd, dataset, palette=palette).to_widget(**size)