Water quality API reference documentation: https://environment.data.gov.uk/water-quality/view/doc/reference

In [9]:
import requests as r
import pandas as pd
import plotly.express as px

import plotly.io as pio
# Make "plotly_white" the default template for plotly figures.
pio.templates.default = "plotly_white"

## Number of sampling points

In [2]:
# Request the sampling points whose status is "closed" and count the items returned.
# NOTE(review): _limit is presumably set high enough to get everything in one response — confirm.
closed_url = 'https://environment.data.gov.uk/water-quality/id/sampling-point?samplingPointStatus=closed&_limit=61157'
closed = r.get(closed_url).json()
len(closed['items'])

28081

In [3]:
# Share of sampling points that are closed, expressed as a percentage.
closed_points = 28081   # number of closed sampling points
all_points = 61156      # total number of sampling points

(closed_points / all_points) * 100

45.91699914971548

## Number of samples

In [25]:
# Download the compact listing of samples for the year 2000 and count its items.
url_2000 = 'https://environment.data.gov.uk/water-quality/data/sample?year=2000&_limit=4000000&_view=compact'
samples_2000 = r.get(url_2000).json()
len(samples_2000['items'])

268347

In [24]:
# Download the compact listing of samples for the year 2021 and count its items.
url_2021 = 'https://environment.data.gov.uk/water-quality/data/sample?year=2021&_limit=4000000&_view=compact'
samples_2021 = r.get(url_2021).json()
len(samples_2021['items'])

135652

In [7]:
# Fetch the compact listing of samples without a year filter and count its items.
# The body must be decoded with .json() first: r.get() returns a Response object,
# and indexing that object with ['items'] raises a TypeError.
samples_all = r.get('https://environment.data.gov.uk/water-quality/data/sample?_limit=4000000&_view=compact').json()
len(samples_all['items'])

In [69]:
# Look at the first 2021 record to see which fields a compact sample carries.
items_2021 = samples_2021['items']
items_2021[0]

{'@id': 'http://environment.data.gov.uk/water-quality/data/sample/NE-49500779-20210101-1687230',
 'isComplianceSample': True,
 'purpose': 'http://environment.data.gov.uk/water-quality/def/purposes/CO',
 'sampleDateTime': '2021-01-01T00:07:00',
 'sampledMaterialType': 'http://environment.data.gov.uk/water-quality/def/sampled-material-types/4AZZ',
 'samplingPoint': 'http://environment.data.gov.uk/water-quality/id/sampling-point/NE-49500779'}

In [32]:
# Count the samples for every year from 2000 to 2022 (one request per year),
# keyed by year, then add the yearly counts up into a grand total.
samples_dict = {}
for y in range(2000, 2023):
    year_url = f'https://environment.data.gov.uk/water-quality/data/sample?year={y}&_limit=4000000&_view=compact'
    samples = r.get(year_url).json()
    samples_dict[y] = len(samples['items'])
total = sum(samples_dict.values())

In [31]:
# Combined number of samples over all years requested by the loop (range(2000, 2023)).
total

4734885

In [37]:
# Remove the 2022 count so the per-year series ends at 2021
# (presumably because 2022 was still incomplete when queried — confirm).
# A default is supplied so that re-running this cell does not raise KeyError
# once the key has already been removed.
samples_dict.pop(2022, None)

32598

In [44]:
# Relative change in the yearly sample count from 2000 to 2021, as a percentage.
count_2000 = 268347   # samples counted for 2000
count_2021 = 135652   # samples counted for 2021
(count_2021 - count_2000) / count_2000 * 100

-49.44903427278859

In [81]:
# Same per-year count as for all samples, but restricted by the
# isComplianceSample=true query parameter.
compliance_dict = {}
for y in range(2000, 2023):
    response = r.get(f'https://environment.data.gov.uk/water-quality/data/sample?year={y}&_limit=4000000&_view=compact&isComplianceSample=true')
    samples = response.json()
    compliance_dict[y] = len(samples['items'])

In [86]:
# Remove the 2022 count so the compliance series ends at 2021
# (presumably because 2022 was still incomplete when queried — confirm).
# A default is supplied so that re-running this cell does not raise KeyError
# once the key has already been removed.
compliance_dict.pop(2022, None)

18912

In [None]:
# Build one row per year. Totals are looked up by year instead of being paired
# with the compliance counts by position, so the two dicts cannot be silently
# misaligned; the dict views are materialised as lists before reaching pandas.
years = list(compliance_dict)
samples_df = pd.DataFrame({
    'year': years,
    'compliance': [compliance_dict[y] for y in years],
    'total': [samples_dict[y] for y in years],
})

# Samples that were not returned by the isComplianceSample=true query.
samples_df['general'] = samples_df['total'] - samples_df['compliance']

In [92]:
# Index the frame by year. Guarded so the cell can be re-run: after the first
# run 'year' is the index rather than a column, and set_index('year') would
# raise KeyError.
if 'year' in samples_df.columns:
    samples_df = samples_df.set_index('year')
samples_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22 entries, 2000 to 2021
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   compliance  22 non-null     int64
 1   total       22 non-null     int64
 2   general     22 non-null     int64
dtypes: int64(3)
memory usage: 704.0 bytes


In [16]:
# Line chart of samples_df with width 800; no explicit x/y columns are passed.
fig = px.line(data_frame=samples_df, width=800)


In [94]:
# Set the headline title and the y axis label; the x axis label is left blank.
fig.update_layout(
    title='50% fewer water samples taken in 2021 compared to 2000',
    xaxis_title='',
    yaxis_title='Total water samples taken',
)

In [41]:
# Line chart of the 'total' column only, with the 2000 and 2021 points labelled.
fig = px.line(samples_df, y='total', width=800)

# Annotate both points with their sample count; only the 2021 label gets yshift=10.
for year, extra in ((2000, {}), (2021, {'yshift': 10})):
    year_total = samples_df.at[year, 'total']
    fig.add_annotation(x=year, y=year_total,
                       text=str(year_total),
                       showarrow=True, arrowhead=1,
                       **extra)

fig.update_layout(
    title='50% fewer water samples taken in 2021 compared to 2000',
    xaxis_title='',
    yaxis_title='Total water samples taken',
)

In [53]:
import chart_studio
import chart_studio.plotly as chart
import os

# Register the Chart Studio credentials via set_credentials_file. The API key is
# read from the 'plotly_key' environment variable (KeyError if it is not set).
plotly_api_key = os.environ['plotly_key']
chart_studio.tools.set_credentials_file(username='papanash', api_key=plotly_api_key)

# Send the current figure to Chart Studio; the cell output is whatever chart.plot returns.
chart.plot(fig)

'https://plotly.com/~papanash/16/'