# Water Demand Tutorial 

# Land Use Breakdown per Water Company

In [17]:
%matplotlib inline

import os
import pandas as pd
import numpy as np
import geopandas as gpd
import scipy
import folium

In [18]:
# Attribute columns that are not needed for the analysis
columns_to_remove = [
    'Disclaimer', 'Disclaim2', 'Disclaim3',
    'Provenance', 'Licence', 'WARNINGS', 'Revisions',
]

# Read the water company supply-area shapefile as wrz and discard those
# columns in the same step
wrz_path = os.path.abspath('data_files/WaterSupplyAreas_incNAVs v1_4.shp')
wrz = gpd.read_file(wrz_path).drop(columns=columns_to_remove)

In [19]:
# Append PCC for 2019 to 2020 to the wrz geodataframe
# Load the CSV file
pr24_hist_pcc = pd.read_csv('data_files/pr24_hist_pcc.csv')

# Keep only the company key and the 2019-20 value. The value column is renamed
# *before* merging so it can never collide with an existing '2019-20' column in
# wrz (which is what happens when this cell is run a second time).
# Exact duplicate rows are harmless and are removed here.
pcc_2019_20 = (
    pr24_hist_pcc[['Company', '2019-20']]
    .drop_duplicates()
    .rename(columns={'2019-20': '2019-20_from_CSV'})
)

# Left merge keeps every wrz row, in wrz order. validate='many_to_one' raises
# if a company still appears more than once in the CSV (conflicting values),
# instead of silently adding rows and misaligning the assignment below.
merged = (
    wrz.drop(columns='2019-20', errors='ignore')
    .merge(
        pcc_2019_20,
        how='left',
        left_on='Acronym',
        right_on='Company',
        validate='many_to_one',
    )
    .drop(columns='Company')
)

# merged has exactly one row per wrz row in the same order, so assign by
# position; this does not depend on what index wrz happens to carry.
wrz['2019-20'] = merged['2019-20_from_CSV'].to_numpy()

In [20]:
# Inspect the wrz record with index label 1 to check that the '2019-20'
# column has been appended alongside the original attributes
wrz.loc[1]

AreaServed                                         Severn Trent
ID                                                           15
COMPANY                                      Severn Trent Water
Acronym                                                     SVE
CoType                      regional water and sewerage company
AreaType                              Part of water supply area
Created                                              2020-08-04
LastUpdate                                           2022-05-25
Version                                                     1_4
geometry      MULTIPOLYGON (((351306.3965000832 342962.95345...
2019-20                                                   128.6
Name: 1, dtype: object

In [21]:
# Read the land use polygons from the clc2018_uk shapefile
landuse_path = os.path.abspath('data_files/clc2018_uk.shp')
landuse = gpd.read_file(landuse_path)

In [22]:
# Show the landuse record with index label 1. .loc is label-based, so this is
# the row labelled 1, not necessarily the first row of the geodataframe.
landuse.loc[1]

OBJECTID_1                                                    2
OBJECTID                                                      2
Shape_Leng                                          6502.250189
ID                                                      UK_NI_2
CODE_18                                                     111
Area_Ha                                               134.98762
Remark                                                      NaN
Shape_Le_1                                                  0.0
Shape_Le_2                                          6504.302749
Shape_Area                                        1350727.59603
geometry      POLYGON ((116313.96509999968 484902.6877999995...
Name: 1, dtype: object

In [23]:
# Read the land use legend table from CSV
legend_path = 'data_files/legend.csv'
landuse_categories = pd.read_csv(legend_path)
# landuse_categories.head()  # uncomment to preview a sample of the legend

In [24]:
# Attach the legend (land use labels) to the land use geodataframe.
# The legend's CODE is converted to string before joining on CODE_18
# (presumably CODE_18 is stored as text in the shapefile -- verify).
landuse_categories['CODE'] = landuse_categories['CODE'].astype(str)

# Join on the land use code, then discard the join keys and the two
# 'Unnamed' legend columns so the result is easier to work with
merged_landuse = pd.merge(
    landuse,
    landuse_categories,
    left_on='CODE_18',
    right_on='CODE',
).drop(columns=['CODE_18', 'CODE', 'Unnamed: 4', 'Unnamed: 5'])

# LABEL holds the actual land use description for each polygon
label_column = merged_landuse['LABEL']

In [25]:
# Show the merged landuse record with index label 1 (label-based lookup, so
# not necessarily the first row); it now carries the GRID, LABEL and RGB columns
merged_landuse.loc[1]
# Optional: save a copy of the geodataframe
#merged_landuse.to_file('data_files/merged_landuse.shp', index=False)

OBJECTID_1                                                    2
OBJECTID                                                      2
Shape_Leng                                          6502.250189
ID                                                      UK_NI_2
Area_Ha                                               134.98762
Remark                                                      NaN
Shape_Le_1                                                  0.0
Shape_Le_2                                          6504.302749
Shape_Area                                        1350727.59603
geometry      POLYGON ((116313.96509999968 484902.6877999995...
GRID                                                          1
LABEL                                   Continuous urban fabric
RGB                                                 230-000-077
Name: 1, dtype: object

In [26]:
# Both layers must share a coordinate reference system before they are
# spatially joined; print whether they do
same_crs = merged_landuse.crs == wrz.crs
print(same_crs)

True


In [27]:
# Spatial join: pair every wrz polygon with each land use polygon it
# intersects (inner join keeps matched pairs only).
# NOTE(review): 'intersects' carries the land use polygon's full Area_Ha even
# when it only partly overlaps the supply area, and a polygon that touches
# several wrz rows of the same COMPANY is summed once per row. Confirm this
# over-count is acceptable, or clip with an overlay before summing.
join = gpd.sjoin(wrz, merged_landuse, how='inner', predicate='intersects')

# Sum the Area_Ha attribute for every (COMPANY, LABEL) combination
grouped = join.groupby(['COMPANY', 'LABEL'])['Area_Ha'].sum().reset_index()

# Centroid of the first wrz polygon listed for each company. Computed once per
# company rather than re-filtering wrz and recomputing it for every
# (COMPANY, LABEL) row.
company_centroids = (
    wrz.drop_duplicates(subset='COMPANY')
    .set_index('COMPANY')
    .geometry.centroid
)

# Build the GeoDataFrame with that company centroid as the point geometry of
# each row (every label of a company shares the same point).
company_landuse = gpd.GeoDataFrame(
    grouped,
    geometry=company_centroids.reindex(grouped['COMPANY']).to_numpy(),
    crs=wrz.crs,
)

# Show the record with index label 1 of the resulting GeoDataFrame
company_landuse.loc[1]

COMPANY                                   Affinity Water
LABEL                                         Bare rocks
Area_Ha                                        46.709683
geometry    POINT (613764.8261605917 223892.77165446547)
Name: 1, dtype: object

In [29]:
# For each company, pick the three rows with the largest Area_Ha
top_areas = company_landuse.groupby('COMPANY')['Area_Ha'].nlargest(3)

# Row labels of company_landuse that were picked. get_level_values returns the
# labels actually present in the result; .levels is only the pool of values a
# level may take and is not guaranteed to match the selected rows.
# Sorting keeps the rows in their company_landuse order.
selected_indices = top_areas.index.get_level_values(1).sort_values()

# Take an explicit copy so the column added below is written to an
# independent frame rather than to a slice of company_landuse
selected_landuse = company_landuse.loc[selected_indices].copy()

# Convert hectares to square kilometres (100 ha = 1 km2); the column keeps the
# name Area_Km
selected_landuse['Area_Km'] = (selected_landuse['Area_Ha'] / 100).round(2)

# Filter out rows with LABEL equal to unwanted categories.
# NOTE(review): this runs after the top-three selection, so a company whose
# top three include one of these labels ends up with fewer than three rows.
# Confirm that is intended; otherwise filter before ranking.
unwanted_categories = [
    'Beaches dunes sands',
    'Bare rocks',
    'Sparsely vegetated areas',
    'Burnt areas',
    'Glaciers and perpetual snow',
    'Inland marshes',
    'Peat bogs',
    'Salt marshes',
    'Salines',
    'Intertidal flats',
    'Water courses',
    'Water bodies',
    'Coastal lagoons',
    'Estuaries',
    'Sea and ocean',
    'NODATA',
    'UNCLASSIFIED WATER BODIES',
]
selected_landuse = selected_landuse[~selected_landuse['LABEL'].isin(unwanted_categories)]

# Write the result to a shapefile and to a CSV
selected_landuse.to_file('data_files/selected_landuse.shp')
selected_landuse.to_csv('data_files/selected_landuse.csv', index=False)