# CLIMATE DATA CHALLENGE 2022

## Understanding Drivers of Uncertainty in Future Loss Projections

### Exposure scenario, sensitivity to:

- Population increases
- locality
- building type
- cost

In [None]:
# Load libraries

In [None]:
import sys

import numpy as np

import pandas as pd

import ascend
from ascend import shape

import datetime

import iris
import iris.plot as iplt
import iris.quickplot as qplt

#from matplotlib import pyplot as plt
#import matplotlib.cm as cm

In [None]:
# Define data filenames and paths

In [None]:
# Directory holding the exposure CSV and the scenario .npy / .shp inputs;
# the derived CSV files are written here as well.
# NOTE(review): absolute path under one user's home directory - adjust when
# running elsewhere.
datadir = "/net/home/h04/jmendes/ClimateDataChallenge/Met-Office-Climate-Data-Challenge-May-2022/data"
# Exposure file name without the ".csv" suffix (read_file appends it);
# also used as the prefix of every CSV file written below.
fexposure = "UKMO_HACKATHON_EXPOSURE"
# Countries kept when loading the exposure data
lst_countries = ["United Kingdom"]

# Directory and file names of the SSP population / urbanisation NetCDF files
ssp_datadir = "/data/users/ldawkins/UKCR/DataForPaper/UKSSPs"
ssp1_pop = "population_SSP1_12km.nc"
ssp5_pop = "population_SSP5_12km.nc"
ssp1_urb = "urbanisation_SSP1_12km.nc"
ssp5_urb = "urbanisation_SSP5_12km.nc"

# SSP identifiers and sensitivity labels used to build the scenario file names
ssps = ["1", "5"]
sensitiv = ["pop", "urb", "urbpop"]

In [None]:
# Functions

In [None]:
def read_file(datadir, fname, country="all"):
    '''
    Read an exposure CSV file from the input directory.

    Every non-blank line is split on commas into string fields. Lines may
    hold different numbers of fields; shorter rows are padded on the right
    with missing values.

    Args:
        datadir (str): Path to input data dir
        fname (str): Name of CSV file to load (without the ".csv" suffix)
        country (str or list (str)): Country name(s) to be selected,
                              matched against the eighth field of each row.
                              Default ("all") is to load all countries.

    Returns:
        Pandas dataframe of input data, with integer column labels and
        string values (15-17 columns for the exposure file, per the
        original note).
    '''

    # define filename
    file = f"{datadir}/{fname}.csv"

    # Read the raw lines directly instead of going through pd.read_fwf:
    # read_fwf infers its column extent from the first rows only, so a
    # longer line further down the file would be cut short.
    # original file is latin1 encoded
    with open(file, encoding='latin1') as stream:
        lines = [line.strip() for line in stream]
    # blank lines carry no record
    lines = [line for line in lines if line]

    # split each row on commas and expand the dataframe
    dfmod = pd.Series(lines, dtype=object).str.split(',', expand=True)

    if isinstance(country, str):
        if country == "all":
            return dfmod
        # accept a single country name as well as a list of names
        country = [country]

    return dfmod.loc[dfmod.iloc[:,7].isin(country)]


In [None]:
def convert_time(cube):
    '''
    Convert cube's time coordinate onto decade.

    Args:
        cube(iris cube): Input SSP cube.
                         time units are 'hours since 1970-01-01 00:00:00'
                         in Gregorian calendar.

    Returns:
        List of decades (calendar year as int), one per time point
    '''

    # The hour offsets are added to the epoch with plain date arithmetic
    # rather than datetime.fromtimestamp(): that call converts to the
    # machine's local time zone, so a point at 00:00 on 1 January could be
    # reported as the previous year, and it can fail for values outside the
    # range supported by the platform.
    epoch = datetime.datetime(1970, 1, 1)

    # NOTE(review): the units are taken from the docstring above, they are
    # not read from the coordinate - confirm they hold for every input cube.
    time_points = cube.coord("time").points

    # int() keeps whole hours only, as the original conversion did
    return [
        (epoch + datetime.timedelta(hours=int(hours))).year
        for hours in time_points
    ]

In [None]:
def new_replacecosts(datadir, data):
    '''
    Read replacement costs scenarios and original exposure data,
    update exposure and save as CSV file, for each SSP-sensitivity.

    One file is written per combination of the module-level `ssps` and
    `sensitiv` lists, named with the module-level `fexposure` prefix.
    The input dataframe itself is left untouched, so it can be reused
    for other scenarios afterwards.

    Args:
        datadir (str): Path to input data dir; the .npy inputs are read
                       from it and the CSV outputs are written to it.
        data (pandas DataFrame): Original exposure dataframe.
    '''

    # run through each SSP and sensitivity and replace costs
    for ssp in ssps:
        for sens in sensitiv:
            file = f"{datadir}/SSP{ssp}_new_replacement_costs_{sens}.npy"
            new_cost = np.load(file)
            # assign() returns a new frame: writing into `data` directly
            # would leave the last scenario's costs in the caller's
            # dataframe and leak them into anything using it afterwards.
            scenario = data.assign(BuildingsReplacement=new_cost)
            # only the first 15 columns are exported
            scenario.iloc[:,:15].to_csv(f"{datadir}/{fexposure}_UK_SSP{ssp}_{sens}.csv", header=True, index=False)

In [None]:
def new_stories(datadir, data):
    '''
    Read new stories scenarios (based on increase of at least 20%
    in population) and original exposure data;
    update exposure and save as CSV file.

    One file is written per entry of the module-level `ssps` list, named
    with the module-level `fexposure` prefix. The input dataframe itself
    is left untouched.

    Args:
        datadir (str): Path to input data dir; the .npy inputs are read
                       from it and the CSV outputs are written to it.
        data (pandas DataFrame): Original exposure dataframe.
    '''

    # run through each SSP (20% population increase scenario)
    for ssp in ssps:
        file = f"{datadir}/SSP{ssp}_updated_residential_stories.npy"
        # named `stories` so the local no longer shadows this function
        stories = np.load(file)
        # assign() returns a new frame instead of overwriting the caller's
        # "Stories" column with the last scenario processed
        scenario = data.assign(Stories=stories)
        # only the first 15 columns are exported
        scenario.iloc[:,:15].to_csv(f"{datadir}/{fexposure}_UK_SSP{ssp}_stories.csv", header=True, index=False)

In [None]:
# Analyse exposure dataset

In [None]:
# Load the exposure rows whose country field is listed in lst_countries
data = read_file(datadir, fexposure, country=lst_countries)

In [None]:
# Display the loaded dataframe (notebook cell output)
data

In [None]:
# Assign column names: map the current label of each of the first
# 15 columns (by position) to its descriptive name
cols_dict = {
    list(data)[position]: name
    for position, name in enumerate((
        "LocationSID",
        "LocationName",
        "BuildingsReplacement",
        "ContentsReplacement",
        "BusinessInteruptionReplacement",
        "CurrencyCode",
        "Territory",
        "CountryName",
        "PostalCode",
        "PostalName",
        "Latitude",
        "Longitude",
        "Stories",
        "Occupancy",
        "Construction",
    ))
}

# relabel in place; columns beyond the 15th keep their existing labels
data.rename(columns=cols_dict, inplace=True)

In [None]:
# Save the country data with the assigned column names
# (first 15 columns only, header row included, index omitted)
data.iloc[:,:15].to_csv(f"{datadir}/{fexposure}_UK.csv", header=True, index=False)

In [None]:
# Get list of fields: print the distinct values found in each column
for label, column in (
    ("Stories: ", "Stories"),
    ("Occupancy: ", "Occupancy"),
    ("Construction: ", "Construction"),
):
    print(label, data[column].unique())

In [None]:
# Update replacement costs in the original datafile with new scenarios:
# combinations of: SSP1, SSP5; population, urbanisation, population-urbanisation
# (one CSV file is written per SSP / sensitivity pair)

In [None]:
# replace original dataset with new scenarios
new_replacecosts(datadir, data)

In [None]:
# Update stories in the original datafile with new scenario:
# >20% increase in population leading to more stories
# (one CSV file is written for each SSP listed in `ssps`)

In [None]:
new_stories(datadir, data)

In [None]:
# Extra scripts to read shape and netcdf files

In [None]:
# Scenario shapefiles

In [None]:
# Load the control-scenario shapefile from the data directory.
# NOTE(review): `shp` is not used again in this part of the file.
shp = shape.load_shp(f"{datadir}/Control_scenario_UK.shp")
#shape.inspect_shp_info(f"{datadir}/Control_scenario_UK.shp")

In [None]:
#shape.show(shp, bounds=[-20, 40, 35, 72], scale="50m")

In [None]:
# rivers test

In [None]:
# Directory and path of the rivers / lake-centerlines shapefile loaded by
# the cells below
ne_dir = "/project/cst/ascend/natural_earth/50m_physical"
river_shapefile = f"{ne_dir}/ne_50m_rivers_lake_centerlines.shp"

In [None]:
#rivers = shape.load_shp(river_shapefile)
#rivers.show(bounds=[-4., 3., 50., 55.], scale='10m')

#rivers_select = [
#    "Ness",
#    "Oich",
#    "Trent",
#    "Severn",
#    "Caledonian Canal",
#    "Thames",
#    "Annalee",
#    "Great Ouse",
#    "Tweed",
#    "Wye",
#    "Swale",
#    "Bann",
#    "Tay",
#    "Blackwater"
#]
#
#for river in rivers_select:
#    river_shape = shape.load_shp(river_shapefile, name=river)[0]
#    river_shape.show(bounds=[-4., 3., 50., 55.], scale='50m')

In [None]:
# Load the shapes named 'Thames' from the rivers shapefile and display the
# first one.
# NOTE(review): the [0] index raises if nothing matches the name - confirm
# the name exists in the shapefile in use.
thames_shape = shape.load_shp(river_shapefile, name='Thames')[0] #select first shape from the list
thames_shape.show(bounds=[-4., 3., 50., 55.], scale='50m')

In [None]:
# SSP netcdf files

In [None]:
# SSP1, population
# Load the cube, then derive one year per time point for use as plot titles
pop1 = iris.load_cube(f"{ssp_datadir}/{ssp1_pop}")
pop1_decades = convert_time(pop1)

In [None]:
# Inspect the cube's data array (notebook cell output)
pop1.data

In [None]:
# Draw one contour panel per time slice of the SSP1 population cube.
# pyplot is imported here because the file-level matplotlib import is
# commented out, which left `plt` undefined in this cell.
from matplotlib import pyplot as plt

dec = len(pop1_decades)
# keep the 2x5 grid for up to ten panels and add rows beyond that, since
# plt.subplot rejects an index larger than the grid
nrows = max(2, -(-dec // 5))
for i, decade in enumerate(pop1_decades):  # for each aggregated time
    plt.subplot(nrows, 5, i + 1)
    qplt.contour(pop1[i])
    # label the panel with the year derived from its time point
    plt.title(decade)
    plt.gca().coastlines()

In [None]:
# SSP5, population
# (same pattern as for SSP1: load the cube, then one year per time point)
pop5 = iris.load_cube(f"{ssp_datadir}/{ssp5_pop}")
pop5_decades = convert_time(pop5)

In [None]:
# SSP1, urbanisation
urb1 = iris.load_cube(f"{ssp_datadir}/{ssp1_urb}")
urb1_decades = convert_time(urb1)

In [None]:
# SSP5, urbanisation
# NOTE(review): pop5, urb1 and urb5 are not used further in this part of
# the file.
urb5 = iris.load_cube(f"{ssp_datadir}/{ssp5_urb}")
urb5_decades = convert_time(urb5)