## Libraries

In [None]:
# Standard library imports
import os
import zipfile
from pathlib import Path

# Third-party imports
import pandas as pd
import cdsapi

## Define Paths

In [2]:
# The `data` folder is a sibling of the current working directory.
data_dir = Path.cwd().parent.joinpath('data')
# Sub-folder that holds the climate archive and the files extracted from it.
climate_data_dir = data_dir.joinpath('climate_data')
# CSV file with the Lyme disease records loaded below.
lyme_dataset_path = data_dir.joinpath('UKHSA-2017-2022-Lyme-Disease.csv')

## Load Dataset

In [4]:
# Read the Lyme disease CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=lyme_dataset_path)
df.head(n=5)

Unnamed: 0,Year,Council,Case,Population,Incidence,Lower_95CI,Upper_95CI
0,2017,Adur,1.0,63721.0,1.56934,0.03973,8.74381
1,2017,Allerdale,3.0,97213.0,3.08601,0.63641,9.01862
2,2017,Amber Valley,4.0,125898.0,3.17718,0.86567,8.13483
3,2017,Arun,9.0,158657.0,5.67262,2.59388,10.76839
4,2017,Ashfield,0.0,126164.0,0.0,0.0,2.92388


## Get Climate Data

In [None]:
def download_uk_climate_data(
    years: list,
    output_filename: "str | os.PathLike" = 'uk_climate_data.zip',
) -> None:
    """
    Download monthly ERA5-Land climate data for the UK using cdsapi.

    The request covers May, June and July of every requested year and asks
    for 2m temperature, total evaporation, total precipitation, high
    vegetation cover and type of high vegetation. The result is delivered
    as a zip archive containing NetCDF data.

    Parameters
    ----------
    years : list
        The list of years as strings, e.g., ['2020', '2021', '2022'].
        Non-string entries (e.g. ints) are converted with ``str``; a single
        year given as a bare ``str`` or ``int`` is also accepted.
    output_filename : str or path-like
        Where to save the downloaded zip archive. Missing parent
        directories are created. Default is 'uk_climate_data.zip'.

    Raises
    ------
    ValueError
        If ``years`` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(years, (str, int)):
        years = [years]
    years = [str(year) for year in years]
    if not years:
        # Fail fast instead of submitting a request that cannot succeed.
        raise ValueError("`years` must contain at least one year.")

    c = cdsapi.Client()

    print(f"Downloading UK climate data for years: {years}")
    print("This may take several minutes...")

    dataset = "reanalysis-era5-land-monthly-means"
    request = {
        "product_type": ["monthly_averaged_reanalysis"],
        "variable": [
            "2m_temperature",
            "total_evaporation",
            "total_precipitation",
            "high_vegetation_cover",
            "type_of_high_vegetation",
        ],
        "year": years,
        "month": ["05", "06", "07"],
        "time": ["00:00"],
        "data_format": "netcdf",
        "download_format": "zip",
        # Bounding box; presumably [north, west, south, east] per the CDS
        # convention -- TODO confirm against the CDS API documentation.
        "area": [61, -8, 49, 2],
    }

    # Make sure the destination folder exists so the download can be written.
    target = Path(output_filename)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Pass a plain string so the client never has to handle Path objects.
    c.retrieve(dataset, request, str(target))

    print(f"Download complete! Data saved as: {output_filename}")

In [None]:
# Reuse the climate data directory defined earlier and make sure it exists,
# otherwise the download has nowhere to write the archive on a fresh checkout.
climate_data_dir.mkdir(parents=True, exist_ok=True)
climate_filename = climate_data_dir / 'uk_climate_data.zip'
# Request exactly the years present in the Lyme dataset, as strings.
years = df['Year'].unique().astype(str).tolist()

download_uk_climate_data(years, output_filename=climate_filename)

## Extract Climate Data

In [None]:
def extract_climate_data(
    zip_filename: "str | os.PathLike",
    extract_to: "str | os.PathLike" = 'climate_data',
    *,
    remove_zip: bool = True,
) -> None:
    """
    Extract the contents of a zip file to a specified directory.

    Parameters
    ----------
    zip_filename : str or path-like
        The zip file to extract.
    extract_to : str or path-like
        The directory to extract files into. Default is 'climate_data'.
    remove_zip : bool, keyword-only
        Whether to delete the zip file once extraction has succeeded.
        Default is True.

    Raises
    ------
    FileNotFoundError
        If ``zip_filename`` does not exist.
    zipfile.BadZipFile
        If ``zip_filename`` is not a valid zip archive.
    """
    with zipfile.ZipFile(zip_filename, 'r') as archive:
        archive.extractall(extract_to)

    # Only reached when extraction succeeded, so a failed extraction never
    # costs the caller the downloaded archive.
    if remove_zip:
        os.remove(zip_filename)
    print(f"Extracted files to: {extract_to}")

In [None]:
# Location of the downloaded archive inside the climate data directory.
climate_data_path = climate_data_dir.joinpath('uk_climate_data.zip')

# Unpack the archive into the same directory that contains it.
extract_climate_data(zip_filename=climate_data_path, extract_to=climate_data_dir)