In [12]:
################################################################################
# This script adds the Koppen-Geiger climate classification to
# the dataset. The script does the following:
# 1. Load the dataset
# 2. Cycle through each row in the dataset and get the latitude, longitude, and date
# 3. Use the latitude and longitude to get the climate classification from the corresponding time period
# 4. Add the climate classification to the dataset
# 5. Read in the legend and convert the climate classification from a number to a string (A, B, C, D, E)
# 6. One-hot encode the climate classification
# 7. Save the dataset
################################################################################

In [13]:
# Import libraries
from glob import glob
import os
import pandas as pd
import geopandas as gpd
import rasterio as rio

In [14]:
# Load the dataset
def loadDataset(path):
    """Read a CSV file and return it as a GeoDataFrame of point geometries.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; forwarded unchanged to ``pd.read_csv``.
        The file must expose ``lon`` and ``lat`` columns.

    Returns
    -------
    geopandas.GeoDataFrame
        Every column of the CSV plus a ``geometry`` column holding one point
        per row, built from (``lon``, ``lat``). No CRS is passed to the
        constructor here.
    """
    table = pd.read_csv(path)
    # x comes from longitude and y from latitude.
    points = gpd.points_from_xy(table.lon, table.lat)
    return gpd.GeoDataFrame(table, geometry=points)

In [30]:
# Read in the climate classification raster directories and assign them to the corresponding time period in
# a dictionary. Each raster is stored under a key representing the time period as a (start year, end year) tuple.
# Each value is a list containing:
# 1. The raster object
# 2. The first and only band of the raster as a numpy array
def readRasters(root='.', exclude=('2041_2070', '2071_2099'),
                raster_name='koppen_geiger_0p1.tif'):
    """Load the climate classification raster of every period directory.

    Scans ``root`` for sub-directories named ``<start>_<end>`` (for example
    ``1901_1930``) and reads ``raster_name`` from each of them.

    Parameters
    ----------
    root : str, optional
        Directory that contains the period sub-directories. Defaults to the
        current working directory.
    exclude : iterable of str, optional
        Directory names to ignore. Defaults to the two future-projection
        periods. Names that do not exist are simply not encountered; they no
        longer cause an error.
    raster_name : str, optional
        File name of the raster inside each period directory.

    Returns
    -------
    dict
        Maps ``(start_year, end_year)`` integer tuples to a two-item list:
        ``[raster object, first band as a numpy array]``.

        NOTE: the raster object is opened in a ``with`` block, so the handle
        stored in the dictionary has already been closed when this function
        returns. Use the array for pixel values; presumably only metadata
        (such as ``.crs``) should be read from the handle -- confirm against
        the rasterio documentation before relying on more.

    Raises
    ------
    Whatever ``rio.open`` raises when a period directory does not contain
    ``raster_name``; that situation is not silenced.
    """
    skipped = set(exclude)
    rasters = {}
    # Sorted so that the dictionary is filled in a deterministic order.
    for entry in sorted(glob(os.path.join(root, '*'))):
        name = os.path.basename(entry)
        if name in skipped or not os.path.isdir(entry):
            continue
        try:
            # Only the first two underscore-separated parts form the key.
            start, end = (int(part) for part in name.split('_')[:2])
        except ValueError:
            # Not a "<start>_<end>" period directory (e.g. a data or cache
            # folder): ignore it instead of aborting the whole scan.
            continue
        # os.path.join keeps the path valid on every operating system; a
        # hard-coded backslash only works on Windows.
        with rio.open(os.path.join(entry, raster_name)) as src:
            rasters[(start, end)] = [src, src.read(1)]
    return rasters
    

In [35]:
# Sanity check: re-read the rasters, take the raster object (item 0) stored for
# the 1901-1930 period and display its CRS as a string. The recorded output
# for this cell is 'EPSG:4326'.
readRasters()[(1901, 1930)][0].crs.to_string()

'EPSG:4326'