In [1]:
import pandas as pd
import xarray as xr
import numpy as np
from netCDF4 import Dataset
import matplotlib.pyplot as plt

In [2]:
import pandas as pd
import xarray as xr
import numpy as np

# File paths
csv_file = r"C:\Users\jmsch\OneDrive\Documenten\Studie\Civiele Techniek\Environmental Engineering\Year 2\Afstuderen\NBS Nakuru Kenia\2. QGIS + Wflow\DATA\2025-03-13 KMD data Ruud\KMD and Lake\KMD_precipitation.csv"
output_nc_file = r"C:\Users\jmsch\OneDrive\Documenten\Studie\Civiele Techniek\Environmental Engineering\Year 2\Afstuderen\NBS Nakuru Kenia\2. QGIS + Wflow\DATA\2025-03-13 KMD data Ruud\KMD_and_Lake_precipitation_2007_2021.nc"

# Read metadata (first 4 rows)
metadata = pd.read_csv(csv_file, delimiter=',', nrows=4, header=None)
station_names = metadata.iloc[0, 1:].values  # Station names (Pts2 to Pts15)
lons = metadata.iloc[1, 1:].astype(float).values  # Longitudes (in degrees)
lats = metadata.iloc[2, 1:].astype(float).values  # Latitudes (in degrees)

# Read precipitation data (starting from the 5th row)
data = pd.read_csv(csv_file, delimiter=',', skiprows=4)
data['DATE'] = pd.to_datetime(data['DATE'], format='%Y%m%d')  # Convert DATE to datetime
data.set_index('DATE', inplace=True)

# Filter data for the years 2007 to 2021
data = data.loc["2007-01-01":"2021-12-31"]

# Create a regular grid for longitude and latitude
lon_min, lon_max = lons.min() - 0.1, lons.max() + 0.1
lat_min, lat_max = lats.min() - 0.1, lats.max() + 0.1
lon_grid = np.linspace(lon_min, lon_max, 100)  # 100 grid points in longitude
lat_grid = np.linspace(lat_min, lat_max, 100)  # 100 grid points in latitude

# Create a 2D grid of precipitation data
precip_grid = np.full((len(data.index), len(lat_grid), len(lon_grid)), np.nan)

# Assign precipitation values to the grid based on station locations
for i, (lon, lat) in enumerate(zip(lons, lats)):
    lon_idx = np.abs(lon_grid - lon).argmin()
    lat_idx = np.abs(lat_grid - lat).argmin()
    precip_grid[:, lat_idx, lon_idx] = data.iloc[:, i].values

# Create an xarray.Dataset
ds = xr.Dataset(
    {
        "precipitation": (("time", "latitude", "longitude"), precip_grid)
    },
    coords={
        "time": data.index,
        "longitude": lon_grid,
        "latitude": lat_grid,
    },
)

# Add metadata
ds["precipitation"].attrs["units"] = "mm/day"
ds.attrs["description"] = "Spatialized precipitation data for Lake Nakuru catchment (2007-2021)"
ds.attrs["source"] = "KMD precipitation data"
ds.attrs["crs"] = "EPSG:4326"  # Add CRS metadata

# Save to NetCDF
ds.to_netcdf(output_nc_file)

print(f"NetCDF file saved to {output_nc_file}")

# Verify the dataset
print(ds)

NetCDF file saved to C:\Users\jmsch\OneDrive\Documenten\Studie\Civiele Techniek\Environmental Engineering\Year 2\Afstuderen\NBS Nakuru Kenia\2. QGIS + Wflow\DATA\2025-03-13 KMD data Ruud\KMD_and_Lake_precipitation_2007_2021.nc
<xarray.Dataset> Size: 438MB
Dimensions:        (time: 5479, latitude: 100, longitude: 100, DATE: 5479)
Coordinates:
    time           (DATE) datetime64[ns] 44kB 2007-01-01 ... 2021-12-31
  * longitude      (longitude) float64 800B 35.79 35.8 35.8 ... 36.29 36.3 36.3
  * latitude       (latitude) float64 800B -0.7374 -0.7318 ... -0.1867 -0.1811
Dimensions without coordinates: DATE
Data variables:
    precipitation  (time, latitude, longitude) float64 438MB nan nan ... nan nan
Attributes:
    description:  Spatialized precipitation data for Lake Nakuru catchment (2...
    source:       KMD precipitation data
    crs:          EPSG:4326


In [72]:
import geopandas as gpd

import pandas as pd
import xarray as xr
import numpy as np
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import rasterio
from rasterio.features import rasterize
from rasterio.transform import from_origin
from shapely.geometry import mapping
from shapely.geometry import Point

In [75]:
stations_df = pd.read_csv(r"C:\Users\jmsch\OneDrive\Documenten\Studie\Civiele Techniek\Environmental Engineering\Year 2\Afstuderen\NBS Nakuru Kenia\2. QGIS + Wflow\DATA\2025-03-13 KMD data Ruud\KMD and Lake\KMD_precipitation.csv", delimiter=',', header=None)

station_names = stations_df.iloc[0, 1:].values  # Station names (Pts2 to Pts15)
precipitation_values = stations_df.iloc[5:, 1:].astype(float).values  # Precipitation values (in mm/day)
print(station_names)
display(precipitation_values)

display(stations_df.head())

lon = stations_df.iloc[1, 1:].astype(float).values  # Longitudes (in degrees)
lat = stations_df.iloc[2, 1:].astype(float).values  # Latitudes (in degrees)

stations_geometry = [Point(lon[i], lat[i]) for i in range(len(lon))]
stations_gdf = gpd.GeoDataFrame({'station': station_names}, geometry=stations_geometry, crs="EPSG:4326")

voronoi_gdf = gpd.read_file(r"C:\Users\jmsch\OneDrive\Documenten\Studie\Civiele Techniek\Environmental Engineering\Year 2\Afstuderen\NBS Nakuru Kenia\2. QGIS + Wflow\DATA\2025-03-13 KMD data Ruud\voronoi_polygons_4326.gpkg")

voronoi_with_stations = gpd.sjoin(voronoi_gdf, stations_gdf, how='left')

start_date = '2007-01-01'
end_date = '2007-12-31'

dates = pd.date_range(start=start_date, end=end_date, freq='D')

station_precip_dict = {station: precipitation_values[:, i] for i, station in enumerate(station_names)}

voronoi_with_stations['precipitation'] = voronoi_with_stations['station'].map(station_precip_dict)
display(voronoi_with_stations.head())
resolution = 0.05
min_lon, min_lat, max_lon, max_lat = voronoi_with_stations.total_bounds
transform = from_origin(max_lat, min_lon, resolution, resolution)

raster_shape = (int((max_lat - min_lat) / resolution), int((max_lon - min_lon) / resolution))
raster_time_series = np.zeros((precipitation_values.shape[0], raster_shape[0], raster_shape[1]))

for t in range(365):
    date_str = dates[t].strftime('%Y-%m-%d')
    with rasterio.open(
        f"C:\\Users\\jmsch\\OneDrive\\Documenten\\Studie\\Civiele Techniek\\Environmental Engineering\\Year 2\\Afstuderen\\NBS Nakuru Kenia\\2. QGIS + Wflow\\DATA\\2025-03-13 KMD data Ruud\\rasterized_precipitation_{date_str}.tif",
        'w',
        driver='GTiff',
        height=raster_shape[0],
        width=raster_shape[1],
        count=1,
        dtype='float32',
        crs='EPSG:4326',
        transform=transform,
    ) as dst:
        dst.write(raster_time_series[t], 1)


['Pts2' 'Pts3' 'Pts4' 'Pts5' 'Pts6' 'Pts7' 'Pts8' 'Pts9' 'Pts10' 'Pts11'
 'Pts12' 'Pts13' 'Pts14' 'Pts15']


array([[0.   , 0.422, 0.303, ..., 0.054, 0.861, 0.141],
       [0.   , 0.285, 0.   , ..., 0.027, 1.239, 0.213],
       [0.   , 0.38 , 0.092, ..., 0.   , 1.598, 0.178],
       ...,
       [0.   , 0.   , 0.   , ..., 0.   , 0.358, 0.   ],
       [0.188, 0.483, 0.405, ..., 0.   , 0.191, 0.77 ],
       [0.   , 0.   , 0.187, ..., 0.322, 0.294, 0.087]])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,ID,Pts2,Pts3,Pts4,Pts5,Pts6,Pts7,Pts8,Pts9,Pts10,Pts11,Pts12,Pts13,Pts14,Pts15
1,LON,35.8903,35.9595,35.9584,35.9432,36.0432,36.0436,36.0482,36.0497,36.1231,36.1489,36.1505,36.1596,36.2014,36.1805
2,LAT,-0.4433,-0.3616,-0.4521,-0.5569,-0.3032,-0.4002,-0.5017,-0.6143,-0.2811,-0.3991,-0.5172,-0.6374,-0.6253,-0.4102
3,in_catchment,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes
4,DATE,Pts2,Pts3,Pts4,Pts5,Pts6,Pts7,Pts8,Pts9,Pts10,Pts11,Pts12,Pts13,Pts14,Pts15


Unnamed: 0,geometry,index_right,station,precipitation
0,"POLYGON ((36.01105 -0.55851, 36.09208 -0.55743...",7.0,Pts9,"[0.519, 0.712, 1.502, 0.033, 0.0, 1.255, 0.0, ..."
1,"POLYGON ((35.91763 -0.49969, 35.98529 -0.5095,...",3.0,Pts5,"[0.628, 1.221, 1.384, 0.001, 0.0, 1.842, 0.0, ..."
2,"POLYGON ((36.01691 -0.45226, 35.98529 -0.5095,...",2.0,Pts4,"[0.303, 0.0, 0.092, 0.0, 2.691, 0.0, 0.004, 0...."
3,"POLYGON ((35.92967 -0.40649, 35.91763 -0.49969...",0.0,Pts2,"[0.0, 0.0, 0.0, 0.0, 1.757, 0.0, 0.0, 0.0, 0.0..."
4,"POLYGON ((36.09676 -0.44865, 36.01691 -0.45226...",5.0,Pts7,"[1.001, 0.896, 1.314, 0.0, 1.229, 0.367, 0.025..."


In [71]:
display(raster_time_series)

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.