In [2]:
import os
import numpy as np
import pandas as pd

import rasterio

from datetime import datetime
from rasterio.crs import CRS
from affine import Affine
from netCDF4 import Dataset

In [2]:
def load_tif_stacks(input_dir):
    tif_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]
    tif_files.sort()
    sublist = []
    counter = 0
    input_name = ''
    path_dict = {}
    
    for tif_name in tif_files:
        name_parts = tif_name.split('.tif')[0].split('_')
        iso_date = name_parts[-1]
        name_parts.pop(-1)
        if len(name_parts) > 1:
            name = '_'.join(name_parts)
        else:
            name = name_parts[0]

        if input_name != name:
            input_name = name
            path_dict[name] = {
                'dates': [],
                'standard_name': name_parts[0],
                'tif_meta': None,
                'upper_limit': None,
                'lower_limit': None,
                'stack': [],
                'paths': []
            }

            if len(name_parts) > 1:
                try:
                    path_dict[name]['upper_limit'] = int(name_parts[-1].split('cm')[0].split('-')[0])
                    path_dict[name]['lower_limit'] = int(name_parts[-1].split('cm')[0].split('-')[1])
                except:
                    continue


                    

        with rasterio.open(os.path.join(input_dir, tif_name)) as src:
            path_dict[name]['tif_meta'] = src.meta
            path_dict[name]['stack'].append(src.read(1))

        path_dict[name]['dates'].append(iso_date)
        path_dict[name]['paths'].append(os.path.join(input_dir, tif_name))


    return path_dict

In [3]:
path_dict = load_tif_stacks('./ftp_download')

In [4]:
path_dict

{'AWD_0-100cm': {'dates': ['2024-06-14',
   '2024-06-15',
   '2024-06-16',
   '2024-06-17',
   '2024-06-18',
   '2024-06-19',
   '2024-06-20',
   '2024-06-21',
   '2024-06-22',
   '2024-06-23'],
  'standard_name': 'AWD',
  'tif_meta': {'driver': 'GTiff',
   'dtype': 'float32',
   'nodata': -3.4028234663852886e+38,
   'width': 259,
   'height': 191,
   'count': 1,
   'crs': CRS.from_epsg(4326),
   'transform': Affine(0.1, 0.0, 4.650000000000006,
          0.0, -0.1, 55.050000000000004)},
  'upper_limit': 0,
  'lower_limit': 100,
  'stack': [array([[-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
            4.7473812e+00,  9.0766674e-01, -2.5179274e+00],
          [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
           -4.8436737e+00, -5.6127849e+00, -1.1220232e+01],
          [-3.4028235e+38, -3.4028235e+38, -3.4028235e+38, ...,
           -6.5563340e+00, -1.0986254e+01, -1.0359383e+01],
          ...,
          [-3.4028235e+38, -6.5413370e+00, -9.0777950e+00, ...,
      

In [97]:
path = path_dict['UTCI']['paths'][0]

with Dataset('./netcdfs/test5.nc', 'w', format='NETCDF4') as dst:
    dst.createDimension('x', path_dict['UTCI']['tif_meta']['width'])
    dst.createDimension('y', path_dict['UTCI']['tif_meta']['height'])
    dst.createDimension('time', len(path_dict['UTCI']['dates']))

    dst.createVariable('x', 'f4', ('x',))
    dst.createVariable('y', 'f4', ('y',))
    dst.createVariable('time', 'f4', ('time',))
    dst.createVariable('UTCI', 'f4', ('time', 'y', 'x'), fill_value=path_dict['UTCI']['tif_meta']['nodata'])

    dst['x'][:] = np.arange(path_dict['UTCI']['tif_meta']['width'])
    dst['y'][:] = np.arange(path_dict['UTCI']['tif_meta']['height'])
    dst['time'][:] = np.arange(len(path_dict['UTCI']['dates']))

    # for i, stack in enumerate(path_dict['UTCI']['stack']):
    #     dst['UTCI'][i] = stack
    dst['UTCI'][:] = np.array(path_dict['UTCI']['stack'])

    dst.setncattr('crs', CRS.from_epsg(4326),)
    dst.setncattr('transform', path_dict['UTCI']['tif_meta']['transform'])
    dst.setncattr('nodata', path_dict['UTCI']['tif_meta']['nodata'])
    if path_dict['UTCI']['upper_limit'] is not None:
        dst.setncattr('upper_limit', path_dict['UTCI']['upper_limit'])
        dst.setncattr('lower_limit', path_dict['UTCI']['lower_limit'])
    dst.setncattr('dates', path_dict['UTCI']['dates'])

    dst.setncattr('history', 'Created ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    # dst.setncattr('source', 'https://www.dwd.de/DE/leistungen/opendata/opendata.html')

In [5]:
import os
from netCDF4 import Dataset
import numpy as np
import rasterio
from rasterio.transform import Affine
from datetime import datetime

def create_netcdf(path_dict, output_dir='./netcdfs_7'):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for name, data in path_dict.items():
        netcdf_file_path = os.path.join(output_dir, f'{name}.nc')

        width = data['tif_meta']['width']
        height = data['tif_meta']['height']
        dates = data['dates']
        num_dates = len(dates)

        # Extract the affine transformation parameters
        transform = data['tif_meta']['transform']

        rows, cols = np.indices((height, width))

        # Apply the affine transform to convert pixel coordinates to geographic coordinates
        x_coords, y_coords = rasterio.transform.xy(transform, rows.flatten(), cols.flatten())
        x_coords = np.array(x_coords)
        y_coords = np.array(y_coords)

        # # Reshape coordinates to match the raster shape
        x_coords = x_coords.reshape((height, width))
        y_coords = y_coords.reshape((height, width))

        y_coords
        lats = y_coords[:, 0]
        lons = x_coords[0, :]

        with Dataset(netcdf_file_path, 'w', format='NETCDF4') as dst:
            dst.createDimension('lon', width)
            dst.createDimension('lat', height)
            dst.createDimension('time', num_dates)

            x_var = dst.createVariable('lon', 'f4', ('lon',))
            y_var = dst.createVariable('lat', 'f4', ('lat',))
            time_var = dst.createVariable('time', 'f4', ('time',))
            utci_var = dst.createVariable('UTCI', 'f4', ('time', 'lat', 'lon'), fill_value=data['tif_meta']['nodata'])

            x_var[:] = lons
            y_var[:] = lats
            time_var[:] = np.arange(num_dates)
            utci_var[:] = np.array(data['stack'])

            # Set CRS attribute
            crs_wkt = data['tif_meta']['crs'].to_wkt()
            dst.setncattr('crs', crs_wkt)
            
            # Set affine transformation attribute
            transform_str = ', '.join(map(str, transform.to_gdal()))
            dst.setncattr('transform', transform_str)

            dst.setncattr('nodata', data['tif_meta']['nodata'])

            if data['upper_limit'] is not None:
                dst.setncattr('upper_limit', data['upper_limit'])
                dst.setncattr('lower_limit', data['lower_limit'])

            dst.setncattr('dates', dates)
            dst.setncattr('history', 'Created ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
           


In [6]:
x = create_netcdf(path_dict)

In [25]:
nc = Dataset('./netcdf_7/UTCI.nc', 'w', format='NETCDF4')

In [19]:
nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    crs: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
    transform: 4.650000000000006, 0.1, 0.0, 55.050000000000004, 0.0, -0.1
    nodata: -3.4028234663852886e+38
    dates: ['2024-06-14', '2024-06-15', '2024-06-16', '2024-06-17', '2024-06-18', '2024-06-19', '2024-06-20', '2024-06-21', '2024-06-22', '2024-06-23']
    history: Created 2024-06-21 15:31:35
    dimensions(sizes): lon(259), lat(191), time(10)
    variables(dimensions): float32 lon(lon), float32 lat(lat), float32 time(time), float32 UTCI(time, lat, lon)
    groups: 

In [26]:
nc.transform = Affine(0.1, 0.0, 4.650000000000006,
          0.0, -0.1, 55.050000000000004)

In [28]:
nc.close()

In [18]:
nc.variables['lat'][:]

masked_array(data=[55. , 54.9, 54.8, 54.7, 54.6, 54.5, 54.4, 54.3, 54.2,
                   54.1, 54. , 53.9, 53.8, 53.7, 53.6, 53.5, 53.4, 53.3,
                   53.2, 53.1, 53. , 52.9, 52.8, 52.7, 52.6, 52.5, 52.4,
                   52.3, 52.2, 52.1, 52. , 51.9, 51.8, 51.7, 51.6, 51.5,
                   51.4, 51.3, 51.2, 51.1, 51. , 50.9, 50.8, 50.7, 50.6,
                   50.5, 50.4, 50.3, 50.2, 50.1, 50. , 49.9, 49.8, 49.7,
                   49.6, 49.5, 49.4, 49.3, 49.2, 49.1, 49. , 48.9, 48.8,
                   48.7, 48.6, 48.5, 48.4, 48.3, 48.2, 48.1, 48. , 47.9,
                   47.8, 47.7, 47.6, 47.5, 47.4, 47.3, 47.2, 47.1, 47. ,
                   46.9, 46.8, 46.7, 46.6, 46.5, 46.4, 46.3, 46.2, 46.1,
                   46. , 45.9, 45.8, 45.7, 45.6, 45.5, 45.4, 45.3, 45.2,
                   45.1, 45. , 44.9, 44.8, 44.7, 44.6, 44.5, 44.4, 44.3,
                   44.2, 44.1, 44. , 43.9, 43.8, 43.7, 43.6, 43.5, 43.4,
                   43.3, 43.2, 43.1, 43. , 42.9, 42

In [14]:

# Open the GeoTIFF file using rasterio
with rasterio.open('./ftp_download/AWD_0-40cm_2024-06-14.tif') as dataset:
    # Read metadata
    meta = dataset.meta
    
    # Read the affine transformation
    transform = dataset.transform
    
    # Read the coordinate reference system
    crs = dataset.crs
    
    # Read the data
    data = dataset.read()  # Reads all bands

    # Read a specific band (for example, the first band)
    band1 = dataset.read(1)
    
    # Get nodata value
    nodata = dataset.nodata
    
    # Get the width and height of the dataset
    width = dataset.width
    height = dataset.height
    
    # Get the number of bands
    count = dataset.count
    
    # Read the bounding box
    bounds = dataset.bounds

# Store everything in a dictionary
geotiff_info = {
    'meta': meta,
    'transform': transform,
    'crs': crs,
    'data': data,
    'band1': band1,
    'nodata': nodata,
    'width': width,
    'height': height,
    'count': count,
    'bounds': bounds,
}

# Now `geotiff_info` contains all the information about the GeoTIFF file
print(geotiff_info)




{'meta': {'driver': 'GTiff', 'dtype': 'float32', 'nodata': -3.4028234663852886e+38, 'width': 259, 'height': 191, 'count': 1, 'crs': CRS.from_epsg(4326), 'transform': Affine(0.1, 0.0, 4.650000000000006,
       0.0, -0.1, 55.050000000000004)}, 'transform': Affine(0.1, 0.0, 4.650000000000006,
       0.0, -0.1, 55.050000000000004), 'crs': CRS.from_epsg(4326), 'data': array([[[-3.40282347e+38, -3.40282347e+38, -3.40282347e+38, ...,
          1.17666445e+01,  8.86395264e+00,  6.19990253e+00],
        [-3.40282347e+38, -3.40282347e+38, -3.40282347e+38, ...,
          4.17536259e+00,  2.97724628e+00, -2.62212873e+00],
        [-3.40282347e+38, -3.40282347e+38, -3.40282347e+38, ...,
          2.37905431e+00, -4.16109848e+00, -3.93445301e+00],
        ...,
        [-3.40282347e+38, -2.22427845e+00, -5.01790047e+00, ...,
         -1.19066644e+00, -1.19066644e+00, -1.19066644e+00],
        [-3.40282347e+38, -1.79855525e+00, -2.24067092e+00, ...,
         -3.40282347e+38, -3.40282347e+38, -3.402823

In [59]:
rows, cols = np.indices((height, width))

# Apply the affine transform to convert pixel coordinates to geographic coordinates
x_coords, y_coords = rasterio.transform.xy(transform, rows.flatten(), cols.flatten())
x_coords = np.array(x_coords)
y_coords = np.array(y_coords)

# # Reshape coordinates to match the raster shape
x_coords = x_coords.reshape((height, width))
y_coords = y_coords.reshape((height, width))

y_coords
lats = y_coords[:, 0]
lons = x_coords[0, :]

In [63]:
y_coords
lats = y_coords[:, 0]
lons = x_coords[0, :]

In [66]:
len(lats)

191

In [3]:
a = Affine(0.1, 0.0, 4.650000000000006,
          0.0, -0.1, 55.050000000000004)

In [6]:
a.identity()

Affine(1.0, 0.0, 0.0,
       0.0, 1.0, 0.0)

In [15]:
nc = Dataset('./netcdfs_4/AWD_0-40cm.nc', 'r', format='NETCDF4')

In [16]:
nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    crs: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
    transform: 4.650000000000006, 0.1, 0.0, 55.050000000000004, 0.0, -0.1
    nodata: -3.4028234663852886e+38
    upper_limit: 0
    lower_limit: 40
    dates: ['2024-06-14', '2024-06-15', '2024-06-16', '2024-06-17', '2024-06-18', '2024-06-19', '2024-06-20', '2024-06-21', '2024-06-22', '2024-06-23']
    history: Created 2024-06-17 17:04:09
    dimensions(sizes): x(259), y(191), time(10)
    variables(dimensions): float32 x(x), float32 y(y), float32 time(time), float32 UTCI(time, y, x)
    groups: 

In [18]:
help(nc)

Help on Dataset object:

class Dataset(builtins.object)
 |  A netCDF `Dataset` is a collection of dimensions, groups, variables and
 |  attributes. Together they describe the meaning of data and relations among
 |  data fields stored in a netCDF file. See `Dataset.__init__` for more
 |  details.
 |
 |  A list of attribute names corresponding to global netCDF attributes
 |  defined for the `Dataset` can be obtained with the
 |  `Dataset.ncattrs` method.
 |  These attributes can be created by assigning to an attribute of the
 |  `Dataset` instance. A dictionary containing all the netCDF attribute
 |  name/value pairs is provided by the `__dict__` attribute of a
 |  `Dataset` instance.
 |
 |  The following class variables are read-only and should not be
 |  modified by the user.
 |
 |  **`dimensions`**: The `dimensions` dictionary maps the names of
 |  dimensions defined for the `Group` or `Dataset` to instances of the
 |  `Dimension` class.
 |
 |  **`variables`**: The `variables` diction

In [41]:
nc['x'][:]

masked_array(data=[ 4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,  5.5,
                    5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,
                    6.5,  6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,
                    7.4,  7.5,  7.6,  7.7,  7.8,  7.9,  8. ,  8.1,  8.2,
                    8.3,  8.4,  8.5,  8.6,  8.7,  8.8,  8.9,  9. ,  9.1,
                    9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,  9.9, 10. ,
                   10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9,
                   11. , 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8,
                   11.9, 12. , 12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7,
                   12.8, 12.9, 13. , 13.1, 13.2, 13.3, 13.4, 13.5, 13.6,
                   13.7, 13.8, 13.9, 14. , 14.1, 14.2, 14.3, 14.4, 14.5,
                   14.6, 14.7, 14.8, 14.9, 15. , 15.1, 15.2, 15.3, 15.4,
                   15.5, 15.6, 15.7, 15.8, 15.9, 16. , 16.1, 16.2, 16.3,
                   16.4, 16.5, 16.6, 16.7, 16.8, 16

In [35]:
nd = Dataset('./netcdf_7/AWD_0-100cm.nc', 'a', format='NETCDF4')

In [7]:
a = Dataset('./netcdfs/test14.nc', 'r', format='NETCDF4')

In [9]:
a.title

'Chech data'

In [10]:
b = Dataset('/home/colja/01_Code/01_SWN/Spreewasser-N/thredds_data/data/DWD_Data/test14.nc', 'r', format='NETCDF4')

In [11]:
b.title

'Chech data'

In [15]:
import requests

In [21]:
url = "http://127.0.0.1:8080/thredds/ncml/data/DWD_Data/test14.nc"
nc_dict = {}
ncml_data = requests.get(url).text

In [22]:
ncml_data


'<!doctype html><html lang="en"><head><title>HTTP Status 404 – Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 – Not Found</h1><hr class="line" /><p><b>Type</b> Status Report</p><p><b>Message</b> The requested resource [&#47;thredds&#47;ncml&#47;data&#47;DWD_Data&#47;test14.nc] is not available</p><p><b>Description</b> The origin server did not find a current representation for the target resource or is not willing to disclose that one exists.</p><hr class="line" /><h3>Apache Tomcat/9.0.73</h3></body></html>'

In [20]:
ncml.text

'<!doctype html><html lang="en"><head><title>HTTP Status 404 – Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 – Not Found</h1><hr class="line" /><p><b>Type</b> Status Report</p><p><b>Message</b> The requested resource [&#47;thredds&#47;ncml&#47;data&#47;DWD_Data&#47;test14.nc] is not available</p><p><b>Description</b> The origin server did not find a current representation for the target resource or is not willing to disclose that one exists.</p><hr class="line" /><h3>Apache Tomcat/9.0.73</h3></body></html>'