# Water Quality Parameters

This notebook computes the descriptive statistics for the WQP maps, or any raster, by defining specific features from which these statistics are requested (located within the [in](./in) folder). In addition, the analysis of the WQP maps considers the extraction of sampling data inside the pixels to review the estimates.

In [1]:
# Styling notebook

# System
import os
import sys

# Make the project scripts importable. The guard keeps the cell idempotent:
# re-running it does not append the same folder to sys.path again.
SRC_DIR = './src/python'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Import the function to update the notebook style
from nbConfig import css_styling

# Apply the style; kept as the last expression of the cell so that any value
# it returns is displayed by the notebook.
css_styling()

In [2]:
# Data
import numpy as np
import pandas as pd

# Spatial Data
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
from rasterio.plot import show_hist

# Import custom libraries
import wqpFunctions as wqp

Define the working directory for the WQP processing

In [3]:
# Working directories used by the notebook (all relative to the notebook folder)
cwd = dict([
    ('local', '.'),               # notebook folder itself
    ('in', './in/data/S3'),       # folder walked for the input .tif maps
    ('out', './out/data/S3'),     # folder receiving the exported results
    ('vector', './vector'),       # folder holding the shapefiles
])

Import the vector files for data extraction of the WQP maps

In [4]:
# Locations of the shapefiles inside the vector folder
lakes_shp = os.path.join(cwd['vector'], 'simile_laghi', 'simile_laghi.shp')
buoy_shp = os.path.join(cwd['vector'], 'boa_sample_points', 'boa_sample_points.shp')
sample_shp = os.path.join(cwd['vector'], 'random_points', 'random_points.shp')

# Lakes layer
gdf_lakes = gpd.read_file(lakes_shp)
# Buoy position in the lake
gdf_buoy = gpd.read_file(buoy_shp)
# Random sampling points
gdf_sample = gpd.read_file(sample_shp)

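Review the contents of the input folder and process each WQP map (`.tif`): export the cleaned raster, extract the values at the sampling points and compute the statistics for the lakes polygons.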

In [6]:
# Make sure the folder receiving the sampling-point tables exists
# (this also creates the output folder itself when it is missing).
os.makedirs(os.path.join(cwd['out'], 'sample_points'), exist_ok=True)

# Single statistics file shared by all the maps; rows are appended map by map
out_file = os.path.join(cwd['out'], 'lakesStats.csv')

for root, dirs, files in os.walk(cwd['in']):
    for file in files:
        # Only the GeoTIFF maps are processed
        if not file.endswith('.tif'):
            continue
        print(file)

        # os.walk also visits sub-folders: the path has to be built from the
        # folder currently being walked (root), not from the top-level folder,
        # otherwise files found in sub-folders would point to a missing path.
        f = os.path.join(root, file)

        # Read file
        src = wqp.wqp(f)
        src.readWQP()

        # Export dataset (check no data values): NaNs are first replaced by
        # numbers, then every value <= 0 is flagged as NaN before writing.
        t = src.image.read(1)
        t = np.nan_to_num(t)
        t[t<=0] = np.nan
        src.writeWQP(cwd['out'],t)

        # Extract information from sampling points
        src.extractSamplePoints(gdf_sample)
        src.samplePoint.to_csv(os.path.join(cwd['out'],'sample_points',src.name+'.csv'))

        # Compute statistics for the lakes polygons
        src.computeStatistics(gdf_lakes, 'Nome',"count min mean max median std  percentile_25 percentile_50 percentile_75",0)

        # Format output
        df = wqp.wqp.exportWQPFormatEstimates(src)

        # Export the statistics result to a file (append data if existing).
        # The header is written only when the file is created, so appended
        # rows never repeat it.
        df.to_csv(out_file, mode='a', header=not os.path.exists(out_file))

S3A_CHL_IT_20200101T100917_L1.tif
S3A_CHL_IT_20200102T094306_L1.tif
S3A_CHL_IT_20200103T091655_L1.tif
S3A_CHL_IT_20200105T100533_L1.tif
S3A_CHL_IT_20200106T093922_L1.tif
S3A_CHL_IT_20200107T091311_L1.tif
S3A_CHL_IT_20200109T100149_L1.tif
S3A_CHL_IT_20200110T093538_L1.tif
S3A_CHL_IT_20200111T090927_L1.tif
S3A_CHL_IT_20200113T095805_L1.tif
S3A_CHL_IT_20200114T093154_L1.tif
S3A_CHL_IT_20200115T090543_L1.tif
461613.1916635568 , 5086884.173586164  coordinates are out of the image
464645.3873948856 , 5071010.463501393  coordinates are out of the image
463505.5298660892 , 5083222.8396813115  coordinates are out of the image
463041.05383571656 , 5086777.778488278  coordinates are out of the image
465008.67724671104 , 5072783.301693505  coordinates are out of the image
462690.8041428262 , 5085455.105893146  coordinates are out of the image
463347.955892079 , 5085993.053614675  coordinates are out of the image
464912.4231471343 , 5083731.017708487  coordinates are out of the image
S3A_CHL_IT_202

In [7]:
# Import the sample dataset
# The path is built with os.path.join instead of a hard-coded '\\' separator,
# so the cell does not depend on the Windows path convention.
# NOTE(review): the processing loop reads its maps from cwd['in']
# ('./in/data/S3'); confirm this file is really expected directly under './in'.
src = wqp.wqp(os.path.join('.', 'in', 'S3A_CHL_IT_20200102T094306_L1.tif'))
src.readWQP()