# Water Quality Parameters

This notebook computes descriptive statistics for the WQP maps, or any raster, by defining specific features from which these statistics are requested (located within the [in](./in) folder). In addition, the analysis of the WQP maps considers the extraction of sampling data inside the pixels to review the estimates.

In [None]:
# Notebook styling

# Standard-library modules
import os
import sys

# Make the project's Python scripts importable from this notebook
sys.path.append('./src/python')

# Project helper that updates the notebook style
from nbConfig import css_styling

css_styling()

In [None]:
# Data
import numpy as np
import pandas as pd

# Spatial Data
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
from rasterio.plot import show_hist

# Import custom libraries
import wqpFunctions as wqp

Define the working directory for the WQP processing

In [None]:
# Folders used throughout the notebook, keyed by role (all relative paths)
cwd = dict(
    [
        ('local', '.'),              # notebook folder
        ('in', './in/data/S3'),      # rasters to process
        ('out', './out/data/S3'),    # exported tables
        ('vector', './vector'),      # vector layers
    ]
)

Import the vector files for data extraction of the WQP maps

In [None]:
def _read_layer(layer):
    """Read the shapefile stored at ``<vector folder>/<layer>/<layer>.shp``."""
    return gpd.read_file(os.path.join(cwd['vector'], layer, layer + '.shp'))


# Lake polygons
gdf_lakes = _read_layer('simile_laghi')
# Buoy position in the lake
gdf_buoy = _read_layer('boa_sample_points')
# Random sampling points
gdf_sample = _read_layer('random_points')

In [None]:
# Overview map: lake polygons first, then the two point layers on top
fig, ax = plt.subplots(figsize=(10, 10))

# (layer, plot options) in drawing order
layer_styles = [
    (gdf_lakes, dict(color='blue', edgecolor='k', alpha=0.3)),
    (gdf_sample, dict(color='red')),
    (gdf_buoy, dict(color='green')),
]
for layer, style in layer_styles:
    layer.plot(ax=ax, **style)

# Echo the axes as the cell result, as the final plot call did
ax

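Review the contents of the input folder and process each WQP map (`.tif`) found in it: extract the values at the sampling points and compute the statistics for the lake polygons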

In [None]:
# Process every GeoTIFF found under the input folder: sample it at the random
# points and summarise it per lake polygon.
print(cwd['in'])

# Create the sample-point output folder if it is missing (this also creates
# cwd['out']); otherwise the CSV exports below fail on a fresh checkout.
points_dir = os.path.join(cwd['out'], 'sample_points')
os.makedirs(points_dir, exist_ok=True)

# Lake statistics accumulate in a single CSV.
# NOTE: rows are appended when the file already exists, so re-running this cell
# adds the same maps again -- delete the file first for a clean run.
out_file = os.path.join(cwd['out'], 'lakesStats_filtered.csv')

for root, dirs, files in os.walk(cwd['in']):
    # os.walk lists entries in arbitrary order; sort for a reproducible row order
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('.tif'):
            continue
        print(file)

        # Join with `root` (the folder being visited), not the top folder, so
        # rasters stored in sub-folders resolve to an existing path
        f = os.path.join(root, file)

        # Read file
        src = wqp.wqp(f)
        src.readWQP()

        # Export dataset (check no data values) -- optional step, currently disabled
        # t = src.image.read(1)
        # t = np.nan_to_num(t)
        # t[t<=0] = np.nan
        # src.writeWQP(cwd['out'],t)

        # Extract information from sampling points
        # (pass gdf_buoy instead of gdf_sample to use the buoy positions)
        src.extractSamplePoints(gdf_sample)
        src.samplePoint.to_csv(os.path.join(points_dir, src.name + '.csv'))

        # Compute statistics for the lakes polygons
        # NOTE(review): the trailing 0 is passed through unchanged; presumably
        # the no-data value -- confirm in wqpFunctions
        src.computeStatistics(gdf_lakes, 'Nome', "count min mean max median std  percentile_25 percentile_50 percentile_75", 0)

        # Format output
        df = wqp.wqp.exportWQPFormatEstimates(src)

        # Append to the statistics file; the header is written only when the
        # file is being created
        df.to_csv(out_file, mode='a', header=not os.path.exists(out_file))

In [None]:
# Build one wide table of sample-point values: one row per point id and one
# column per image.
filtered_dir = os.path.join(cwd['out'], 'sample_points', 'wqp_filtered')

# Reference image: its table is the left side of every merge, so only the
# point ids present in it are kept.
n = 'S3A_CHL_IT_20200101T100917_L1.csv'
k = ['id', n.split('_')[1]]
df_1 = pd.read_csv(os.path.join(filtered_dir, n))
# Name the value column after the image. Every file stores its values under the
# parameter name taken from the file name, so without the rename repeated
# merges collide on that name and pandas falls back to _x/_y suffixes.
df_1 = df_1[k].rename(columns={k[1]: n.split('.')[0]})

# Labels for the columns of df_1 (rows once it is transposed): a placeholder
# for the 'id' column, then one entry per image in merge order.
list_names = ['dismiss', n.split('.')[0]]

for root, dirs, files in os.walk(filtered_dir):
    # os.walk lists entries in arbitrary order; sort for a reproducible result
    dirs.sort()
    for f in sorted(files):
        # Skip the reference image by name instead of assuming it is listed first
        if f == n or not f.endswith('.csv'):
            continue
        print(f)
        # Join with `root` so files in sub-folders resolve to an existing path
        df_2 = pd.read_csv(os.path.join(root, f))
        k = ['id', f.split('_')[1]]
        df_2 = df_2[k].rename(columns={k[1]: f.split('.')[0]})
        df_1 = pd.merge(df_1, df_2, how='left', on='id')
        list_names.append(f.split('.')[0])

In [None]:
# Preview the first rows of the merged sample-point table
df_1.head()

In [None]:
# Flip the merged table so that its columns become rows
df = df_1.T

In [None]:
# Preview the first rows of the transposed table
df.head()

In [None]:
# Attach the collected labels as a 'names' column (one label per row)
df = df.assign(names=list_names)

In [None]:
# Show column 0 next to the row labels
df.loc[:, [0, 'names']]

In [None]:
# Save the transposed table as CSV in the sample-points output folder
out_csv = os.path.join(cwd['out'], 'sample_points', 'wqp_tsm_filtered.csv')
df.to_csv(out_csv, encoding='utf-8', decimal='.')

In [None]:
# Display the buoy layer
gdf_buoy