## Import libraries

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Add the directory three levels above the notebook to sys.path so that the
# `aup` package (presumably the project's local package - confirm) can be imported.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally. When the import lived inside the `if` above it was
# skipped whenever the path was already present on sys.path, leaving `aup`
# undefined for the rest of the notebook.
import aup



## Notebook config

In [3]:
# Name of the dam to analyse ('presa_laboca', 'presa_cerroprieto', 'presa_elcuchillo')
city = 'presa_laboca'

# Hexagon resolution of the analysed grid (used to build the input file names)
r = 12

# Name of the raster index under analysis (used in directory, file and column names)
index_analysis = 'ndwi'

# Local directory that holds the analysed raster data for this index
tmp_dir = f'../../../data/processed/tmp_{index_analysis}/'

## Data download

### Data download - Raster Analysis Data

Analysed data

In [4]:
# Read the per-hexagon summary of the analysed raster from the local save folder
analysed_raster = gpd.read_file(
    f'{tmp_dir}local_save/{city}_{index_analysis}_HexRes{r}_.geojson'
)

# Quick look: dimensions and first two rows
print(analysed_raster.shape)
analysed_raster.head(2)

(36345, 11)


Unnamed: 0,hex_id,res,ndwi_mean,ndwi_std,ndwi_median,ndwi_max,ndwi_min,ndwi_diff,ndwi_tend,city,geometry
0,8c48a2c48393bff,12,0.535006,0.3591,0.573144,0.988064,0.147519,0.840545,-0.006303,presa_laboca,"POLYGON ((-100.13353 25.41407, -100.13343 25.4..."
1,8c48a2c484f31ff,12,-0.53673,0.12981,-0.571273,-0.418482,-0.662224,0.243741,0.002016,presa_laboca,"POLYGON ((-100.12104 25.41152, -100.12094 25.4..."


Complete dataset

In [5]:
# Read the complete dataset (one row per hexagon, month and year) from the local save folder
raster_dataset = pd.read_csv(
    f'{tmp_dir}local_save/{city}_{index_analysis}_HexRes{r}_.csv'
)

# Quick look: dimensions and first two rows
print(raster_dataset.shape)
raster_dataset.head(2)

(3052980, 7)


Unnamed: 0.1,Unnamed: 0,hex_id,res,ndwi,month,year,city
0,0,8c48a2c48393bff,12,0.636882,1,2016,presa_laboca
1,1,8c48a2c484f31ff,12,-0.614095,1,2016,presa_laboca


In [6]:
# Discard the leftover index column that was written into the CSV.
# errors='ignore' keeps the cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column has already been removed.
raster_dataset = raster_dataset.drop(columns=['Unnamed: 0'], errors='ignore')
raster_dataset.head(2)

Unnamed: 0,hex_id,res,ndwi,month,year,city
0,8c48a2c48393bff,12,0.636882,1,2016,presa_laboca
1,8c48a2c484f31ff,12,-0.614095,1,2016,presa_laboca


## Data treatment

Data treatment - Create datetime data

In [7]:
# Build a 'YYYY-MM' label for every row from its month and year columns.
# The intermediate 'month/year' string is parsed with an explicit format so the
# result never depends on pandas' format inference, and the day component is
# dropped with the vectorized .dt accessor rather than a per-row Python lambda.
raster_dataset['date'] = pd.to_datetime(
    raster_dataset['month'].astype(str) + '/' + raster_dataset['year'].astype(str),
    format='%m/%Y',
).dt.strftime('%Y-%m')

#Show
print(raster_dataset.shape)
raster_dataset.head(2)

(3052980, 7)


Unnamed: 0,hex_id,res,ndwi,month,year,city,date
0,8c48a2c48393bff,12,0.636882,1,2016,presa_laboca,2016-01
1,8c48a2c484f31ff,12,-0.614095,1,2016,presa_laboca,2016-01


Data treatment - Create raster categories

In [8]:
# NDWI categories, ordered from driest to wettest
categories = ['Drought, non-aqueous surfaces', 'Moderate drought, non-aqueous surfaces', 'Flooding, humidity', 'Water surface']

# Lower-inclusive bin edges matching the thresholds of each category:
#   ndwi <  -0.3          -> 'Drought, non-aqueous surfaces'
#   -0.3 <= ndwi < 0.0    -> 'Moderate drought, non-aqueous surfaces'
#    0.0 <= ndwi < 0.2    -> 'Flooding, humidity'
#   ndwi >=  0.2          -> 'Water surface'
ndwi_bins = [-np.inf, -0.3, 0.0, 0.2, np.inf]

# pd.cut builds the ordered categorical column in one step. This avoids creating
# a float NaN column and then writing strings into it, which pandas deprecates as
# an incompatible-dtype assignment. Rows with missing ndwi stay uncategorised (NaN).
raster_dataset['ndwi_category'] = pd.cut(
    raster_dataset['ndwi'],
    bins=ndwi_bins,
    labels=categories,
    right=False,
    ordered=True,
)

#Show
print(raster_dataset.shape)
raster_dataset.head(2)

(3052980, 8)


Unnamed: 0,hex_id,res,ndwi,month,year,city,date,ndwi_category
0,8c48a2c48393bff,12,0.636882,1,2016,presa_laboca,2016-01,Water surface
1,8c48a2c484f31ff,12,-0.614095,1,2016,presa_laboca,2016-01,"Drought, non-aqueous surfaces"


## Data analysis

### Data analysis - Historical behaviour

Calculate basics for analysis

In [40]:
# Average hex area for this resolution and location (not currently in use)
#gdf = analysed_raster.to_crs("EPSG:6372")
#gdf['area'] = gdf.area
#average_hexarea = gdf.area.mean()
#print(average_hexarea)

# Number of distinct dates (year-month labels) present in the complete dataset
unique_dates = raster_dataset['date'].unique()
available_dates = len(unique_dates)
print(available_dates)

84


Calculate historical area behaviour

In [38]:
# Build one row per (date, category) holding the number of hexagons in that
# category and the percentage they represent of all hexagons for that date.
#
# Rows are collected in a list and the DataFrame is created once at the end.
# The previous approach filled a pre-allocated frame through chained assignment
# (`df.loc[i].col = value`), which writes to a temporary object under pandas
# Copy-on-Write, and sized that frame with a hard-coded number of categories.
# As a side effect, 'hexcount' and 'percentage' now get numeric dtypes instead
# of object.
cols = ['date', 'ndwi_category', 'hexcount', 'percentage']
records = []

# sort=False keeps the dates in order of first appearance in the dataset
for date, complete_bydate in raster_dataset.groupby('date', sort=False):
    date_count = len(complete_bydate)

    # Hexagons per category for this date, counted in a single pass
    cat_counts = complete_bydate['ndwi_category'].value_counts()

    # Iterate over `categories` so every category gets a row, even with zero hexagons
    for cat in categories:
        cat_count = int(cat_counts.get(cat, 0))
        records.append({
            'date': date,
            'ndwi_category': cat,
            'hexcount': cat_count,
            'percentage': (cat_count / date_count) * 100,
            'city': city,
        })

historical_behaviour = pd.DataFrame(records, columns=cols + ['city'])

#Show
print(historical_behaviour.shape)
historical_behaviour

(336, 5)


Unnamed: 0,date,ndwi_category,hexcount,percentage,city
0,2016-01,"Drought, non-aqueous surfaces",22175,61.012519,presa_laboca
1,2016-01,"Moderate drought, non-aqueous surfaces",3153,8.675196,presa_laboca
2,2016-01,"Flooding, humidity",354,0.973999,presa_laboca
3,2016-01,Water surface,10663,29.338286,presa_laboca
4,2016-02,"Drought, non-aqueous surfaces",20831,57.314624,presa_laboca
...,...,...,...,...,...
331,2022-11,Water surface,232,0.638327,presa_laboca
332,2022-12,"Drought, non-aqueous surfaces",17127,47.123401,presa_laboca
333,2022-12,"Moderate drought, non-aqueous surfaces",9209,25.337736,presa_laboca
334,2022-12,"Flooding, humidity",9883,27.192186,presa_laboca
