In [1]:
%config Completer.use_jedi = False

In [2]:
import pandas as pd
from datetime import datetime
from pykrige.ok import OrdinaryKriging
import numpy as np
import geopandas as gpd
import geobr
from scipy.spatial import KDTree
from shapely.geometry import Point
from scipy.spatial import KDTree
from statistics import mean
from windrose import WindroseAxes
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import xarray as xr
import itertools
plt.style.use("ggplot")

In [3]:
# Root directory paths.
# Alternative roots used on other machines are kept commented out below.
# general_path = 'C:/Users/cnalm/OneDrive/Hidroweb'
# general_path = 'D:/Dados_Nuvem/OneDrive/Hidroweb'
# NOTE(review): machine-specific absolute path; every read_hdf call in this
# notebook is built from it, so it must be edited before running elsewhere.
general_path = 'C:/Users/linde/OneDrive/Hidroweb'

In [6]:
# Read the 1961-1970 partition and discard exact duplicate rows; the
# ignore_index flag already leaves a fresh 0..n-1 index.
df_total_hdf = pd.read_hdf(
    f"{general_path}/Consolidated Files/BRASIL_CLEANED_PARTITION_1961_1970.h5"
).drop_duplicates(ignore_index=True)

# Optional single-day filter, disabled:
# df_total_hdf = df_total_hdf[(df_total_hdf['Date'] == '1961-01-01')]

# Keep only the five gauges used for this test run.
df_total_hdf = df_total_hdf[
    df_total_hdf['Code'].isin(
        ['00062000', '00047000', '00048000', '00067000', '00145004']
    )
]
df_total_hdf.head()

Unnamed: 0,Date,Value,Code,Quality Index,Quality Label
0,1961-01-01,0.0,47000,90.317542,Excellent Quality
1,1961-01-02,0.0,47000,90.317542,Excellent Quality
2,1961-01-03,0.0,47000,90.317542,Excellent Quality
3,1961-01-04,0.0,47000,90.317542,Excellent Quality
4,1961-01-05,0.0,47000,90.317542,Excellent Quality


In [7]:
# Gauge metadata table; it is joined to the observations on 'Code' below.
df_info = pd.read_hdf(
    f"{general_path}/Consolidated Files/BRASIL_CLEANED_1961_2020_GAUGES.h5"
)
df_info.tail()

Unnamed: 0,Name,Code,City,State,Responsible,Latitude,Longitude
11022,ÁGUA FRIA,8460003,UIRAMUTA,RORAIMA,ANA,4.6428,-60.4964
11023,UIRAMUTA,8460004,UIRAMUTA,RORAIMA,ANA,4.5986,-60.1664
11024,NOVA ESPERANÇA/MARCO BV-8,8461000,PACARAIMA,RORAIMA,ANA,4.4883,-61.1297
11025,MISSÃO AUARIS - JUSANTE,8464001,BOA VISTA,RORAIMA,ANA,4.0031,-64.4431
11026,FAZENDA BANDEIRA BRANCA,8560000,UIRAMUTA,RORAIMA,ANA,4.6306,-60.4706


In [8]:
# Attach the gauge metadata to every observation (left join on 'Code') and
# keep only the columns used downstream, in this order.
df_total = df_total_hdf.merge(df_info, how='left', on='Code')[
    ['Date', 'Value', 'Code', 'Name', 'City', 'State',
     'Responsible', 'Latitude', 'Longitude']
]
df_total.tail()

Unnamed: 0,Date,Value,Code,Name,City,State,Responsible,Latitude,Longitude
12194,1963-12-27,22.9,145004,TURIAÇU,TURIAÇU,MARANHÃO,INMET,-1.6678,-45.3683
12195,1963-12-28,1.5,145004,TURIAÇU,TURIAÇU,MARANHÃO,INMET,-1.6678,-45.3683
12196,1963-12-29,5.5,145004,TURIAÇU,TURIAÇU,MARANHÃO,INMET,-1.6678,-45.3683
12197,1963-12-30,5.5,145004,TURIAÇU,TURIAÇU,MARANHÃO,INMET,-1.6678,-45.3683
12198,1963-12-31,21.7,145004,TURIAÇU,TURIAÇU,MARANHÃO,INMET,-1.6678,-45.3683


In [9]:
# One row per distinct (Latitude, Longitude) pair, re-indexed from 0.
df_coords_temp = df_total.loc[:, ['Latitude', 'Longitude']].drop_duplicates(ignore_index=True)
df_coords_temp

Unnamed: 0,Latitude,Longitude
0,-0.65,-47.55
1,-0.7278,-48.5158
2,-0.9711,-62.9286
3,-0.1253,-67.0611
4,-1.6678,-45.3683


In [10]:
# Inverse Distance Weighting (IDW) interpolation.
#
# In the IDW methodology, each of the nearest stations selected for the
# interpolation at a query point is weighted by
#     W_k = d_k ** (-p)
# where d_k is the distance between station k and the query point, and p is
# the power parameter. We use p = 2, as suggested by Ly et al. (2011) and
# Xavier et al. (2016); see also Dirks et al. (1998), Goovaerts (2000) and
# Lloyd (2005).

def idw_interpolation(row, p=2, stations=None, k=5, verbose=False):
    """Average the IDW estimates over a 5 x 5 grid centred on ``row``.

    A grid of points spaced 0.0625 degrees apart and spanning +/- 0.125
    degrees around ``row['Latitude']`` / ``row['Longitude']`` is generated.
    For every grid point the ``k`` nearest stations (excluding any station
    located exactly on that grid point) are combined with weights
    ``1 / (distance + 1e-6) ** p``; the function returns the arithmetic mean
    of the per-grid-point estimates.

    Parameters
    ----------
    row : mapping with 'Latitude' and 'Longitude' entries
        Centre of the grid, typically a row passed by ``DataFrame.apply``.
    p : float, default 2
        IDW power parameter.
    stations : DataFrame with 'Latitude', 'Longitude' and 'Value', optional
        Observations to interpolate from. When omitted, the module-level
        ``df_temp`` is used, as the original implementation did.
    k : int, default 5
        Maximum number of nearest stations used per grid point. It is capped
        at the number of stations actually available.
    verbose : bool, default False
        When True, print the debugging information that used to be printed
        unconditionally.

    Returns
    -------
    float
        Mean interpolated value, or NaN when no station is available.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if stations is None:
        # Backward-compatible fallback to the notebook-level frame.
        stations = df_temp

    step_size = 0.25 / 4
    half_width = 0.125
    # Fixed point count: deriving it from (end - start) / step_size is
    # sensitive to floating-point rounding and can silently drop the last
    # grid point.
    n_points = int(round(2 * half_width / step_size)) + 1

    start_lat = row['Latitude'] - half_width
    start_lon = row['Longitude'] - half_width
    generated_latitudes = [round(start_lat + i * step_size, 6) for i in range(n_points)]
    generated_longitudes = [round(start_lon + i * step_size, 6) for i in range(n_points)]

    coords = stations[['Latitude', 'Longitude']].to_numpy(dtype=float)
    station_values = stations['Value'].to_numpy(dtype=float)
    # The tree over all stations is built once and reused for every grid
    # point that does not coincide with a station.
    full_tree = KDTree(coords) if len(coords) else None

    interpolated_value_avg = []
    distances = None
    for lat in generated_latitudes:
        for lon in generated_longitudes:
            # Leave out a station sitting exactly on the grid point.
            keep = ~((coords[:, 0] == lat) & (coords[:, 1] == lon))
            if keep.all():
                tree, values = full_tree, station_values
            elif keep.any():
                tree, values = KDTree(coords[keep]), station_values[keep]
            else:
                tree = None
            if tree is None:
                continue

            # Asking for more neighbours than exist makes KDTree.query return
            # infinite distances and out-of-range indices, so cap k.
            n_neighbors = min(k, tree.n)
            distances, indices = tree.query([lat, lon], k=n_neighbors)
            # query returns scalars when k == 1.
            distances = np.atleast_1d(distances)
            indices = np.atleast_1d(indices)

            # Small offset prevents division by zero.
            weights = 1 / (distances + 1e-6) ** p
            interpolated_value_avg.append(
                np.sum(weights * values[indices]) / np.sum(weights)
            )

    if verbose:
        print('coordinates', row['Latitude'], row['Longitude'])
        print('distances', distances)
        print("generated_latitudes", generated_latitudes)
        print("generated_longitudes", generated_longitudes)

    if not interpolated_value_avg:
        return float('nan')
    return mean(interpolated_value_avg)

In [11]:
# Distinct observation dates, sorted ascending, from 1961-01-01 onwards.
df_date_list = (
    df_total['Date']
    .drop_duplicates()
    .sort_values()
    .to_frame()
    .query("Date >= '1961-01-01'")
)
# TESTING: only the first two dates are kept.
date_list = df_date_list['Date'].tolist()[:2]
date_list

[Timestamp('1961-01-01 00:00:00'), Timestamp('1961-01-02 00:00:00')]

In [76]:
### TESTING | TESTING | TESTING | TESTING | TESTING | TESTING | TESTING

# date_test = '2000-01-04'
# df_temp_2 = df_total[(df_total['Date'] == date_test)]
# df_temp_3 = df_temp_2['State'].drop_duplicates()
# print(len(df_temp_3),"estados")
# date_list = [datetime.strptime(date_str, '%Y-%m-%d') for date_str in [date_test]]
# date_list

### TESTING | TESTING | TESTING | TESTING | TESTING | TESTING | TESTING

In [13]:
# Debug pass: for each date, print the stations that remain once the station
# located exactly at each target coordinate has been left out.
df_precip_total = pd.DataFrame([])
for ref_date in date_list:
    print("\n", ref_date)
    # Kept as a module-level name: idw_interpolation reads df_temp as a global.
    df_temp = df_total.loc[df_total['Date'] == ref_date]
    print(df_temp[['Date', 'Latitude', 'Longitude']], "\n")
    for index, row in df_coords_temp.iterrows():
        latitude, longitude = row['Latitude'], row['Longitude']
        print(f"index: {index}, Latitude: {latitude}, Longitude: {longitude}")

        # Equivalent to "not (same latitude and same longitude)".
        is_other_station = (df_temp['Latitude'] != latitude) | (df_temp['Longitude'] != longitude)
        df_temp_kdtree = df_temp.loc[is_other_station]
        print(
            df_temp_kdtree['Latitude'].to_numpy().tolist(),
            df_temp_kdtree['Longitude'].to_numpy().tolist(),
            "\n",
        )



    # df_precip = df_coords_temp.copy(deep=True)
    # df_precip['Precipitation'] = df_precip.apply(idw_interpolation, axis=1)
    # df_precip['Date'] = ref_date
    # if len(df_precip_total) <=0:



 1961-01-01 00:00:00
            Date  Latitude  Longitude
0     1961-01-01   -0.6500   -47.5500
943   1961-01-01   -0.7278   -48.5158
4441  1961-01-01   -0.9711   -62.9286
7515  1961-01-01   -0.1253   -67.0611
11104 1961-01-01   -1.6678   -45.3683 

index: 0, Latitude: -0.65, Longitude: -47.55
[-0.7278, -0.9711, -0.1253, -1.6678] [-48.5158, -62.9286, -67.0611, -45.3683] 

index: 1, Latitude: -0.7278, Longitude: -48.5158
[-0.65, -0.9711, -0.1253, -1.6678] [-47.55, -62.9286, -67.0611, -45.3683] 

index: 2, Latitude: -0.9711, Longitude: -62.9286
[-0.65, -0.7278, -0.1253, -1.6678] [-47.55, -48.5158, -67.0611, -45.3683] 

index: 3, Latitude: -0.1253, Longitude: -67.0611
[-0.65, -0.7278, -0.9711, -1.6678] [-47.55, -48.5158, -62.9286, -45.3683] 

index: 4, Latitude: -1.6678, Longitude: -45.3683
[-0.65, -0.7278, -0.9711, -0.1253] [-47.55, -48.5158, -62.9286, -67.0611] 


 1961-01-02 00:00:00
            Date  Latitude  Longitude
1     1961-01-02   -0.6500   -47.5500
944   1961-01-02   -0.727

In [78]:
# NOTE(review): in the visible cells df_precip is only assigned inside
# commented-out code, so this display presumably relies on state from an
# earlier kernel session - confirm it still runs after Restart & Run All.
df_precip

Unnamed: 0,Latitude,Longitude
0,-33.625,-53.375
1,-33.375,-53.375
2,-33.375,-53.125
3,-33.125,-53.125
4,-33.125,-52.875
...,...,...
12146,5.375,-60.375
12147,5.375,-60.875
12148,5.375,-60.625
12149,5.375,-60.125
