In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import shapely.geometry
from shapely.geometry import Point
from mpl_toolkits.basemap import Basemap
import glob
import rasterio
import os
import shutil
import pyimpute
from pathlib import Path

In [37]:
# Glob pattern matching the bioclimatic GeoTIFF rasters (wc2.1_30s_bio_*.tif).
BIO_PATH = "assets/wc2.1_30s_bio/*.tif"
# Forward slashes keep the path portable across operating systems and avoid the
# invalid "\g" escape sequence that the backslash form contained.
FILE_PATH = "generated_files/gbif.parquet"
# Sorted (lexicographically) so rasters are always processed in the same order.
raster_features = sorted(glob.glob(BIO_PATH))

In [4]:
# Occurrence species data -> OSD_df
OSD_df = pd.read_parquet(FILE_PATH)
# One shapely Point per record, with x = Longitude and y = Latitude.
OSD_df["geometry"] = [
    Point(lon, lat) for lon, lat in zip(OSD_df["Longitude"], OSD_df["Latitude"])
]

# Create the geodataframe.
# The CRS is passed as an authority string: the legacy {'init': 'epsg:4326'}
# dict is deprecated and triggers a FutureWarning. Because the frame is created
# directly in EPSG:4326, no reprojection to that same CRS is needed afterwards.
OSD_geoframe = gpd.GeoDataFrame(
    OSD_df,
    crs="EPSG:4326",
    geometry=OSD_df["geometry"],
).reset_index(drop=True)

# (x, y) pairs in row order, used as sampling coordinates for the rasters.
coord_list = list(zip(OSD_geoframe["geometry"].x, OSD_geoframe["geometry"].y))

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [5]:
# Point sampling: add one column per raster holding the raster value found at
# each occurrence coordinate. The column is named after the raster file stem.
for f in raster_features:
    column = Path(f).stem
    # The context manager closes the dataset as soon as sampling is finished;
    # previously one file handle per raster was left open.
    with rasterio.open(f) as src:
        # sample() yields one array per coordinate with one entry per band;
        # keep the first band so the column stores scalars, not 1-element arrays.
        OSD_df[column] = [band_values[0] for band_values in src.sample(coord_list)]
    OSD_df[column] = OSD_df[column].astype('float64')

In [8]:
# Maps each raster file stem ("wc2.1_30s_bio_<n>") to a readable column name of
# the form "<nn>_<description>", where <nn> is the zero-padded variable number.
COLUMNS_RENAME_DICT = {
    f"wc2.1_30s_bio_{number}": f"{number:02d}_{description}"
    for number, description in enumerate(
        (
            "annual_mean_temp",
            "mean_diurnal_range",
            "isothermality",
            "temperature_seasonality",
            "maximum_temp_warmest_month",
            "minimum_temp_coldest_month",
            "temp_annual_range",
            "mean_temp_wettest_quarter",
            "mean_temp_driest_quarter",
            "mean_temp_warmest_quarter",
            "mean_temp_coldest_quarter",
            "annual_precipitation",
            "precipitation_wettest_month",
            "precipitation_driest_month",
            "precipitation_seasonality",
            "precipitation_of_wettest_quarter",
            "precipitation_of_driest_quarter",
            "precipitation_of_warmest_quarter",
            "precipitation_of_coldest_quarter",
        ),
        start=1,
    )
}

In [9]:
# Replace the raster file stems with readable column names. Rebinding the name
# (instead of inplace=True) leaves the previously built frame untouched; labels
# that are not keys of COLUMNS_RENAME_DICT are kept as they are.
OSD_df = OSD_df.rename(columns=COLUMNS_RENAME_DICT)

In [11]:
# Display the occurrence table, now including the sampled raster columns.
OSD_df

Unnamed: 0,Pais,Localizacao,Latitude,Longitude,Data,Contagem de individuos,Fonte do registro,Plataforma,Estado,geometry,...,18_precipitation_of_warmest_quarter,19_precipitation_of_coldest_quarter,02_mean_diurnal_range,03_isothermality,04_temperature_seasonality,05_maximum_temp_warmest_month,06_minimum_temp_coldest_month,07_temp_annual_range,08_mean_temp_wettest_quarter,09_mean_temp_driest_quarter
0,BR,PN do Itatiaia--área geral (Partes Baixa e Alta),-22.406586,-44.624233,2010-08-25 00:00:00+00:00,1.0,HUMAN_OBSERVATION,EBIRD,RJ,POINT (-44.624233 -22.406586),...,948.0,103.0,12.091666,60.157547,240.704056,23.000000,2.9,20.100000,16.733334,10.850000
1,BR,PE Campos do Jordão (Horto Florestal),-22.689444,-45.481945,1999-06-03 00:00:00+00:00,1.0,HUMAN_OBSERVATION,EBIRD,SP,POINT (-45.481945 -22.689444),...,844.0,142.0,11.900000,51.965065,311.016174,24.900000,2.0,22.900000,17.683332,10.200000
2,BR,PE Campos do Jordão (Horto Florestal),-22.689444,-45.481945,1999-06-02 00:00:00+00:00,1.0,HUMAN_OBSERVATION,EBIRD,SP,POINT (-45.481945 -22.689444),...,844.0,142.0,11.900000,51.965065,311.016174,24.900000,2.0,22.900000,17.683332,10.200000
3,BR,Hotel Veraneio Hampel grounds,-29.465906,-50.677185,2013-11-28 00:00:00+00:00,2.0,HUMAN_OBSERVATION,EBIRD,RS,POINT (-50.677185 -29.465906),...,483.0,463.0,8.791667,50.818886,319.711914,25.799999,8.5,17.299999,14.833333,18.200001
4,BR,Vicinity of Sao Joaquim,-28.142239,-50.097656,2013-11-27 00:00:00+00:00,2.0,HUMAN_OBSERVATION,EBIRD,SC,POINT (-50.097656 -28.142239),...,426.0,344.0,10.175000,54.411762,311.451599,24.700001,6.0,18.700001,19.166666,13.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2842,BR,Campos do Jordão--área geral,-22.737240,-45.589830,2012-09-09 00:00:00+00:00,1.0,HUMAN_OBSERVATION,EBIRD,SP,POINT (-45.58983 -22.73724),...,855.0,156.0,11.733333,50.574711,321.787170,24.700001,1.5,23.200001,17.283333,9.950000
2843,BR,FLONA São Francisco de Paula,-29.423851,-50.386770,2014-02-28 00:00:00+00:00,2.0,HUMAN_OBSERVATION,EBIRD,RS,POINT (-50.38677 -29.423851),...,551.0,526.0,9.558333,52.808472,313.488129,24.100000,6.0,18.100000,12.850000,16.116667
2844,BR,near Barracao; Espigao Alto,-27.600000,-51.500000,1971-11-26 00:00:00+00:00,1.0,MACHINE_OBSERVATION,ML,RS,POINT (-51.5 -27.6),...,407.0,373.0,10.941667,54.983250,337.392792,26.799999,6.9,19.900000,15.283334,17.116667
2845,BR,São Francisco de Paula; Centro de Pesquisas e ...,-29.477000,-50.170000,2013-11-09 00:00:00+00:00,1.0,MACHINE_OBSERVATION,ML,RS,POINT (-50.17 -29.477),...,526.0,474.0,8.416666,50.099205,305.709534,24.000000,7.2,16.799999,19.366667,16.483334


In [12]:
# Write the occurrence table without its geometry column, once as Parquet and
# once as CSV. The column is dropped a single time and the result reused.
occurrence_table = OSD_df.drop(columns=["geometry"])
occurrence_table.to_parquet("generated_files/bio_variables_dataframe_occurence.parquet", index=False)
occurrence_table.to_csv("generated_files/bio_variables_dataframe_occurence.csv", index=False)

In [13]:
from urllib import request
import json

In [42]:
# State abbreviations to keep; compared against the GeoJSON "sigla" property
# here and reused by later cells.
STATES = ["RS","SC","PR","SP","MG","ES","RJ","MS"]
url = 'https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/brazil-states.geojson'
# The timeout (in seconds) stops the cell from blocking forever when the host
# cannot be reached; without it urlopen waits indefinitely.
with request.urlopen(url, timeout=30) as response:
    brazil_states = json.load(response)

# Keep only the features of the selected states and wrap them again in a
# collection that carries the same top-level "type" as the downloaded document.
brazil_states_filtered = [
    feat
    for feat in brazil_states['features']
    if feat['properties']['sigla'] in STATES
]
updated_brazil_states_filtered = {'type':brazil_states['type'],'features':brazil_states_filtered}

In [30]:
# Flat list of boundary vertices: one exterior ring per selected state.
coordinates_list = []

for feats in updated_brazil_states_filtered['features']:
    geom = feats['geometry']
    if geom.get('type') == 'Polygon':
        # Polygon coordinates are a list of rings and ring 0 is the exterior.
        # Indexing [0][0] on a Polygon would return a single vertex, so its two
        # numbers would be appended as if they were two separate points.
        exterior_ring = geom['coordinates'][0]
    else:
        # MultiPolygon: exterior ring of the first polygon (unchanged behavior).
        # NOTE(review): any further polygons of the state are not included —
        # confirm that sampling only the first polygon is intended.
        exterior_ring = geom['coordinates'][0][0]
    coordinates_list.extend(exterior_ring)

In [35]:
# One row per boundary vertex; the first value of each pair goes to
# "Longitude" and the second to "Latitude".
dataframe = pd.DataFrame(
    data=coordinates_list,
    columns=["Longitude", "Latitude"],
)

In [39]:
# Point sampling: add one column per raster holding the raster value found at
# each boundary coordinate. The column is named after the raster file stem.
for f in raster_features:
    column = Path(f).stem
    # The context manager closes the dataset as soon as sampling is finished;
    # previously one file handle per raster was left open.
    with rasterio.open(f) as src:
        # sample() yields one array per coordinate with one entry per band;
        # keep the first band so the column stores scalars, not 1-element arrays.
        dataframe[column] = [band_values[0] for band_values in src.sample(coordinates_list)]
    dataframe[column] = dataframe[column].astype('float64')

In [40]:
# Replace the raster file stems with readable column names. Rebinding the name
# (instead of inplace=True) leaves the previously built frame untouched; labels
# that are not keys of COLUMNS_RENAME_DICT are kept as they are.
dataframe = dataframe.rename(columns=COLUMNS_RENAME_DICT)

In [41]:
# Persist the boundary table in both formats, without the row index.
dataframe.to_parquet(
    path="generated_files/bio_variables_dataframe_base.parquet",
    index=False,
)
dataframe.to_csv(
    path_or_buf="generated_files/bio_variables_dataframe_base.csv",
    index=False,
)

In [44]:
# Using only feature 1
# Forward slashes keep the shapefile path portable across operating systems and
# avoid the invalid "\B" escape sequences that the backslash form contained.
brazil = gpd.read_file("assets/BR_UF_2022/BR_UF_2022.shp")

In [46]:
# Keep only the rows whose SIGLA_UF value is one of the selected STATES.
in_selected_states = brazil['SIGLA_UF'].isin(STATES)
brazil = brazil.loc[in_selected_states]

In [50]:
# Merge the geometries of all remaining rows into a single geometry.
# NOTE(review): `unary_union` is deprecated in geopandas >= 1.0 in favor of
# `union_all()` — check the installed version before switching.
state_geometries = brazil['geometry']
polygon = state_geometries.unary_union

In [51]:
from rasterio.features import shapes
from shapely.geometry import shape, Polygon, Point

In [52]:
# Forward slashes keep the raster path portable (and consistent with BIO_PATH)
# and avoid the invalid "\w" escape sequences of the backslash form.
with rasterio.open('assets/wc2.1_30s_bio/wc2.1_30s_bio_1.tif') as src:
    # Extract the metadata
    profile = src.profile
    # Read the raster data (band 1)
    array = src.read(1)
# `src` is closed once the with-block ends; only `profile` and `array` should
# be used from here on.

In [53]:
# Convert the polygon to a Shapely object and wrap it in a list, which is the
# form in which the shapes are handed to the masking step.
polygons = [shape(polygon)]

# Keep a direct reference to the converted geometry as well.
polygon_shapely = polygons[0]

In [54]:
# Rasterize the shapes onto the raster grid. With invert=True the mask is True
# for cells inside the shapes and False everywhere else.
mask = rasterio.features.geometry_mask(polygons, out_shape=array.shape, transform=profile['transform'], invert=True)

# Collect the points inside the polygon.
# The previous loop kept the cells where the mask was False, i.e. the cells
# OUTSIDE the polygon, and visited every raster cell in pure Python. Selecting
# the True cells with np.nonzero fixes the selection and is vectorized; the
# (row, col) pairs come back in the same row-major order as before.
rows, cols = np.nonzero(mask)
# Cell-center coordinates are computed from the stored affine transform, so the
# already-closed dataset handle `src` is no longer needed.
xs, ys = rasterio.transform.xy(profile['transform'], rows, cols)
points_within_polygon = list(zip(np.asarray(xs).tolist(), np.asarray(ys).tolist()))

print("Pontos dentro do polígono:", points_within_polygon[:3])

In [None]:
# One row per collected point; the first value of each pair goes to
# "Longitude" and the second to "Latitude".
dataframe = pd.DataFrame(
    data=points_within_polygon,
    columns=["Longitude", "Latitude"],
)

In [None]:
# Persist the point table in both formats, without the row index.
dataframe.to_parquet(
    path="generated_files/bio_variables_dataframe_base_var_1.parquet",
    index=False,
)
dataframe.to_csv(
    path_or_buf="generated_files/bio_variables_dataframe_base_var_1.csv",
    index=False,
)