In [0]:
from dmi_climate.paths import SECRETS_FILE, BRONZE_DIR
import json
from dmi_climate.extract_functions import extract_dmi_metObs
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.functions import current_timestamp

In [0]:
# Load the DMI API key from the project's secrets file.
with open(SECRETS_FILE) as f:
    secrets = json.load(f)

# Schema for the observation properties that are kept in the bronze layer.
schema = StructType([
    StructField('created', StringType(), True),
    StructField('observed', StringType(), True),
    StructField('parameterId', StringType(), True),
    StructField('stationId', StringType(), True),
    StructField('value', DoubleType(), True)
])

# Request settings for the DMI metObs observation endpoint.
url = "https://dmigw.govcloud.dk/v2/metObs/collections/observation/items"
# dict.fromkeys drops repeated ids while keeping their order: requesting the
# same parameter twice would write every one of its observations twice.
parameterIds = list(dict.fromkeys([
    "sun_last1h_glob",
    "temp_mean_past1h",
    "temp_soil_mean_past1h",
]))
bbox = "12.3,55.6,12.7,56.0"
period = 'latest-day'
limit = 1000
secret_key = secrets["metObs-api-key"]

# Collect the properties of every returned feature across all parameters,
# then build the DataFrame once instead of chaining one union per parameter.
records = []
for parameter_id in parameterIds:
    data = extract_dmi_metObs(parameter_id, bbox, period, limit, url, secret_key)
    features = data.get('features')
    if features is None:
        # Fail with a clear message before anything is overwritten on disk.
        raise ValueError(
            f"DMI response for parameter '{parameter_id}' has no 'features' key"
        )
    for item in features:
        properties = item['properties']
        value = properties.get('value')
        # JSON whole numbers are decoded as int, which PySpark's schema
        # verification may reject for a DoubleType field; widen them to float.
        if isinstance(value, int) and not isinstance(value, bool):
            properties = {**properties, 'value': float(value)}
        records.append(properties)

df_climate_dmi = spark.createDataFrame(records, schema)

# Stamp every row with the time of this ingestion run.
df_climate_dmi = df_climate_dmi.withColumn("ingestion_timestamp", current_timestamp())

# NOTE(review): mode('overwrite') replaces the existing extract on each run
# (assuming Spark's default static partition overwrite), so only the partition
# of the latest run is kept -- confirm that no history is expected here.
df_climate_dmi.write.mode('overwrite').partitionBy("ingestion_timestamp").parquet(f'{BRONZE_DIR}/daily_extract/')


In [0]:
# List what was written to the bronze layer, using the same path expression
# as the parquet write so the listing cannot drift from the write location.
display(dbutils.fs.ls(f'{BRONZE_DIR}/daily_extract/'))