In [16]:
import requests
import json
import pandas as pd
from datetime import datetime

## Climate API

API extration specification. To create an acount for extraction of data you must follow this link:

    https://dmiapi.govcloud.dk

Following this link you find the list of variables possible to withdraw (the names are almost the same as in the link, but not quite. If they are not the same it prints a list of possibel variables to extract):

    https://confluence.govcloud.dk/pages/viewpage.action?pageId=26476616

This link specifies the possibilities regarding specification of station location, time interval, and further parameter descriptions:

    https://www.dmi.dk/friedata/observationer/

In [34]:
#Defining lists for data
date_time_list = []
stationId_list = []
parameterId_list = []
value_list = []
jason_data = []

In [35]:
### Defining function:
var_list = ['temp_mean_past1h']
#station_list = ['06052', '06119', '06159','06120','06123','06041','06081','06065'] # list of stations can be found here: https://www.dmi.dk/friedata/observationer/
#station_list_DK2 = '06052', '06119', '06159','06120','06123','06041','06081','06065'
station_list_DK1 = ['06041', #skagen
 '06019', #Silstrup
 '06082', #Borris
 '06110', #flyve station skydstrup
 '06074', #Århus syd
 '06123', #Assens/Torø
 '06126', #Årslev
 ]

station_list_DK2 = ['06168', #nakkehoved fyr
                    '06186', #landbohøjskolen
                    '06154', #brandslev
                    '06141', #abed
                    '06190', #bornholm lufthavn
                    ]
# 'bbox' : '7.88,54.85,10.88,57.8' #bbox for Nordpool DK1. A box area can be found here https://boundingbox.klokantech.com/ - use csv format

In [36]:
def get_vejr_data(var, station):
    url = "https://dmigw.govcloud.dk/v2/metObs/collections/observation/items"
    querystring = {'limit' : '100000', #max 300.000
                'parameterId' : var,
                'stationId' : station,
                'api-key' : ''} #Indsæt API nøgle i ''
    response = requests.get(url, params=querystring)
    
    dict = response.json()

    #Appending the data to the lists
    jason_data.append(dict)
    for feature in dict['features']:
        date_time = feature['properties']['observed']
        stationId = feature['properties']['stationId']
        parameterId = feature['properties']['parameterId']
        value = feature['properties']['value']
        
        date_time_list.append(date_time)
        stationId_list.append(stationId)
        parameterId_list.append(parameterId)
        value_list.append(value)
        
    #print(json.dumps(response.json(), indent=4, sort_keys=True))
    #print()

In [37]:
# For loop that runs the gode for variables and stations
for var in var_list:
    for station in station_list_DK2:
        get_vejr_data(var, station)

Description of data extracted

In [38]:
print('The first date in the dataset', date_time_list[0])
print()
print('The last date in the dataset', date_time_list[-1])
print()
print('Length of the data', len(value_list))
print()
print('List of weather stations', list(set(stationId_list)))
print()
print('Following this link you can map the stationIds to a station: https://confluence.govcloud.dk/pages/viewpage.action?pageId=41717704')
print()
print('Variables extracted', list(set(parameterId_list)))

The first date in the dataset 2023-11-07T10:00:00Z

The last date in the dataset 2012-05-25T05:00:00Z

Length of the data 500000

List of weather stations ['06186', '06141', '06154', '06168', '06190']

Following this link you can map the stationIds to a station: https://confluence.govcloud.dk/pages/viewpage.action?pageId=41717704

Variables extracted ['temp_mean_past1h']


Merging data to a panas dataframe

In [39]:
df = pd.DataFrame({'date': date_time_list, 'stationId': stationId_list, 'parameter_name': parameterId_list, 'value': value_list})
df

Unnamed: 0,date,stationId,parameter_name,value
0,2023-11-07T10:00:00Z,06168,temp_mean_past1h,9.8
1,2023-11-07T09:00:00Z,06168,temp_mean_past1h,9.0
2,2023-11-07T08:00:00Z,06168,temp_mean_past1h,8.6
3,2023-11-07T07:00:00Z,06168,temp_mean_past1h,8.4
4,2023-11-07T06:00:00Z,06168,temp_mean_past1h,8.1
...,...,...,...,...
499995,2012-05-25T09:00:00Z,06190,temp_mean_past1h,17.6
499996,2012-05-25T08:00:00Z,06190,temp_mean_past1h,17.1
499997,2012-05-25T07:00:00Z,06190,temp_mean_past1h,16.2
499998,2012-05-25T06:00:00Z,06190,temp_mean_past1h,15.0


Exporting data

In [40]:
output_path = ""
df.to_csv(output_path + 'weather_data_DK1.csv', sep=',', index=False)

In [10]:
# output a json data
json_data = json.dumps(jason_data)
with open(output_path + "data_weather.json", "w") as outfile:
    outfile.write(json_data)

## Calculating the mean accros stations

Reading data

In [63]:
df = pd.read_csv(output_path + 'weather_data_DK2.csv', sep=',', header=0)

Defining function to calculate mean

In [64]:
average_values = df.groupby(['date', 'parameter_name']).mean()
average_values['stationId'] = 'mean'
average_values = average_values.reset_index()
average_values

Unnamed: 0,date,parameter_name,stationId,value
0,2012-03-22T07:00:00Z,temp_mean_past1h,mean,5.200
1,2012-03-22T08:00:00Z,temp_mean_past1h,mean,5.800
2,2012-03-22T09:00:00Z,temp_mean_past1h,mean,5.700
3,2012-03-22T10:00:00Z,temp_mean_past1h,mean,5.900
4,2012-03-22T11:00:00Z,temp_mean_past1h,mean,5.700
...,...,...,...,...
101926,2023-11-07T06:00:00Z,temp_mean_past1h,mean,8.940
101927,2023-11-07T07:00:00Z,temp_mean_past1h,mean,9.140
101928,2023-11-07T08:00:00Z,temp_mean_past1h,mean,9.275
101929,2023-11-07T09:00:00Z,temp_mean_past1h,mean,9.440


In [65]:
average_values.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101931 entries, 0 to 101930
Data columns (total 4 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   date            101931 non-null  object 
 1   parameter_name  101931 non-null  object 
 2   stationId       101931 non-null  object 
 3   value           101931 non-null  float64
dtypes: float64(1), object(3)
memory usage: 3.1+ MB


In [66]:
average_values["value"] = round(average_values["value"],2)
average_values["date"] = pd.to_datetime(average_values["date"])

In [67]:
average_values

Unnamed: 0,date,parameter_name,stationId,value
0,2012-03-22 07:00:00+00:00,temp_mean_past1h,mean,5.20
1,2012-03-22 08:00:00+00:00,temp_mean_past1h,mean,5.80
2,2012-03-22 09:00:00+00:00,temp_mean_past1h,mean,5.70
3,2012-03-22 10:00:00+00:00,temp_mean_past1h,mean,5.90
4,2012-03-22 11:00:00+00:00,temp_mean_past1h,mean,5.70
...,...,...,...,...
101926,2023-11-07 06:00:00+00:00,temp_mean_past1h,mean,8.94
101927,2023-11-07 07:00:00+00:00,temp_mean_past1h,mean,9.14
101928,2023-11-07 08:00:00+00:00,temp_mean_past1h,mean,9.27
101929,2023-11-07 09:00:00+00:00,temp_mean_past1h,mean,9.44


In [68]:
# if one want the full dataset with both average values and actual values form the stations
df = pd.concat([df, average_values])
df

Unnamed: 0,date,stationId,parameter_name,value
0,2023-11-07T10:00:00Z,6168,temp_mean_past1h,9.80
1,2023-11-07T09:00:00Z,6168,temp_mean_past1h,9.00
2,2023-11-07T08:00:00Z,6168,temp_mean_past1h,8.60
3,2023-11-07T07:00:00Z,6168,temp_mean_past1h,8.40
4,2023-11-07T06:00:00Z,6168,temp_mean_past1h,8.10
...,...,...,...,...
101926,2023-11-07 06:00:00+00:00,mean,temp_mean_past1h,8.94
101927,2023-11-07 07:00:00+00:00,mean,temp_mean_past1h,9.14
101928,2023-11-07 08:00:00+00:00,mean,temp_mean_past1h,9.27
101929,2023-11-07 09:00:00+00:00,mean,temp_mean_past1h,9.44


Exporting the new dataset 

In [69]:
path_for_clean_data = ""
average_values.to_csv(path_for_clean_data + 'average_weather_data_DK2.csv', sep=',', index=False)