In [1]:
import os
import glob
import pandas as pd

### Load all measurement files

In [11]:
weather_stations_url = "https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/liste-download-nbcn-d.csv"
weather_stations = pd.read_csv(weather_stations_url, sep=";", nrows=29, encoding='ANSI')

In [None]:
weather_stations_urls = weather_stations["URL Previous years (verified data)"].tolist()

In [4]:
combined_csv = pd.concat([pd.read_csv(f, sep=";", header=None, skiprows=[0]) for f in weather_stations_urls]) 

In [5]:
combined_csv.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
57704,STG,20211227,49,0,-,915.2,1.6,221,5.4,3.3,6.6,81.6
57705,STG,20211228,18,0,-,914.6,3.6,7,6.7,4.3,10.0,76.3
57706,STG,20211229,11,0,-,923.0,20.3,0,7.2,5.2,10.9,83.8
57707,STG,20211230,33,0,-,932.9,0.3,8,11.3,9.8,12.6,85.0
57708,STG,20211231,68,0,-,935.1,0.0,360,10.5,7.9,15.3,68.4


### Clean Data

In [6]:
## override headers
combined_csv.columns = ['weatherstation_key', 'date', 'global_radiation', 
                        'snowdepth', 'cloudcover', 'airpressure', 'rainfall', 
                        'sunshineduration', 'airtemp_mean', 'airtemp_min', 
                        'airtemp_max', 'airhumidity']

In [7]:
## replace "-" with None 
for column in combined_csv.columns:
    combined_csv[column].replace({"-": None,}, inplace=True)

In [8]:
## convert date to expected format
def convert_to_date(date):
    dateString = str(date)
    year = dateString[:4]
    month = dateString[4:6].zfill(2) 
    day = dateString[6:8].zfill(2)
    return f"{year}-{month}-{day}"
combined_csv['date']= combined_csv['date'].apply(convert_to_date)

In [9]:
combined_csv.head()

Unnamed: 0,weatherstation_key,date,global_radiation,snowdepth,cloudcover,airpressure,rainfall,sunshineduration,airtemp_mean,airtemp_min,airtemp_max,airhumidity
0,ALT,1864-01-01,,,,953.2,0,,1.8,,,
1,ALT,1864-01-02,,,,963.5,4,,-7.0,,,
2,ALT,1864-01-03,,,,967.6,0,,-11.3,,,
3,ALT,1864-01-04,,,,968.3,0,,-9.9,,,
4,ALT,1864-01-05,,,,970.0,0,,-7.6,,,


### Export to csv

In [10]:
combined_csv.to_csv( "weather_stations_measurement_data.csv", index=False, encoding='utf-8-sig', line_terminator='\n')