In [65]:
#imports
import pandas as pd
from datetime import date, timedelta
import datetime
import os
import requests

This script does the following for each location specified below
1. Reads all MESAN csv files created by Grib2CSV.ipynb within the date range specified below and puts them in a single pandas dataframe
2. Accesses the SMHI STRÅNG API once for each radiation parameter and adds the data to the dataframe.
3. Calculates 2 different temperature humidity index (THI) values for each hour.
4. Calculates the daily mean and max for every parameter
5. Saves 2 csv files. Hourly and daily.

It is assumed that you have already run Grib2CSV.ipynb for the points in question.

It is also assumed that you run this script from the same location as Grib2CSV.ipynb, meaning a folder named "MESAN_CSV" should exist in the same folder.

To run this script for all the data created by Grib2CSV.ipynb, you can use the exact same function call as you used for Grib2CSV.

It is also possible to run it for a subset of the locations and/or a subrange of the time.

If you run this script for locations or dates that have no MESAN csv files, it will stop with a FileNotFoundError.


In [68]:
#Step between two consecutive daily MESAN csv files
delta = timedelta(days=1)

#Parameters related to the STRÅNG api. More info at https://opendata.smhi.se/apidocs/strang/
#params[i] is the STRÅNG parameter id whose values are stored in the column named parName[i],
#so the two lists must have the same length and the same order.
#Id 119 is intentionally absent: the STRÅNG parameter list goes 116, 117, 118, 120, 121, 122.
#With 119 in the list, PAR, direct horizontal and diffuse irradiance were each requested with the wrong id.
params = [116, 117, 118, 120, 121, 122]
parName = ["CIE UV irradiance", "Global irradiance", "Direct normal irradiance", "PAR", "Direct horizontal irradiance", "Diffuse irradiance"]
#Time resolution requested from STRÅNG
interval = "hourly"

#Seconds to wait for a single STRÅNG response before giving up
_STRANG_TIMEOUT_S = 120


def _load_mesan(name, start_date, end_date):
    """Read the daily MESAN csv files of one point into a single dataframe.

    Files are expected at MESAN_CSV/<name>/MESAN_<YYYY-MM-DD>.csv, one per day
    from start_date to end_date (both inclusive).

    Raises:
        FileNotFoundError: if the file for any day in the range is missing.
    """
    frames = []
    current_date = start_date
    while current_date <= end_date:
        fname = f"MESAN_{current_date}.csv"
        fpath = os.path.join("MESAN_CSV", name, fname) #OS independent path
        if not os.path.isfile(fpath):
            raise FileNotFoundError(
                f"Missing MESAN file '{fpath}': run Grib2CSV.ipynb for point "
                f"'{name}' and this date range first."
            )
        frames.append(pd.read_csv(fpath))
        current_date += delta
    #Concatenate once at the end; concatenating inside the loop copies the data on every iteration
    return pd.concat(frames)


def _add_strang(df, lat, lon, start_date, end_date):
    """Fetch every STRÅNG parameter for one point and merge it into df on "Timestamp".

    One request is made per (params, parName) pair; each result becomes a new
    column named after the parameter.

    Raises:
        requests.HTTPError: if STRÅNG answers with an error status.
    """
    sDate = start_date.strftime('%Y-%m-%d')
    eDate = end_date.strftime('%Y-%m-%d')

    for param, pname in zip(params, parName):
        api_url = f"https://opendata-download-metanalys.smhi.se/api/category/strang1g/version/1/geotype/point/lon/{round(lon,6)}/lat/{round(lat,6)}/parameter/{param}/data.json?from={sDate}&to={eDate}&interval={interval}"

        #Fail loudly on an HTTP error instead of trying to parse an error page as data
        response = requests.get(api_url, timeout=_STRANG_TIMEOUT_S)
        response.raise_for_status()

        tf = pd.DataFrame(response.json())
        tf = tf.rename(columns = {"value":pname, "date_time": "Timestamp"})
        #NOTE(review): this is an inner join on the raw timestamp values, so hours are
        #silently dropped if MESAN and STRÅNG format their timestamps differently - confirm
        df = pd.merge(df, tf, on = "Timestamp")
    return df


def _add_thi(df):
    """Add the columns T_C, wind_speed, THI and THI_adj to df and return it.

    Assuming that t_sfc is dry bulb
    Assuming that global irradiance is ok to use
    """
    #Creating a new column for Celsius
    df["T_C"] = df["t_sfc"] - 273.15

    #Creating new column for combined wind speed
    df["wind_speed"] = (df["v_sfc"]**2 + df["u_sfc"]**2)**0.5

    #THI = (1.8*T + 32) - (0.55 - 0.0055*RH%) * (1.8*T - 26), commonly cited as NRC (1971).
    #The humidity term is SUBTRACTED: adding it makes drier air give a higher THI.
    #0.55 instead of 0.0055 since the data is decimal instead of percentage
    df["THI"] = 1.8 * df["T_C"] + 32 - (0.55 - 0.55 * df["r_sfc"]) * (1.8 * df["T_C"] - 26)
    #Adjusted THI; the relative humidity is decimal in this formula according to Mader 2006
    df["THI_adj"] = 4.51 + (0.8 * df["T_C"]) + (df["r_sfc"] * (df["T_C"] - 14.4)) + 46.4 - 1.992 * df["wind_speed"] + 0.0068 * df["Global irradiance"]
    return df


def _daily_summary(df):
    """Return one row per calendar day with <column>_mean and <column>_max for every parameter.

    df["Timestamp"] must already be a datetime column.
    """
    day = df["Timestamp"].dt.date
    #Take Timestamp out before aggregating: newer pandas would also average it, which
    #yields Timestamp_mean/Timestamp_max columns instead of a single droppable "Timestamp"
    grouped = df.drop(columns=["Timestamp"]).groupby(day)
    dfDailyMean = grouped.mean(numeric_only=True)
    dfDailyMax = grouped.max()
    return dfDailyMean.join(dfDailyMax, lsuffix="_mean", rsuffix= "_max")


def weatherPreProcessing(points, start_date, end_date):
    """Build the hourly and daily weather csv files for every point.

    For each point this reads the MESAN csv files in the date range, adds the
    STRÅNG radiation parameters, calculates THI and adjusted THI, and writes
    <id>_weather_hourly.csv and <id>_weather_daily.csv to the working directory.

    Args:
        points: iterable of dicts with the keys "id", "lat" and "lon".
        start_date: first day to process (inclusive).
        end_date: last day to process (inclusive).

    Raises:
        ValueError: if start_date is after end_date.
        FileNotFoundError: if a MESAN csv file in the range is missing.
        requests.HTTPError: if a STRÅNG request fails.
    """
    if start_date > end_date:
        raise ValueError(f"start_date ({start_date}) is after end_date ({end_date})")

    for point in points:
        name = point["id"]
        lat = point["lat"]
        lon = point["lon"]

        #Every csv file for the specified pseudonym and date range in one dataframe
        df = _load_mesan(name, start_date, end_date)

        #Adds the STRÅNG parameters to the data frame
        df = _add_strang(df, lat, lon, start_date, end_date)

        #THI calculation
        df = _add_thi(df)

        #Calculating daily mean and max
        df["Timestamp"] = pd.to_datetime(df["Timestamp"])
        dfDaily = _daily_summary(df)

        #Writing the hourly and daily data
        df.to_csv(f"{name}_weather_hourly.csv", index=False)
        dfDaily.to_csv(f"{name}_weather_daily.csv")

In [69]:
#Example run: one location (pseudonym 24688) from 2019-11-01 to 2022-10-31.
#The MESAN csv files for this point and period must already exist under MESAN_CSV/24688/.
points = [dict(id='24688', lat=55.6689, lon=13.1023)]
start_date, end_date = date(2019, 11, 1), date(2022, 10, 31)
weatherPreProcessing(points, start_date, end_date)

FileNotFoundError: [Errno 2] No such file or directory: 'MESAN_CSV\\24688\\MESAN_2019-11-01.csv'