In [22]:
# Load data.
from METCOMP_utils import *

station_ids = ['40013','40010','25754','40003','24768','40005','23470','25786','24856','23658','40004','23659','25652','20949','40145','40007','40143','22234']

# param_dict: Dictionary translating SMHI parameter names to corresponding parameters in reference.
#             Example: param_dict = {'t': 'ref_temperature', 'prec1h': 'ref_precipitation', ...}
param_dict = {'t': 'TM', 'prec1h': 'RR', 'r': 'UM', 'ws': 'FM2'}

start_date = datetime.date(2017, 3, 1)
end_date = datetime.date(2020, 2, 29)
MESAN_data = {}
LANTMET_data = {}
for station in station_ids:
    print('Loading ' + station + '...')
    MESAN_data[station] = read_CSV(station, 'MESAN', start_date, end_date)
    LANTMET_data[station] = read_CSV(station, 'LANTMET', start_date, end_date)

# Unit conversion if needed.
for station in station_ids:
    LANTMET_data[station][param_dict['r']] = LANTMET_data[station][param_dict['r']]/100 # LantMet relative humidity in [0, 100]. Rescale to [0, 1] to follow SMHI convention.
    

Loading 40013...
Loading 40010...
Loading 25754...
Loading 40003...
Loading 24768...
Loading 40005...
Loading 23470...
Loading 25786...
Loading 24856...
Loading 23658...
Loading 40004...
Loading 23659...
Loading 25652...
Loading 20949...
Loading 40145...
Loading 40007...
Loading 40143...
Loading 22234...


In [23]:
import numpy as np
from scipy.stats import pearsonr
# param_dict: Dictionary translating SMHI parameter names to corresponding parameters in reference.
#             Example: param_dict = {'t': 'ref_temperature', 'prec1h': 'ref_precipitation', ...}
param_dict = {'t': 'TM', 'r': 'UM', 'prec1h': 'RR', 'ws': 'FM2'}

stations = ['40013','40010','25754','40003','24768','40005','23470','25786','24856','23658','40004','23659','25652','20949','40145','40007','40143','22234']

table = {'Station': stations, 't': [], 'r': [], 'prec1h': [], 'ws': []}

for station in stations:
    
    print('Working on station ' + station + '...')
    
    # Make copies.
    df_MESAN = MESAN_data[station].copy()
    df_LANTMET = LANTMET_data[station].copy()
    
    for param in param_dict:
        
        # Reset index
        df_MESAN.reset_index(inplace=True, drop=True)
        df_LANTMET.reset_index(inplace=True, drop=True)
        
        print('Finding rows where NaN values exists...')
        bad_rows = []
        for index, _ in df_MESAN.iterrows():
            if np.isnan(df_MESAN.iloc[index][param]) or np.isnan(df_LANTMET.iloc[index][param_dict[param]]):
                print('Found bad row at ' + df_MESAN.iloc[index]['Timestamp'] + '.')
                bad_rows.append(index)
        print('Dropping rows from both datasets (' + str(len(bad_rows)) + ' rows)')
        MESAN_param = df_MESAN.drop(df_MESAN.index[bad_rows]);
        LANTMET_param = df_LANTMET.drop(df_LANTMET.index[bad_rows]);
        
        
        r = round(pearsonr(df_LANTMET[param_dict[param]], df_MESAN[param])[0], 4)
        table[param].append(r)
        
table = pd.DataFrame.from_dict(table)
print(table)
        

Working on station 40013...
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Working on station 40010...
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Working on station 25754...
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows where NaN values exists...
Dropping rows from both datasets (0 rows)
Finding rows w

ValueError: array must not contain infs or NaNs

In [36]:
import numpy as np
from scipy.stats import pearsonr
# param_dict: Dictionary translating SMHI parameter names to corresponding parameters in reference.
#             Example: param_dict = {'t': 'ref_temperature', 'prec1h': 'ref_precipitation', ...}
param_dict = {'t': 'TM', 'r': 'UM', 'prec1h': 'RR', 'ws': 'FM2'}

stations = ['40013','40010','25754','40003','24768','40005','23470','25786','24856','23658','40004','23659','25652','20949','40145','40007','40143','22234']

table = {'Station': stations, 't': [], 'r': [], 'prec1h': [], 'ws': []}



for station in stations:
    
    print('Working on station ' + station + '...')
    
    # Make copies.
    df_MESAN = MESAN_data[station].copy()
    df_LANTMET = LANTMET_data[station].copy()
    
    for param in param_dict:
        
        #print(df_MESAN[param].isnull())
        #print(df_MESAN[param].isnull() | df_LANTMET[param_dict[param]].isnull())
        
        bad_rows = ~(df_MESAN[param].isnull() | df_LANTMET[param_dict[param]].isnull())
        
        MESAN_param = df_MESAN[bad_rows][param]
        LANTMET_param = df_LANTMET[bad_rows][param_dict[param]]
        
        r = round(pearsonr(LANTMET_param, MESAN_param)[0], 4)
        table[param].append(r)

table = pd.DataFrame.from_dict(table)
print(table)

Working on station 40013...
Working on station 40010...
Working on station 25754...
Working on station 40003...
Working on station 24768...
Working on station 40005...
Working on station 23470...
Working on station 25786...
Working on station 24856...
Working on station 23658...
Working on station 40004...
Working on station 23659...
Working on station 25652...
Working on station 20949...
Working on station 40145...
Working on station 40007...
Working on station 40143...
Working on station 22234...
   Station       t       r  prec1h      ws
0    40013  0.9673  0.8967  0.5572  0.7509
1    40010  0.9930  0.9615  0.6691  0.7604
2    25754  0.9964  0.9675  0.6897  0.8239
3    40003  0.9943  0.9643  0.6865  0.7492
4    24768  0.9915  0.9517  0.5677  0.8080
5    40005  0.9941  0.9522  0.6217  0.8547
6    23470  0.9884  0.9385  0.5203  0.8502
7    25786  0.9878  0.9515  0.4270  0.4443
8    24856  0.9923  0.9624  0.6943  0.7630
9    23658  0.9934  0.9577  0.6200  0.3220
10   40004  0.9925  0.9

In [37]:
table.to_csv('CorrelationAllStations.csv', index=False)