In [1]:
import pandas as pd, os
from utils.util import is_laptop
from datetime import date

In [2]:
def _latest_file(directory):
    """Return the path of the most recently modified regular file in ``directory``.

    ``directory`` must end with a path separator; the result is ``directory``
    followed by the file name. Sub-directories are ignored so that a recently
    touched folder is never mistaken for a data file.

    Raises:
        FileNotFoundError: if ``directory`` contains no regular files.
    """
    candidates = (directory + name for name in os.listdir(directory))
    files = [path for path in candidates if os.path.isfile(path)]
    if not files:
        raise FileNotFoundError(f'No data files found in {directory}')
    return max(files, key=os.path.getmtime)


# Data root depends on which machine the notebook runs on (decided by is_laptop()).
# NOTE(review): absolute, machine-specific paths — consider a configurable data dir.
folder_path = 'D:/Skoli/Mastersverkefni/lokaverkefni_vel/data/' if is_laptop() else 'E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/data/'

# Newest file (by modification time) in each input folder.
hourly_path = _latest_file(folder_path + 'Measured/combined_klst/')
merged_path = _latest_file(folder_path + 'Combined/')

In [18]:
# Read both input feather files into DataFrames: first the hourly
# measurements, then the combined data set.
hourly_df = pd.read_feather(path=hourly_path)
merged_df = pd.read_feather(path=merged_path)

In [4]:
def next_hour(time):
    """Round ``time`` up to the hour so it lines up with the vedur klst file.

    The rounding is a ceiling at hour resolution: a value that already sits
    exactly on a full hour is returned unchanged.
    """
    rounded_up = time.ceil('h')
    return rounded_up

In [20]:
# Coerce the key and wind columns to numbers; anything that cannot be parsed
# becomes NaN because of errors='coerce'.
for numeric_col in ('stod', 'fx', 'fg', 'f'):
    hourly_df[numeric_col] = pd.to_numeric(hourly_df[numeric_col], errors='coerce')

# Parse timestamps; unparseable values become NaT.
hourly_df['timi'] = pd.to_datetime(hourly_df['timi'], errors='coerce')

# Keep only rows with fx below 65 and fg below 75. Rows where either value is
# NaN fail the comparison and are dropped too.
# NOTE(review): the limits look like plausibility caps on wind speed — confirm units.
within_limits = (hourly_df['fx'] < 65) & (hourly_df['fg'] < 75)
hourly_df = hourly_df[within_limits]

# Round every timestamp up to the hour to build the 'timi' join key.
merged_df['timi'] = merged_df['time'].apply(next_hour)

In [22]:
# (rows, columns) of merged_df, shown for comparison with the joined frame.
merged_df.shape

(69467, 30)

In [23]:
# Inner-join the two frames on the 'stod' and 'timi' keys. Column names present
# in both frames get a '_merged' / '_hourly' suffix. Rows without an hourly
# fg value are then discarded.
combined_df = (
    merged_df
    .merge(hourly_df, on=['stod', 'timi'], how='inner', suffixes=('_merged', '_hourly'))
    .dropna(subset=['fg_hourly'])
)

In [24]:
# (rows, columns) of the joined frame after dropping rows without fg_hourly.
combined_df.shape

(67513, 34)

In [25]:
# Keep only rows where f_merged and fx agree to within 0.1.
speed_gap = (combined_df['f_merged'] - combined_df['fx']).abs()
nailStripped_df = combined_df[speed_gap <= 0.1]

# Sanity check: the largest remaining f_merged value must stay below 65.
assert nailStripped_df['f_merged'].max() < 65

In [29]:
# Columns carried over to the output file, in output order.
columns_to_keep = [
    'X', 'Y', 'time',
    'ws_15', 'ws_250', 'ws_500',
    'wd_15', 'wd_250', 'wd_500',
    'p_15', 'p_250', 'p_500',
    't_15', 't_250', 't_500',
    'stod', 'f_merged', 'fg_merged', 'd_merged',
    'Ri_01', 'Ri_12', 'Ri_02',
    'N_01', 'N_12', 'N_02',
]
df_toWrite = nailStripped_df[columns_to_keep]

df_toWrite.columns

Index(['X', 'Y', 'time', 'ws_15', 'ws_250', 'ws_500', 'wd_15', 'wd_250',
       'wd_500', 'p_15', 'p_250', 'p_500', 't_15', 't_250', 't_500', 'stod',
       'f_merged', 'fg_merged', 'd_merged', 'Ri_01', 'Ri_12', 'Ri_02', 'N_01',
       'N_12', 'N_02'],
      dtype='object')

In [30]:
# Strip the '_merged' suffix from the three columns that received it in the join.
output_names = {'f_merged': 'f', 'fg_merged': 'fg', 'd_merged': 'd'}
df_toWrite = df_toWrite.rename(columns=output_names)

df_toWrite.columns

Index(['X', 'Y', 'time', 'ws_15', 'ws_250', 'ws_500', 'wd_15', 'wd_250',
       'wd_500', 'p_15', 'p_250', 'p_500', 't_15', 't_250', 't_500', 'stod',
       'f', 'fg', 'd', 'Ri_01', 'Ri_12', 'Ri_02', 'N_01', 'N_12', 'N_02'],
      dtype='object')

In [31]:
# Write the result to a feather file stamped with today's date.
output_dir = folder_path + 'NailStripped/'
# Create the output folder on first use instead of failing at write time.
os.makedirs(output_dir, exist_ok=True)
outputpath = output_dir + f'merged_stripped_{date.today()}.feather'

# The frame was produced by boolean filtering, so its index is no longer
# 0..n-1. to_feather is documented to require a default index: older pandas
# raises on a non-default one, newer versions store the stale index in the
# file. Reset it for the write only; df_toWrite itself is left untouched.
df_toWrite.reset_index(drop=True).to_feather(outputpath)