In [None]:
import climatedata_functions as climf

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date, timedelta, datetime
from tqdm import tqdm


# Joining the weather data

This notebook joins weather data from four time points per day (0:00, 6:00, 12:00, and 18:00) over seven days. In the input data, each parameter ('t' for temperature, 'si10' for wind, and 'dswrf' for radiation) has one column per time point, and each cell of such a column contains seven values (the day of sampling and 1, 2, 3, 4, 5, and 6 days before sampling). This notebook combines these into a single column per parameter that contains all 28 values for all dates and time points, beginning with the data from 6 days before sampling (0:00, 6:00, 12:00, 18:00).

## Temperature

In [None]:
# Columns that are removed from the metadata table right after loading
features = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'Unnamed: 0.1.1.1','longitude_trans','x_grid','y_grid','temp_01']

# Metadata table (one row per 'uid')
metadata = pd.read_csv("../data/metadata_temp_12_01_06_complete_without_a.csv")
# Temperature tables for the 00:00 and 18:00 time points
temp_00 = pd.read_csv("../data/temp_00.csv")
temp_18 = pd.read_csv("../data/temp_18.csv")

# Remove the unneeded columns from the metadata table in place
metadata.drop(columns=features, inplace=True)

In [None]:
# Attach the 00:00 ('t_0') and 18:00 ('t_18') temperature columns to the metadata.
# Both joins are inner joins on 'uid'; validate="1:1" makes pandas raise if a
# uid occurs more than once on either side.
metadata = (
    metadata
    .merge(temp_00[['uid', 't_0']], how="inner", on='uid', validate="1:1")
    .merge(temp_18[['uid', 't_18']], how="inner", on='uid', validate="1:1")
    # Bring the 06:00 and 12:00 columns to the same 't_<hour>' naming scheme
    .rename(columns={'temp_06': 't_6', 'temp_12': 't_12'})
)

# The four per-time-point columns whose string content is converted below
features = ['t_0', 't_6', 't_12', 't_18']

# Turn the string cells of those columns into lists of numbers (climf helper)
temp = climf.convert_str_to_list(metadata, features)

temp.head(1)

In [None]:
# Save the table with the four per-time-point temperature columns (no index column)
temperature_csv = '../data/temperature.csv'
temp.to_csv(temperature_csv, index=False)

In [None]:
# Temperature
# Combine the four per-time-point values of every row into one time series
# (climf.join_time_values) and store it as a string in the new 'temp' column.
# The strings are collected in row order and assigned as a whole column, so the
# 'uid' column does not have to be scanned once per row; this matches the
# per-uid assignment because the preceding 1:1 merges guarantee unique uids.
temp['temp'] = [
    str(climf.join_time_values(row.t_0, row.t_6, row.t_12, row.t_18))
    for row in tqdm(temp.itertuples(), total=len(temp))  # progress bar over all rows
]

In [None]:
# The individual time-point columns have been combined into the 'temp' series,
# so they are removed before saving
temp = temp.drop(columns=['t_12', 't_6', 't_0', 't_18'])
# Persist the combined table (no index column)
temp.to_csv('../data/temperature_series.csv', index=False)
temp.head(1)

### Join with data from 14 days prior to sampling

In [None]:
# Columns of the loaded table that are not needed afterwards
features = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'Unnamed: 0.1.1.1', 'temp_01', 'temp_12', 'temp_06', 'dswrf_0']
# Load the table (going by the file name: values from 14-15 days before
# sampling) and discard the columns listed above
temp_before = pd.read_csv("../data/t_18_14_15_d_before.csv").drop(columns=features)
temp_before.head(1)

In [None]:
# Columns taken over from temp_before: the join key 'uid' plus the four
# per-time-point columns and the grid coordinates
before_cols = ['uid','t_0', 't_6', 't_12', 't_18', 'x_grid', 'y_grid']
# Inner join on 'uid' only; validate="1:1" raises if a uid is duplicated on either side
temp = temp.merge(temp_before[before_cols], how="inner", on='uid', validate="1:1")
temp.head(1)

In [None]:
# There was one erroneous line; the data for it is downloaded again here and
# written back into `temp`.
days = 2   # how many days to go back
hour = 12  # hour of the day (UTC); hours of interest: 6, 12, 18, 24
# Parameter/layer to download; options: ':TMP:surface' (surface temperature),
# ':DSWRF:surface'. Available parameters and layers:
# https://www.nco.ncep.noaa.gov/pmb/products/hrrr/hrrr.t00z.wrfsfcf00.grib2.shtml
param_layer = ':TMP:surface'
# Name of the parameter in the metadata column and in the grib2 data:
# 't' for temperature, 'si10' for wind 10 m above ground, 'dswrf' for radiation
forecast_param = 't'

import warnings

ERRONEOUS_ROW_POS = 4853  # positional index (iloc) of the erroneous row in `temp`
DAY_OFFSET = 14           # the first downloaded day lies this many days before sampling
# (to take only every second day, the day counter x could be doubled)

# Column that receives the re-downloaded values, e.g. 't_12'
target_column = forecast_param + '_' + str(hour)

sample_date = temp['date'].iloc[ERRONEOUS_ROW_POS]
# Index labels of all rows that share this sampling date
date_index_list = temp.index[temp['date'] == sample_date]
# One list with `days` slots per affected row
temp_list = [[0] * days for _ in range(len(date_index_list))]
start_date = climf.get_start_date(sample_date)  # convert to a time object
print(start_date)

# Warnings are silenced only while downloading, not for the rest of the notebook
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore')
    for x in range(days):
        day_date = start_date - timedelta(days=x + DAY_OFFSET)
        # Data array for the requested date/hour; `stop` flags that nothing usable came back
        ds, stop = climf.get_ds_aws_array(day_date, hour, param_layer, forecast_param)
        for count, index in enumerate(date_index_list):
            if stop:
                temp_list[count][x] = np.nan
            else:
                x_grid = temp.at[index, 'x_grid']
                y_grid = temp.at[index, 'y_grid']
                temp_list[count][x] = ds[x_grid][y_grid]

# Once all days are collected, store each list in its row. `.at` on the
# DataFrame writes into `temp` itself; the earlier chained form
# `temp[col].loc[idx] = ...` may assign to a temporary copy instead.
for index, values in zip(date_index_list, temp_list):
    temp.at[index, target_column] = values


In [None]:
# Per-time-point columns to normalise
features = ['t_0', 't_6', 't_12', 't_18']
# Round trip through strings: first cast the columns to strings, then convert
# the strings to lists (both climf helpers).
# NOTE(review): presumably needed because the re-downloaded row holds a real
# list while the other rows hold strings - confirm.
temp = climf.convert_str_to_list(climf.convert_to_str(temp, features), features)
temp.head(1)

In [None]:
# Temperature
# Combine the four per-time-point values of every row into one time series
# (climf.join_time_values) and store it as a string in the new
# 'temp_14_15d_before' column. The column is built in row order and assigned
# in one step instead of locating each row again through its 'uid'; uids are
# unique here because `temp` comes out of 1:1 merges on 'uid'.
temp['temp_14_15d_before'] = [
    str(climf.join_time_values(row.t_0, row.t_6, row.t_12, row.t_18))
    for row in tqdm(temp.itertuples(), total=len(temp))  # progress bar over all rows
]
temp.head(3)

In [None]:
# Remove the per-time-point columns and the grid coordinates, which are no longer needed
temp = temp.drop(columns=['t_12', 't_6', 't_0', 't_18', 'x_grid', 'y_grid'])
# Save the result; this writes to the same path as the earlier temperature
# series export and therefore replaces that file
temp.to_csv('../data/temperature_series.csv', index=False)


## Wind

In [None]:
# Load the four wind speed tables (one file each for 'si10_0', 'si10_6',
# 'si10_12' and 'si10_18'), in that order
wind_0, wind_6, wind_12, wind_18 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/wind_0.csv",
        "../data/metadata_si10_6_complete.csv",
        "../data/metadata_si10_12_complete.csv",
        "../data/metadata_si10_18_complete.csv",
    )
)

In [None]:
# Start from the 'si10_0' table and attach the other three time-point columns one
# after another. Each join is an inner join on 'uid'; validate="1:1" raises if a
# uid is duplicated on either side.
wind = wind_0
for frame, column in ((wind_6, 'si10_6'), (wind_12, 'si10_12'), (wind_18, 'si10_18')):
    wind = wind.merge(frame[['uid', column]], how="inner", on='uid', validate="1:1")

# Convert the si10 columns to lists for easier manipulation (climf helper)
features = ['si10_0','si10_6','si10_12','si10_18']
wind = climf.convert_str_to_list(wind, features)
wind.head(2)

In [None]:
# Wind
# Combine the four per-time-point wind values of every row into one time series
# (climf.join_time_values) and store it as a string in the new 'wind' column.
# The column is built in row order and assigned in one step, which avoids one
# full scan of the 'uid' column per row; uids are unique here because `wind`
# comes out of 1:1 merges on 'uid'.
wind['wind'] = [
    str(climf.join_time_values(row.si10_0, row.si10_6, row.si10_12, row.si10_18))
    for row in tqdm(wind.itertuples(), total=len(wind))  # progress bar over all rows
]

In [None]:
# The per-time-point columns have been combined into the 'wind' series, so they are removed
wind = wind.drop(columns=['si10_0','si10_6','si10_12','si10_18'])
# Store the result as CSV (no index column)
wind.to_csv('../data/wind_series.csv', index=False)

## Radiation

In [None]:
# Load the radiation (dswrf) tables for hours 0, 6, 12, 17 and 18, in that order
rad_0, rad_6, rad_12, rad_17, rad_18 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/metadata_dswrf_0.csv",
        "../data/metadata_dswrf_6.csv",
        "../data/metadata_dswrf_12.csv",
        "../data/metadata_dswrf_17.csv",
        "../data/metadata_dswrf_18.csv",
    )
)


In [None]:
# Build the radiation table: start from the hour-17 table and attach the
# 'dswrf_0' and 'dswrf_18' columns via inner joins on 'uid' (validate="1:1"
# raises on duplicated uids).
# TODO: 'dswrf_6' (rad_6) and 'dswrf_12' (rad_12) are not merged in yet.
rad = (
    rad_17
    .merge(rad_0[['uid', 'dswrf_0']], how="inner", on='uid', validate="1:1")
    .merge(rad_18[['uid', 'dswrf_18']], how="inner", on='uid', validate="1:1")
)

# Cast the radiation columns to strings and then convert them to lists (climf helpers)
features = ['dswrf_0','dswrf_17','dswrf_18']  # TODO: add 'dswrf_12' and 'dswrf_6' later
rad = climf.convert_to_str(rad, features)
rad = climf.convert_str_to_list(rad, features)
rad.head(1)

In [None]:
# Radiation
# Combine the dswrf values for hours 0, 17 and 18 of every row into one time
# series (climf.join_time_values_three) and store it as a string in the new
# 'rad_0_17_18' column.
# TODO: include the correct values for 12 o'clock once they are available.
# The column is built in row order and assigned in one step, which avoids one
# full scan of the 'uid' column per row; uids are unique here because `rad`
# comes out of 1:1 merges on 'uid'.
rad['rad_0_17_18'] = [
    str(climf.join_time_values_three(row.dswrf_0, row.dswrf_17, row.dswrf_18))
    for row in tqdm(rad.itertuples(), total=len(rad))  # progress bar over all rows
]

In [None]:
# Grid coordinates, the transformed longitude and leftover index columns are
# not needed in the exported table
features = ['x_grid', 'y_grid','longitude_trans', 'Unnamed: 0.1', 'Unnamed: 0' ]
rad = rad.drop(columns=features)
rad.head()

In [None]:
# Export the radiation table as CSV (no index column)
radiation_csv = '../data/radiation.csv'
rad.to_csv(radiation_csv, index=False)