# Purpose

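Purpose of the script:
Obtain hourly time series of updated wind speed and air pressure by linearly interpolating the 3-hourly input data. The interpolated series are then shifted by 8 hours, rounded, the wind is scaled by seasonal correction factors, and the result is written to a CSV file.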

# Environment Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os
import sys

# Linear Interpolation of Supplemental Data

In [None]:
###############################################################################
# Input/output locations, input file names and time axes
###############################################################################
# Folder with the raw 3-hourly text files, and folder for the processed output.
dir_input = "./inputs"
dir_out = "./processing"

# One input file per variable; the processing order follows the dict order.
fname_dict = {
    "updated_wind": "UpdatedWP3hrApr1915_Dec2015.txt",
    "air_pressure": "AirPressure3hrApr1915_Dec2015.txt",
}
data_list = list(fname_dict)

# Both time axes cover the same period (first and last timestamp inclusive);
# only the step differs: 3 hours for the input, 1 hour for the output.
dt_bgn = datetime(1915, 4, 1, 0, 0)
dt_end = datetime(2015, 12, 31, 21, 0)

index_3hr = pd.date_range(start=dt_bgn, end=dt_end, freq=timedelta(hours=3))
index_1hr = pd.date_range(start=dt_bgn, end=dt_end, freq=timedelta(hours=1))

# Number of samples on each axis.
npt_3hr, npt_1hr = len(index_3hr), len(index_1hr)

# Read each 3-hourly series and linearly interpolate it onto the hourly axis.
# dict_dat maps the variable name to its interpolated hourly values.
dict_dat = {}

# Sample positions expressed in hours since the first timestamp: the 3-hourly
# inputs sit at 0, 3, 6, ... and the hourly targets at 0, 1, 2, ...
hours_3hr = 3.0 * np.arange(npt_3hr)
hours_1hr = np.arange(npt_1hr, dtype=float)

for data in data_list:
  fname = fname_dict[data]
  tmp_3hr = np.loadtxt(os.path.join(dir_input, fname))

  # Make sure the number of data points matches the 3-hourly time axis.
  # Raised explicitly (instead of `assert`) so the check is not stripped
  # when Python runs with -O.
  if len(tmp_3hr) != npt_3hr:
    raise ValueError(
      "{}: expected {} 3-hourly values, found {}".format(
        fname, npt_3hr, len(tmp_3hr)))

  # Vectorised linear interpolation. Between two consecutive 3-hourly values
  # a and b this yields a, a + (b-a)/3, a + 2*(b-a)/3. The last hourly sample
  # coincides with the last 3-hourly sample, so the end value is carried over
  # unchanged (bounds cannot be interpolated).
  tmp_1hr = np.interp(hours_1hr, hours_3hr, tmp_3hr)

  # Append to dictionary
  dict_dat[data] = tmp_1hr

# Assemble the interpolated series into a single hourly dataframe (one column
# per variable, in data_list order) and tag the column names with "(int.)"
# to mark them as interpolated values.
df_data = pd.DataFrame(dict_dat, index=index_1hr, columns=data_list).rename(
    columns={
        "updated_wind": "updated_wind (int.)",
        "air_pressure": "air_pressure (int.)",
    }
)
print(df_data)

In [None]:
# Move every value 8 rows earlier (8 hours on the hourly index). The index
# itself is left as is, so the last 8 rows end up empty (NaN).
# NOTE(review): the reason for the 8-hour offset is not stated here
# (presumably a time-zone adjustment) - confirm.
df_data = df_data.shift(periods=-8)
print(df_data)

## Apply correction factor to wind

In [None]:
#
# Round every value to two decimal places (to the nearest value, not upward).
# Raise the number of decimals here for more precision.
#
df_data = df_data.round(2)

print(df_data.head())

In [None]:
# Keep a handle on the complete hourly index; the correction cells below
# build their date selections from it.
index_full = df_data.index

# Column that will be published: starts as a copy of the interpolated wind
# and is then overwritten with corrected values where a factor applies.
df_data["Wind [mph]"] = df_data["updated_wind (int.)"]

In [None]:
# Factor of 2.0: every timestamp from April through October (inclusive)
# gets the interpolated wind multiplied by 2.
factor = 2.0
months = index_full.month
in_season = (months >= 4) & (months <= 10)
df_data.loc[in_season, "Wind [mph]"] = (
    df_data.loc[in_season, "updated_wind (int.)"] * factor
)

In [None]:
# Factor of 1.25: applied to April 1 and October 31 of every year. The value
# is recomputed from the interpolated wind, replacing whatever was stored in
# "Wind [mph]" for those days.
factor = 1.25
month, day = index_full.month, index_full.day
on_ramp_day = ((month == 4) & (day == 1)) | ((month == 10) & (day == 31))
df_data.loc[on_ramp_day, "Wind [mph]"] = (
    df_data.loc[on_ramp_day, "updated_wind (int.)"] * factor
)

In [None]:
# Factor of 1.50: applied to April 2 and October 30 of every year. The value
# is recomputed from the interpolated wind, replacing whatever was stored in
# "Wind [mph]" for those days.
factor = 1.50
month, day = index_full.month, index_full.day
on_ramp_day = ((month == 4) & (day == 2)) | ((month == 10) & (day == 30))
df_data.loc[on_ramp_day, "Wind [mph]"] = (
    df_data.loc[on_ramp_day, "updated_wind (int.)"] * factor
)

In [None]:
# Factor of 1.75: applied to April 3 and October 29 of every year. The value
# is recomputed from the interpolated wind, replacing whatever was stored in
# "Wind [mph]" for those days.
factor = 1.75
month, day = index_full.month, index_full.day
on_ramp_day = ((month == 4) & (day == 3)) | ((month == 10) & (day == 29))
df_data.loc[on_ramp_day, "Wind [mph]"] = (
    df_data.loc[on_ramp_day, "updated_wind (int.)"] * factor
)

In [None]:
###############################################################################
# Build the table to publish
###############################################################################
# Keep the corrected wind and the interpolated pressure only; the uncorrected
# wind column is dropped. drop(), rename() and rename_axis() each return a new
# object, so df_data (columns and index name) is left untouched.
df_print = (
    df_data
    .drop(columns="updated_wind (int.)")
    .rename(columns={"air_pressure (int.)": "Atm_Pres [inHg]"})
    .rename_axis("Date")
)

###############################################################################
# Output result - Hourly data
###############################################################################
# Create the output folder if needed: to_csv() does not create missing
# directories and would otherwise fail only after all processing is done.
os.makedirs(dir_out, exist_ok=True)
fname_out = os.path.join(dir_out, "Timeseries_step4_wind,pressure.csv")

# Missing values (e.g. the rows emptied by the time shift) are written as
# "NaN"; numbers are written with three decimals.
df_print.to_csv(fname_out, sep=",", na_rep="NaN", float_format="%.3f")