In [10]:
from analogue_ensemble import * 
from datetime import datetime
import heliopy.data.omni as omni
from tqdm import tqdm_notebook as tqdm

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
def get_omni_rtn_data(start_time, end_time):
    """Fetch the OMNI COHO 1-hour merged magnetic-field/plasma dataset.

    The request is forwarded to ``omni._omni`` with yearly intervals and the
    missing-units warning switched off.

    Parameters
    ----------
    start_time, end_time
        Bounds of the requested period, passed through unchanged
        (this notebook supplies ``datetime`` objects).

    Returns
    -------
    Whatever ``omni._omni`` returns; callers in this notebook immediately
    call ``.to_dataframe()`` on it.
    """
    # NOTE(review): `_omni` has a leading underscore, i.e. it looks like a
    # private heliopy helper -- confirm it is stable across heliopy versions.
    return omni._omni(
        start_time,
        end_time,
        identifier='OMNI_COHO1HR_MERGED_MAG_PLASMA',  # COHO 1HR data
        intervals='yearly',
        warn_missing_units=False,
    )
    
def calculate_geoeffectiveness(wind_density: np.ndarray,
                               hmf_intensity: np.ndarray,
                               wind_speed: np.ndarray,
                               hmf_clock_angle: np.ndarray,
                               norm_angle=False) -> np.ndarray:
    """Evaluate the geoeffectiveness coupling term element-wise.

    Computes, with ``alpha = 0.5``::

        n**(2/3 - alpha) * B**(2*alpha) * v**(7/3 - 2*alpha) * sin(theta/2)**4

    Parameters
    ----------
    wind_density : n in the formula above.
    hmf_intensity : B in the formula above.
    wind_speed : v in the formula above.
    hmf_clock_angle : theta in the formula above, in radians.
    norm_angle : describes the domain of ``hmf_clock_angle``.
        False: theta ~ [-pi, pi] (standard).
        True: theta ~ [0, 2pi] (new normalisation); pi is subtracted
        internally to revert to the standard domain.

    Returns
    -------
    The element-wise product, with the broadcast shape/type of the inputs.
    The inputs are never modified.
    """
    alpha = 0.5  # empirically determined

    if norm_angle:
        # Revert domain to [-pi, pi] from [0, 2pi]. Deliberately NOT `-=`:
        # an in-place subtraction would shift the caller's array/Series as a
        # side effect, so calling twice on the same data would shift it twice.
        hmf_clock_angle = hmf_clock_angle - np.pi

    geoeffectiveness = (
        wind_density ** (2 / 3 - alpha)
        * hmf_intensity ** (2 * alpha)
        * wind_speed ** (7 / 3 - 2 * alpha)
        * np.sin(hmf_clock_angle / 2) ** 4
    )
    return geoeffectiveness

In [12]:
# Date range of the OMNI record to load.
data_start_time = datetime(1995, 1, 1)
data_end_time = datetime(2019, 12, 31)

# Window lengths and ensemble size.
# NOTE(review): presumably counted in hourly samples (the dataset identifier
# says 1HR) -- confirm against the analogue_ensemble module.
training_window = 24
forecast_window = 24
num_analogues = 50

omni_data = get_omni_rtn_data(data_start_time, data_end_time).to_dataframe()

# Derived columns: an angle built from the BT/BN field components, and the
# geoeffectiveness term G computed from density, field magnitude, speed and
# that angle.
omni_data['HMF_INC'] = np.arctan2(-omni_data['BT'].to_numpy(),
                                  omni_data['BN'].to_numpy())
omni_data['G'] = calculate_geoeffectiveness(
    wind_density=omni_data["N"],
    hmf_intensity=omni_data["ABS_B"],
    wind_speed=omni_data["V"],
    hmf_clock_angle=omni_data["HMF_INC"],
)

# NOTE(review): interpolation runs after the derived columns exist, so gaps in
# HMF_INC and G are filled by interpolating those columns directly rather than
# by recomputing them from the gap-filled inputs -- confirm this is intended
# (HMF_INC is an angle, so interpolating it across the +/-pi wrap may matter).
omni_data = omni_data.interpolate()

In [13]:
# Columns of omni_data that are forecast.
VARS_TO_PREDICT = ["N", "ABS_B", "V", "HMF_INC", "G"]

# Forecast start rows: every row except the first 24 and the last 24.
# NOTE(review): the literal 24 presumably mirrors training_window /
# forecast_window -- confirm before changing either.
length = len(omni_data)
test_period = range(24, length - 24)
print(test_period)

# Disabled alternative: restrict the test period to the tail that remains
# after a train/validation split.
# train_prop = 0.6
# val_prop = 0.2
# test_period = range(int(length * (train_prop + val_prop)), len(omni_data) - 24)
# print(test_period)

print(omni_data.columns)

range(24, 219119)
Index(['heliographicLatitude', 'heliographicLongitude', 'BR', 'BT', 'BN',
       'ABS_B', 'V', 'elevAngle', 'azimuthAngle', 'N', 'T', 'HMF_INC', 'G'],
      dtype='object')


In [14]:
# Run the analogue ensemble for every variable and every forecast start row.
# preds_all / obs_all collect one (len(test_period), 24) array per variable,
# in VARS_TO_PREDICT order.
preds_all = []
obs_all = []
for key in VARS_TO_PREDICT:
    data = omni_data[key]

    # One row per forecast start, 24 columns per row.
    predictions = np.zeros((len(test_period), 24))
    observed = np.zeros((len(test_period), 24))
    for i, forecast_time_start in enumerate(tqdm(test_period, desc=key)):
        # analogue_matrix is returned by the helper but not used in this cell.
        analogue_matrix, analogue_prediction, observed_trend = \
          run_analogue_ensemble(data, forecast_time_start)
        # Keep everything after the first 24 entries.
        # NOTE(review): presumably the first 24 entries are the training part
        # of the window and the rest is the forecast -- confirm against
        # run_analogue_ensemble.
        predictions[i] = analogue_prediction[24:]
        observed[i] = np.array(observed_trend[24:])
    preds_all.append(predictions)
    obs_all.append(observed)

    # Written once per variable, inside the outer loop, so variables that
    # finished are on disk even if a later variable is interrupted.
    np.save(f'./predictions-{key}', predictions)
    np.save(f'./observed-{key}', observed)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i, forecast_time_start in enumerate(tqdm(test_period)):


HBox(children=(FloatProgress(value=0.0, max=219095.0), HTML(value='')))

  return np.nanmean(a, axis, out=out, keepdims=keepdims)



KeyboardInterrupt: 

In [6]:
# Stack the per-variable arrays and move the variable axis to the end:
# (variable, row, step) -> (row, step, variable).
# NOTE(review): this rebinds preds_all/obs_all from lists to arrays, so the
# cell is meant to run exactly once after the forecast loop.
preds_all = np.moveaxis(np.stack(preds_all), 0, -1)
obs_all = np.moveaxis(np.stack(obs_all), 0, -1)

### NaN Removal

In [7]:
# Reload the record WITHOUT interpolation so that gaps in the original data
# are still visible as NaN.
omni_data_orig = get_omni_rtn_data(
  data_start_time, data_end_time).to_dataframe()

omni_data_orig['HMF_INC'] = np.arctan2(-omni_data_orig['BT'].values, omni_data_orig['BN'].values)
# VARS_TO_PREDICT contains 'G', which is a derived column and is not part of
# the downloaded frame; without it the column selection below raises KeyError.
omni_data_orig['G'] = calculate_geoeffectiveness(
    omni_data_orig["N"], omni_data_orig["ABS_B"], omni_data_orig["V"], omni_data_orig["HMF_INC"])

multi_array = np.array(omni_data_orig[VARS_TO_PREDICT])[test_period]

# Start offsets of every full (2 * training_window)-row window in multi_array.
window_starts = range(len(multi_array) - 2 * training_window + 1)

# First / second half of each window.
inputs = np.array([multi_array[i:i + training_window]
                   for i in window_starts])
outputs = np.array([multi_array[i + training_window:i + 2 * training_window]
                    for i in window_starts])

# Whole window; used to find windows that contain any NaN.
# NOTE(review): window i starts at row test_period[i] of the original record,
# and there are fewer windows than entries in test_period. Confirm that this
# matches the rows run_analogue_ensemble actually used for forecast start
# test_period[i]; otherwise the NaN filter is applied to shifted rows.
nan_check = np.array([multi_array[i:i + 2 * training_window]
                      for i in window_starts])

In [8]:
# Indices of the windows in nan_check that contain no NaN at all. Computed
# once and vectorised (previously rebuilt twice with a Python-level loop over
# every window). Integer indices are used instead of a boolean mask because
# nan_check has fewer rows than preds_all/obs_all.
# NOTE(review): this rebinds preds_all/obs_all, so re-running the cell would
# filter the already-filtered arrays again.
nan_free_rows = np.flatnonzero(~np.isnan(nan_check).any(axis=(1, 2)))
preds_all = preds_all[nan_free_rows]
obs_all = obs_all[nan_free_rows]

### Final MAE and MSE

In [9]:
# Mean absolute / mean squared error over every row, step and variable.
# NOTE(review): the variables are averaged together unscaled, so whichever has
# the largest numeric range presumably dominates both figures -- confirm that a
# single pooled number is what is wanted.
print("MAE:", np.abs(preds_all - obs_all).mean())
print("MSE:", np.square(preds_all - obs_all).mean())

MAE: 2.5631750299562683
MSE: 34.93773735769931
