In [1]:
import sys
# Put the sibling `reconstruct_missing_data` directory on the import path so that
# `data_loading` can be imported below. The path is relative, so it is resolved
# against the current working directory.
sys.path.append('../reconstruct_missing_data')

from data_loading import find_data_files, load_data_set, get_anomalies, create_missing_mask, split_and_scale_data

import numpy as np
import xarray as xr
from pathlib import Path
from json import dump, load
import os

In [2]:
# Show where the notebook is running from; every relative path below depends on it.
print(Path.cwd())

/gxfs_work1/geomar/smomw511/GitHub/MarcoLandtHayen/reconstruct_missing_data/notebooks


In [3]:
# List the FOCI files found under the test-data directory.
find_data_files(
    data_source_name="FOCI",
    data_path="../data/test_data/",
)

[PosixPath('../data/test_data/FOCI/FOCI1.3-SW038_1m_23500101_23591231_grid_T_atmos_grid.nc'),
 PosixPath('../data/test_data/FOCI/FOCI1.3-SW038_echam6_ATM_mm_2350-2359_geopoth_pl_monthly_50000.nc'),
 PosixPath('../data/test_data/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-2359_precip_monthly_1.nc'),
 PosixPath('../data/test_data/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-2359_slp_monthly_1.nc'),
 PosixPath('../data/test_data/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-2359_temp2_monthly_1.nc'),
 PosixPath('../data/test_data/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-2359_tsw_monthly_1.nc')]

In [6]:
# List the FOCI files found under the full raw-data directory.
find_data_files(
    data_source_name="FOCI",
    data_path="../../../../climate_index_collection/data/raw/2022-08-22/",
)

[PosixPath('../../../../climate_index_collection/data/raw/2022-08-22/FOCI/FOCI1.3-SW038_1m_23500101_33491231_grid_T_atmos_grid.nc'),
 PosixPath('../../../../climate_index_collection/data/raw/2022-08-22/FOCI/FOCI1.3-SW038_echam6_ATM_mm_2350-3349_geopoth_pl_monthly_50000_midmonth.nc'),
 PosixPath('../../../../climate_index_collection/data/raw/2022-08-22/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-3349_precip_monthly_1_midmonth.nc'),
 PosixPath('../../../../climate_index_collection/data/raw/2022-08-22/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-3349_slp_monthly_1_midmonth.nc'),
 PosixPath('../../../../climate_index_collection/data/raw/2022-08-22/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-3349_temp2_monthly_1.nc'),
 PosixPath('../../../../climate_index_collection/data/raw/2022-08-22/FOCI/FOCI1.3-SW038_echam6_BOT_mm_2350-3349_tsw_monthly_1_midmonth.nc')]

In [9]:
# Load the FULL FOCI data set (the path points at the raw-data directory, not at ../data/test_data/).
# Later cells read 'sea-level-pressure', 'sea-surface-temperature' and "is_over_ocean" from the result.
data_FOCI = load_data_set(data_path='../../../../climate_index_collection/data/raw/2022-08-22/', data_source_name='FOCI')

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


In [8]:
# Display the loaded data set.
data_FOCI

In [6]:
# Take the sea-level-pressure field out of the data set via `.values` and show its shape.
slp_FOCI = data_FOCI["sea-level-pressure"].values
np.shape(slp_FOCI)

(12000, 96, 192)

In [7]:
# Take the sea-surface-temperature field out of the data set via `.values` and show its shape.
sst_FOCI = data_FOCI["sea-surface-temperature"].values
np.shape(sst_FOCI)

(12000, 96, 192)

In [9]:
# Number of NaN entries in the raw SST array.
np.isnan(sst_FOCI).sum()

65796000

In [10]:
# Compute anomalies for one feature of the data set
# (per the original note: the whole time span serves as climatology).
data = get_anomalies(
    data_set=data_FOCI,
    feature="sea-surface-temperature",
)

In [11]:
# Build the mask of missing values; min and max missing rate are both set to 0.9 and the seed is fixed.
missing_mask = create_missing_mask(
    data=data,
    mask_type="fixed",
    missing_type="discrete",
    missing_min=0.9,
    missing_max=0.9,
    seed=0,
)

In [23]:
# Number of NaN entries in the anomaly data.
np.isnan(data).sum()

0

In [13]:
# Sum the mask over axes 1 and 2, giving one total per entry along axis 0.
missing_mask.sum(axis=(1, 2))

array([1835, 1835, 1835, ..., 1835, 1835, 1835])

In [14]:
# Sparse data are the inputs, complete data the targets. Both are split into training and
# validation parts; per the original note, scaling/normalization statistics come from the
# training data only.
(
    train_input,
    val_input,
    train_target,
    val_target,
    train_min,
    train_max,
    train_mean,
    train_std,
) = split_and_scale_data(data, missing_mask, train_val_split=0.8, scale_to="zero_one")

In [24]:
# Count the entries that are exactly zero across the training and validation targets.
temp_number = sum(np.sum(part == 0) for part in (train_target, val_target))

In [19]:
# 90 % of the total number of data entries (0.9 matches the missing rate passed to create_missing_mask).
0.9 * np.size(data)

199065600.0

In [20]:
# Zero-valued input entries (training + validation) minus 90 % of the total entry count.
sum(np.sum(part == 0) for part in (train_input, val_input)) - np.size(data) * 0.9

6734400.0

In [26]:
# Non-zero input entries across training and validation inputs.
sum(np.sum(part != 0) for part in (train_input, val_input))

15384000

In [27]:
# 10 % of the entries that are not counted in temp_number (zero-valued target entries).
0.1 * (np.size(data) - temp_number)

15538799.9

In [76]:
# Number of NaN entries in the anomaly data.
np.isnan(data).sum()

65796000

In [69]:
# Overwrite every NaN entry of `data` with zero, in place.
np.putmask(data, np.isnan(data), 0)

In [71]:
# Largest value in the anomaly data.
data.max()

13.437347

In [None]:
   

# Extend data, if desired:
# NOTE(review): `clone_data` is not among the names imported from `data_loading` in the
# first cell, and `augmentation_factor` is not assigned in any cell shown here. This cell
# has no execution count and would presumably raise NameError as written — confirm, then
# add the import and define the factor before running.
data = clone_data(data=data, augmentation_factor=augmentation_factor)

In [None]:
# Set the NaN entries of the SST array to zero, in place.
# NOTE(review): the original index was the invalid placeholder `!` (a syntax error);
# a NaN mask is assumed to be the intended selection — confirm.
sst_FOCI[np.isnan(sst_FOCI)] = 0

In [36]:
# `is_over_ocean` was never assigned in this notebook (NameError on a fresh kernel);
# take it from the data set, which exposes it under the same key.
# Assumes the mask has no leading time axis — TODO confirm.
is_over_ocean = data_FOCI["is_over_ocean"].values

# Repeat the mask along a new leading axis so it has one copy per entry of sst_FOCI's first axis.
ocean = np.repeat(np.expand_dims(is_over_ocean, axis=0), sst_FOCI.shape[0], axis=0)

In [37]:
# Zero out, in place, every SST entry where the ocean mask is False.
sst_FOCI[np.logical_not(ocean)] = 0

In [38]:
# Number of NaN entries left in the SST array.
np.isnan(sst_FOCI).sum()

0

In [35]:
# Show only the shape of the repeated mask instead of dumping the whole array.
ocean.shape

(12000, 96, 192)

In [55]:
# NaN count of the SST field after applying `.where` with the data set's ocean mask.
np.isnan(
    data_FOCI["sea-surface-temperature"].where(data_FOCI["is_over_ocean"]).values
).sum()

65796000

In [51]:
# Seed the global NumPy generator, then draw a 2x3 sample from the uniform distribution on [0, 1).
np.random.seed(0)
np.random.uniform(0, 1, (2, 3))

array([[0.5488135 , 0.71518937, 0.60276338],
       [0.54488318, 0.4236548 , 0.64589411]])