# Cissa gap filling example - Monte-Carlo component method
We can use CiSSA to fill gaps in a time series by making an initial guess for the missing values and then repeating the CiSSA fit until the filled values converge.

In [None]:
#importing required packages
import pandas as pd
import numpy as np
import copy
import os
import warnings
warnings.filterwarnings('ignore')
# Locate the repository root and import Cissa from the package source directory.
#
# The original cell used a relative os.chdir('../../..') every time it ran.
# After the first run the working directory is already the repository root, so
# re-running the cell (without restarting the kernel) moved three levels higher
# and broke every later path. The paths are therefore resolved only once.
if 'home_path' not in globals():
    dir_path = os.path.abspath('')   # directory the notebook was started from
    os.chdir('../../..')             # assumes the notebook sits three levels below the repo root
    home_path = os.path.abspath('')  # repository root

# Temporarily switch into the package directory so `cissa` is importable from
# the current working directory, and always switch back to the repository root
# afterwards - even if the import fails.
os.chdir(os.path.join(home_path, 'pycissa', 'processing', 'cissa'))
try:
    from cissa import Cissa
finally:
    os.chdir(home_path)

In [None]:
# Load the 'Data' sheet of the EPC workbook (source is the Ministry of Economy of Spain).
epc_workbook_path = home_path + r'/data/EPC.xlsx'
data = pd.read_excel(epc_workbook_path, sheet_name='Data')

Here we will replace some data with NaN values to simulate missing values.

In [None]:
# Simulate missing observations by blanking out five known values, working on
# a deep copy so the original `data` frame stays intact.
#
# A single .loc assignment replaces the chained form
# `data_with_gaps[col][i] = np.nan`: chained assignment may write to a
# temporary copy and leave `data_with_gaps` unchanged (it never updates the
# frame when pandas Copy-on-Write is enabled).
consumption_column = 'Electric power consumption in Spain'
gap_rows = [100, 200, 300, 400, 500]  # index labels of the values to remove

data_with_gaps = copy.deepcopy(data)
data_with_gaps.loc[gap_rows, consumption_column] = np.nan

# Show the values around the last gap to confirm the NaN is in place.
print(data_with_gaps[consumption_column][499:502])

In [None]:
# Build the Cissa object from the date column and the consumption series that
# now contains the simulated gaps.
cissa_object = Cissa(
    t=data_with_gaps['Date'],
    x=data_with_gaps['Electric power consumption in Spain'],
)

### We can either fix the values before running CiSSA using the pre_fill_gaps function, or use auto_cissa which will do the job as part of the automated process.

We will use the Monte-Carlo method to find the significant components. All other components are discarded during the gap-filling process.

In [None]:
# Here we choose a window length of 5 years (L = 12*5). You can try other
# window lengths to see how they affect the error rate.

# Parameters that control the gap filling.
gap_fill_settings = dict(
    # These two settings help us understand the error made during gap filling:
    # for 10 repeats we remove 10 known points, gap fill, and calculate the
    # error. On the last run all points are included (none removed).
    test_number=10,
    test_repeats=10,
    # Missing values start at the previous good value multiplied by 1.25.
    initial_guess=['previous', 1.25],
    # Enough iterations to make sure we converge.
    max_iter=500,
    # Convergence value is 50 - adjust this for your time series.
    convergence=['value', 50],
    # Just so we can see what is going on...
    verbose=True,
)

cissa_object.auto_cissa(
    L=12 * 5,
    K_surrogates=5, alpha=0.1,  # parameters for the Monte-Carlo routine
    **gap_fill_settings,
)

## Cool, let's look at the imputed points...

In [None]:
# Look up the 'figure_gap_fill' entry under figures['cissa'] of cissa_object;
# as the last expression of the cell it is rendered as the cell output.
cissa_object.figures.get('cissa').get('figure_gap_fill')

## Let's see the error...

In [None]:
# Look up the 'figure_gap_fill_error' entry under figures['cissa'] of
# cissa_object; as the last expression of the cell it is rendered as the output.
cissa_object.figures.get('cissa').get('figure_gap_fill_error')

## Let's try a smaller value of L. This time we use the pre_fill_gaps function, so that we can run CiSSA afterwards with a larger window size.

In [None]:
# Build a second Cissa object from the same gappy series; it is used for the
# gap-filling run with the smaller window.
cissa_object_small_L = Cissa(
    t=data_with_gaps['Date'],
    x=data_with_gaps['Electric power consumption in Spain'],
)

In [None]:
# Here we choose a window length of 2 years (L = 12*2). You can try other
# window lengths to see how they affect the error rate.

# Parameters that control the gap filling.
gap_fill_settings = dict(
    # These two settings help us understand the error made during gap filling:
    # for 10 repeats we remove 10 known points, gap fill, and calculate the
    # error. On the last run all points are included (none removed).
    test_number=10,
    test_repeats=10,
    # Missing values start at the previous good value multiplied by 1.25.
    initial_guess=['previous', 1.25],
    # Enough iterations to make sure we converge.
    max_iter=500,
    # Convergence value is 50 - adjust this for your time series.
    convergence=['value', 50],
    # Just so we can see what is going on...
    verbose=True,
)

cissa_object_small_L.pre_fill_gaps(
    L=12 * 2,
    K_surrogates=5, alpha=0.1,  # parameters for the Monte-Carlo routine
    **gap_fill_settings,
)

In [None]:
# Look up the 'figure_gap_fill' entry under figures['cissa'] of
# cissa_object_small_L; as the last expression of the cell it is rendered as
# the output.
cissa_object_small_L.figures.get('cissa').get('figure_gap_fill')

In [None]:
# Look up the 'figure_gap_fill_error' entry under figures['cissa'] of
# cissa_object_small_L; as the last expression of the cell it is rendered as
# the output.
cissa_object_small_L.figures.get('cissa').get('figure_gap_fill_error')

In [None]:
# Print the trend / periodic / noise shares stored under
# results['cissa']['noise component tests'].
# NOTE(review): the values are read from `cissa_object` (the first, L = 12*5
# run), not from `cissa_object_small_L` - confirm this is the intended object.
print('COMPONENT VARIANCE')
noise_component_tests = cissa_object.results.get('cissa').get('noise component tests')
print(f"TREND   : {noise_component_tests.get('trend_share')}%")
print(f"PERIODIC: {noise_component_tests.get('periodic_share')}%")
print(f"NOISE   : {noise_component_tests.get('noise_share')}%")

### We could then rerun auto_cissa with a larger window

In [None]:
# Build a fresh Cissa object from the series held by the pre-filled object and
# run the automated analysis on it with the larger 5-year window (L = 12*5).
#
# The original line was a chained assignment that also rebound
# `cissa_object_small_L` to the new object. That discarded the pre-filled
# object, so the earlier cells that display its figures could no longer be
# re-run. Only the new name is bound here.
# NOTE(review): presumably `cissa_object_small_L.x` holds the gap-filled series
# after pre_fill_gaps - confirm against the Cissa class.
cissa_object_gap_fixed = Cissa(t=data_with_gaps['Date'], x=cissa_object_small_L.x)
cissa_object_gap_fixed.auto_cissa(L = 12*5,
                       K_surrogates = 5, alpha = 0.1,  # parameters for the Monte-Carlo routine
                       )

In [None]:
# Look up the 'figure_split_components' entry under figures['cissa'] of
# cissa_object_gap_fixed; as the last expression of the cell it is rendered as
# the output.
cissa_object_gap_fixed.figures.get('cissa').get('figure_split_components')

# Now let's try approximating then removing trend and refitting the gaps...

In [None]:
# Subtract the trend held by the first Cissa object from the gappy series and
# wrap the result in a new Cissa object.
# NOTE(review): assumes `cissa_object.x_trend` aligns element-wise with the
# consumption column - confirm its shape against the Cissa class.
detrended_consumption = (
    data_with_gaps['Electric power consumption in Spain'] - cissa_object.x_trend
)
cissa_object_no_trend = Cissa(t=data_with_gaps['Date'], x=detrended_consumption)

In [None]:
# Here we choose a window length of 5 years (L = 12*5). You can try other
# window lengths to see how they affect the error rate.

# Parameters that control the gap filling.
gap_fill_settings = dict(
    # These two settings help us understand the error made during gap filling:
    # for 10 repeats we remove 10 known points, gap fill, and calculate the
    # error. On the last run all points are included (none removed).
    test_number=10,
    test_repeats=10,
    # Missing values start at the previous good value multiplied by 1.25.
    initial_guess=['previous', 1.25],
    # Enough iterations to make sure we converge.
    max_iter=500,
    # Convergence value is 50.
    convergence=['value', 50],
    # Just so we can see what is going on...
    verbose=True,
)

cissa_object_no_trend.auto_cissa(
    L=12 * 5,
    K_surrogates=5, alpha=0.1,  # parameters for the Monte-Carlo routine
    **gap_fill_settings,
)

In [None]:
# Look up the 'figure_gap_fill' entry under figures['cissa'] of
# cissa_object_no_trend; as the last expression of the cell it is rendered as
# the output.
cissa_object_no_trend.figures.get('cissa').get('figure_gap_fill')

In [None]:
# Look up the 'figure_gap_fill_error' entry under figures['cissa'] of
# cissa_object_no_trend; as the last expression of the cell it is rendered as
# the output.
cissa_object_no_trend.figures.get('cissa').get('figure_gap_fill_error')