## Component Reconstruction - Data characteristic driven modeling
### Understand key hidden characteristics of the decomposed IMFs
### Reconstruct them into more meaningful components 

In [1]:
## Data characteristics - Complexity Characteristics Measurement, Main pattern characteristics
## Reconstruction rules - Add similar ones, maintain seperately

import math
import csv

## Read the decomposed intrinsic mode functions

nIMFs = 9 # From the decomposition method
eIMFs = []
for i in range(0,nIMFs):
    eIMFs.append([]) # Array for each intrinsic mode function
    
#print(eIMFs)
    
with open("Intrinsic Mode Functions.csv","r") as f:
    reader = csv.reader(f)
    for row in reader:
        for i in range(0,len(row)):
            eIMFs[i].append(float(row[i]))
f.close()

print(len(eIMFs[4]))
    

1695


### Complexity Characteristics Measurement : Permutation Entropy

In [4]:
#from pyentrp import entropy as en

## Embedding Dimension m and time delay
m = 3
tau = 1
## Recommended relationship is m! << N: where N is the number of data points in the time series

def factorial(x):
    
    fac = 1
    for i in range(1,x):
        fac = fac*(i+1)
        
    return(fac)

fac = factorial(m)
print(fac)

# sort the index based on the sorting of the array 
def bubble_sort(arr):
    
    index = [0,1,2]
    for i in range(0,m-1):
        for j in range(0,m-i-1):
            if arr[j]>arr[j+1]:
                # swap the element in the array
                
                temp1 = arr[j]
                arr[j] = arr[j+1]
                arr[j+1] = temp1
                
                # swap the index elements 
                
                temp2 = index[j]
                index[j] = index[j+1]
                index[j+1] = temp2
                
    #print(arr)
    return(index)

# Function to find the complexity characteristic measurement of each Intrinsic Mode Functions

l = len(eIMFs[0])

def Complexity_Characteristic_Measurement(IMF):
    
    ## There are m data points in the decomposed time series or IMFs and N-(m-1)tau time series vectors.
    ## Now for each of these time series vectors we arrange them in ascending order
    ## We evaluate the frequency of occurence of each permutation index

    embedded_ts = []
    for i in range(0,l-(m-1)*tau):
        temp = []
        for j in range(0,m):
            temp.append(IMF[i+j*tau])
        embedded_ts.append(temp)

    print("Number of observations: ",len(embedded_ts))
    
    # frequency of occurence
    freq = [0 for i in range(0,factorial(m))]
    template = [[0,1,2],[0,2,1],[1,0,2],[1,2,0],[2,1,0],[2,0,1]]
    
    for i in embedded_ts:
        ind = bubble_sort(i)
        for j in template:
            if ind == j:
                pos = template.index(j)
                break
        freq[pos] = freq[pos] + 1
        
    print("Frequency: ",freq)
    sm = 0
    for x in freq:
        sm = sm + x
    print("Number of observations: ",sm)
    
    ## Probabilities:
    prob = [x/sm for x in freq]
    print("Probability of embedding dimensions: ",prob)
    sn = 0
    for x in prob:
        sn = sn + x
    print("Probability Sum: ",sn)
    
    ## Calculate Permutation Entropy:
    ## PE = -Sum(p*log(p)) 

    PE = 0
    for x in prob:
        if x == 0:
            PE = PE - 0
        else:
            PE = PE - x*(math.log(x,math.e))
    print("Permulation Entropy of IMF: ",PE)
    
    ## Calculate Normalized Permutation Entropy:
    ## PE = -Sum(p*log(p)) 

    N_PE = PE/(math.log(factorial(m),math.e))
    print("Normalized Permulation Entropy of IMF: ",N_PE)
    return(N_PE)

6


In [4]:
# Deciding the Threshold for theta for assessing complexity from the resulting PE value
# PE value has to be calculated for the components and not directly from the time series

theta = 0.5 # Complexity Threshold as proposed in the research paper
complexity = [] # 0 indicates low complexity, 1 indicates High complexity
for k in range(0,nIMFs):
    NPE = Complexity_Characteristic_Measurement(eIMFs[k]) # Returns the Normalized 
    if NPE < 0.5:
        complexity.append(0)
    else:
        complexity.append(1)
    print(" ")

Number of observations:  1693
Frequency:  [252, 288, 296, 311, 227, 319]
Number of observations:  1693
Probability of embedding dimensions:  [0.1488481984642646, 0.17011222681630242, 0.1748375664500886, 0.1836975782634377, 0.1340815121086828, 0.18842291789722387]
Probability Sum:  1.0
Permulation Entropy of IMF:  1.784918718071506
Normalized Permulation Entropy of IMF:  0.9961821040859373
 
Number of observations:  1693
Frequency:  [541, 150, 158, 134, 568, 142]
Number of observations:  1693
Probability of embedding dimensions:  [0.3195510927347903, 0.08860011813349085, 0.09332545776727702, 0.07914943886591849, 0.33549911399881865, 0.08387477849970466]
Probability Sum:  1.0
Permulation Entropy of IMF:  1.5756702111968917
Normalized Permulation Entropy of IMF:  0.8793982888092333
 
Number of observations:  1693
Frequency:  [709, 75, 74, 67, 701, 67]
Number of observations:  1693
Probability of embedding dimensions:  [0.41878322504430004, 0.044300059066745424, 0.04370939161252215, 0.0395

In [3]:
for i in range(0,nIMFs):
    if complexity[i] == 0:
        print("Intrinsic Mode Function ",i+1," has low complexity characteristics")
    elif complexity[i] == 1:
        print("Intrinsic Mode Function ",i+1," has high complexity characteristics")

NameError: name 'complexity' is not defined

In [7]:
# Adding all the low complexity characteristics:

l - len(eIMFs[0])

new_eIMF = []
for i in range(0,l):
    new_eIMF.append(eIMFs[4][i]+eIMFs[5][i]+eIMFs[6][i]+eIMFs[7][i]+eIMFs[8][i])
    
print(new_eIMF)
    
## Write the Combined Intrinsic Mode functions:

write_list = []
print(l)
for i in range(0,l):
    temp = []
    for j in range(0,5):
        if j < 4:
            temp.append(eIMFs[j][i])
        else:
            temp.append(new_eIMF[i])
            
    write_list.append(temp)

with open("Final Intrinsic Mode Functions.csv","w",newline="") as f:
    writer = csv.writer(f)
    writer.writerows(write_list)
f.close()

## Perform Support Vector Regressions for the Final IMFs and the Residuals


[66.26933154215557, 66.22095039702657, 66.16885452689841, 66.11195686892098, 66.0489878437534, 65.97870136250195, 65.90010480190605, 65.81239426407353, 65.71494157700081, 65.60772683615991, 65.49149648594654, 65.36747105793043, 65.23690604162047, 65.1010569265255, 64.96117920215407, 64.81852835801514, 64.67435988361737, 64.52992926846953, 64.38649200208046, 64.2453035739589, 64.10761947361357, 63.97469519055325, 63.84778751790148, 63.72827957554345, 63.617835357712735, 63.518201476230054, 63.43122306568584, 63.35871067806714, 63.30252081805103, 63.26444823542664, 63.24607395488256, 63.24879705451168, 63.27380060213598, 63.32216646952024, 63.394974010250436, 63.493302577912964, 63.61823152609419, 63.77084020838001, 63.95220797835678, 64.16341417722371, 64.40544953349678, 64.67892563513205, 64.98422759999468, 65.32163101459288, 65.69071760376276, 66.089739733988, 66.5156284156552, 66.96459506514165, 67.43271565965514, 67.91606562717035, 68.41072039566114, 68.91275539310209, 69.4182460474