In [0]:
import pandas as pd
import numpy as np

In [0]:
#frequency and bus voltage from Load Dispatch Centre on 12th Oct 2019
data=pd.read_csv('data.txt')
data.head()

Unnamed: 0,frequency,busVoltage
0,50.01,11.7
1,50.01,11.7
2,50.01,11.8
3,50.03,11.8
4,50.03,11.8


#RLDA

## Signed Exponential Golomb Code

In [0]:
#where the order is zero
def SEGC(n):
  n=-2*n if n<=0 else 2*n-1
  n=bin(n+1)[2:]
  return '0'*(len(n)-1)+n

In [0]:
def SEGC_decode(code):
  cnt, i, ret = 0, 0, []
  while i<len(code):
    if code[i]=='0':
      cnt+=1
      i+=1
    else:
      x = int(code[i:i+cnt+1],2)
      x-=1
      x = -x//2 if x%2==0 else (x+1)//2
      ret.append(x)
      i+=cnt+1
      cnt = 0
  return ret

##Arithmetic Coding

In [0]:
def AE(string):
  
  a = ['','0','1']
  f = [0,string.count('0'),string.count('1')]
  cf = [f[1]+f[2],f[2],0]
  
  ret = ''
  
  low, high = 0, 9999
  for x in string:
    i = a.index(x)
    
    nlow = int(low+(high-low+1)*cf[i]/cf[0])
    nhigh = int(low+(high-low+1)*cf[i-1]/cf[0] -1)
    
    while int(nlow/1000) == int(nhigh/1000):
      ret+=str(int(nlow/1000))
      nlow = (int(nlow%1000))*10
      nhigh = (int(nhigh%1000))*10+9
    low = nlow
    high = nhigh
    
  ret+=str(low)
  
  return ret, f

In [0]:
def AD(string,f):
  a = ['','0','1']
  cf = [f[1]+f[2],f[2],0]
  
  ret = ''
  
  low, high, code = 0, 9999, int(string[:4])
  string = string[4:]
  
  while code!=low:
    #print(low,high,code,ret)
    index = int( ((code-low+1)*cf[0]-1) / (high-low+1) )
    k = 0
    while cf[k]>index:
      k+=1
    ret+=a[k]
    
    nlow = int(low+(high-low+1)*cf[k]/cf[0])
    nhigh = int(low+(high-low+1)*cf[k-1]/cf[0] -1)
    #print(nlow,nhigh,code,ret)
    
    while int(nlow/1000) == int(nhigh/1000):
      nlow = (int(nlow%1000))*10
      nhigh = (int(nhigh%1000))*10+9
      code = (int(code%1000))*10 + int(string[0])
      string = string[1:]
    
    low = nlow
    high = nhigh
  
  return ret
    

## Encoding

In [0]:
def differentialArray(freq):
  diff = [freq[0]]
  for i in range(1,len(freq)):
    diff.append(freq[i]-freq[i-1])
  return np.array(diff)

In [0]:
# Resumable Load Data compression Algorithm
freq = data.frequency
np.array(freq)

array([50.01, 50.01, 50.01, 50.03, 50.03, 50.06, 50.11, 50.11, 50.12,
       50.07, 50.03, 49.99, 49.99, 49.99, 49.97, 49.91, 50.  , 49.98,
       49.92, 49.98, 50.02, 49.98, 49.99, 50.  , 50.01])

In [0]:
# Normalise
freq = freq*100
freq = freq.astype(int)
freq = np.array(freq)
freq

array([5001, 5001, 5001, 5003, 5003, 5006, 5011, 5011, 5012, 5007, 5003,
       4999, 4999, 4999, 4997, 4991, 5000, 4998, 4992, 4998, 5002, 4998,
       4999, 5000, 5001])

In [0]:
# Value -> Differential array
diff = differentialArray(freq)
diff

array([5001,    0,    0,    2,    0,    3,    5,    0,    1,   -5,   -4,
         -4,    0,    0,   -2,   -6,    9,   -2,   -6,    6,    4,   -4,
          1,    1,    1])

In [0]:
# Differential array -> Variable length coding
code = ''
for i in diff:
  code += SEGC(i)
code

'00000000000001001110001001011001001001100001010101000010110001001000100111001010001101000010010001010001101000110000010000001001010010010'

In [0]:
# Arithmetic coding
output, f = AE(code)
output

'99538039332468447852156781403854921343224'

In [0]:
print('Original :', 24*4, 'bytes')
print('Compressed :', len(output), 'bytes')
print('CR :', (96-len(output))/96*100,'%')

Original : 96 bytes
Compressed : 41 bytes
CR : 57.291666666666664 %


## Decoding

In [0]:
def diffToArray(diff):
  a = [diff[0]]
  for i in range(1,len(diff)):
    a.append(a[i-1]+diff[i])
  return np.array(a)

In [0]:
# Arithmetic decoding
code = AD(output,f)

In [0]:
# Variable length code -> Differential array
diff = SEGC_decode(code)
np.array(diff)

array([5001,    0,    0,    2,    0,    3,    5,    0,    1,   -5,   -4,
         -4,    0,    0,   -2,   -6,    9,   -2,   -6,    6,    4,   -4,
          1,    1,    1])

In [0]:
# Differential array -> Value
a = diffToArray(diff)
a

array([5001, 5001, 5001, 5003, 5003, 5006, 5011, 5011, 5012, 5007, 5003,
       4999, 4999, 4999, 4997, 4991, 5000, 4998, 4992, 4998, 5002, 4998,
       4999, 5000, 5001])

In [0]:
# Removing normalization
a = a/100
a

array([50.01, 50.01, 50.01, 50.03, 50.03, 50.06, 50.11, 50.11, 50.12,
       50.07, 50.03, 49.99, 49.99, 49.99, 49.97, 49.91, 50.  , 49.98,
       49.92, 49.98, 50.02, 49.98, 49.99, 50.  , 50.01])

# DBEA

## Encoding

In [0]:
def differentialArray(freq):
  diff = [freq[0]]
  for i in range(1,len(freq)):
    diff.append(freq[i]-freq[i-1])
  return np.array(diff)

In [0]:
def zeroCountBinaryEncoding(diff):
  string = ''
  code = bin(diff[0])[2:]
  while len(code)<16:
    code = '0'+code
  string+=code

  cnt=0
  for i in range(1, len(diff)):
    if diff[i]==0:
      cnt+=1
    else:
      if cnt!=0:
        code = bin(cnt)[2:]
        while len(code)<8:
          code = '0'+code
        string+=code
        cnt = 0
      if diff[i]>0:
        code = bin(diff[i])[2:]
        while len(code)<6:
          code = '0'+code
        code = '10'+code
        string+=code
      elif diff[i]<0:
        code = bin(-diff[i])[2:]
        while len(code)<6:
          code = '0'+code
        code = '11'+code
        string+=code

  return string

In [0]:
def binaryToString(code):
  string = ''
  while len(code)>0:
    string+=chr(int(code[:8],2))
    code = code[8:]
  return string

In [0]:
freq = data.frequency
np.array(freq)

array([50.01, 50.01, 50.01, 50.03, 50.03, 50.06, 50.11, 50.11, 50.12,
       50.07, 50.03, 49.99, 49.99, 49.99, 49.97, 49.91, 50.  , 49.98,
       49.92, 49.98, 50.02, 49.98, 49.99, 50.  , 50.01])

In [0]:
# Normalise
freq = freq*100
freq = freq.astype(int)
freq = np.array(freq)
freq

array([5001, 5001, 5001, 5003, 5003, 5006, 5011, 5011, 5012, 5007, 5003,
       4999, 4999, 4999, 4997, 4991, 5000, 4998, 4992, 4998, 5002, 4998,
       4999, 5000, 5001])

In [0]:
# Value -> Differential array
diff = differentialArray(freq)
diff

array([5001,    0,    0,    2,    0,    3,    5,    0,    1,   -5,   -4,
         -4,    0,    0,   -2,   -6,    9,   -2,   -6,    6,    4,   -4,
          1,    1,    1])

In [0]:
# Zero counts and Binary Encoding
code = zeroCountBinaryEncoding(diff)
code

'000100111000100100000010100000100000000110000011100001010000000110000001110001011100010011000100000000101100001011000110100010011100001011000110100001101000010011000100100000011000000110000001'

In [0]:
# Binary -> Text
string = binaryToString(code)
string


'\x13\x89\x02\x82\x01\x83\x85\x01\x81ÅÄÄ\x02ÂÆ\x89ÂÆ\x86\x84Ä\x81\x81\x81'

In [0]:
print('Original :', 24*4, 'bytes')
print('Compressed :', len(string), 'bytes')
print('CR :', (96-len(string))/96*100,'%')

Original : 96 bytes
Compressed : 24 bytes
CR : 75.0 %


## Decoding

In [0]:
def StringToDiff(string):
  diff = []
  temp = ord(string[0])*256 + ord(string[1])
  string = string[2:]
  diff.append(temp)
  while len(string)>0:
    temp = ord(string[0])    
    string = string[1:]
    if temp < 128:
      while temp>0:
        diff.append(0)
        temp-=1
    elif temp >= 192:
      temp = 192-temp
      diff.append(temp)
    else:
      temp = temp-128
      diff.append(temp)
  return diff

In [0]:
def diffToArray(diff):
  a = [diff[0]]
  for i in range(1,len(diff)):
    a.append(a[i-1]+diff[i])
  return np.array(a)

In [0]:
# String -> ASCII -> Differential Array
diff = StringToDiff(string)
np.array(diff)

array([5001,    0,    0,    2,    0,    3,    5,    0,    1,   -5,   -4,
         -4,    0,    0,   -2,   -6,    9,   -2,   -6,    6,    4,   -4,
          1,    1,    1])

In [0]:
# Differential Array -> Value
a = diffToArray(diff)
a

array([5001, 5001, 5001, 5003, 5003, 5006, 5011, 5011, 5012, 5007, 5003,
       4999, 4999, 4999, 4997, 4991, 5000, 4998, 4992, 4998, 5002, 4998,
       4999, 5000, 5001])

In [0]:
# Removing Normalization
a = a/100
a

array([50.01, 50.01, 50.01, 50.03, 50.03, 50.06, 50.11, 50.11, 50.12,
       50.07, 50.03, 49.99, 49.99, 49.99, 49.97, 49.91, 50.  , 49.98,
       49.92, 49.98, 50.02, 49.98, 49.99, 50.  , 50.01])

# Adaptive Arithmetic Encoding

In [0]:
def cumFreq(f,cf):
  i=len(f)-1
  while i>0:
    cf[i-1]=cf[i]+f[i]
    i-=1
  return cf

In [0]:
def AAE(string):
  n=8
  
  a  = ['', '0', '1']
  f  = [0, 1, 1]
  cf = [2, 1, 0]
  
  ret=''
  
  low, high = 0, 255
  for x in string:
    i=a.index(x)
    
    nlow = int(low+(high-low+1)*cf[i]/cf[0])
    nhigh = int(low+(high-low+1)*cf[i-1]/cf[0])
    
    wcnt=0
    while True:
      
      while (nlow & 0x80)==(nhigh & 0x80):
        if nlow & 0x80 == 0:
          ret+='1'*wcnt
          wcnt=0
        else:
          ret+='0'*wcnt
          wcnt=0
        nlow=(nlow*2)%256
        nhigh=(nhigh*2+1)%256
        
      while nhigh-nlow<cf[0]:
        nlow = 2*(nlow-2**(n-2))%256
        nhigh = (2*(nhigh-2**(n-2))+1)%256
        wcnt+=1
      
      if (nlow & 0x80)!=(nhigh & 0x80):break
    low=nlow
    high=nhigh
    f[i]+=1
    
    if f[1]>f[2]:
      f[1],f[2]=f[2],f[1]
      a[1],a[2]=a[2],a[1]
    cf=cumFreq(f,cf)
  
  low=
  return ret
          