# Preprocessing

This code performs the GMM feature extraction given the raw optical signal samples.

In [24]:
# Libraries for correct code execution 

import os, time, csv, math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture

In [25]:
# Chunk to mount Google Drive on Google Colab 

# Google drive loading as work station for local-usage of the files.
from google.colab import drive
drive.mount('/content/gdrive',force_remount= True)

Mounted at /content/gdrive


In [26]:
# List with the users that will execute the code.
workers = ["Ronald","Local"]

# Modify index to change the user.
worker = workers[0]

if worker == "Ronald":
  path= "/content/gdrive/MyDrive/Thesis_Workstation/ANN_dataset"
else: path = os.getcwd()

In [29]:
input_data_path=path+"/rawData"
distances=list(range(1,26))
nsamples=50
span_length=80
nsymbols=2048

######
output_data_path=path+"/trainingData"
min_dist=0
max_dist=3000
selCP=[i for i in range(1, 17)]
selCP_pos=[(-3,3),(-1,3),(1,3),(3,3),(-3,1),(-1,1),(1,1),(3,1),(-3,-1),(-1,-1),(1,-1),(3,-1),(-3,-3),(-1,-3),(1,-3),(3,-3)]
trainingProp=0.8
my_centers=[[-3,3],[-1,3],[1,3],[3,3],[-3,1],[-1,1],[1,1],[3,1],[-3,-1],[-1,-1],[1,-1],[3,-1], [-3,-3],[-1,-3],[1,-3],[3,-3]]
######

In [30]:
X=None
Y=[]
colnames=['i'+str(i) for i in range(nsymbols)]

for d in distances:
    dist=d*span_length
    if dist<min_dist or dist>max_dist: continue
    filename='consts_'+str(d)+'span.csv'
    df_aux=pd.read_csv(input_data_path+'/'+filename, sep=",", header=None)
    df_aux = df_aux.T
    df_aux.columns=colnames
    Y=Y+[dist]*df_aux.shape[0]
    if X is None: X=df_aux
    else: X=X.append(df_aux)

In [31]:
def strToTuple(s):
    s_aux=s.split("i")
    s=s_aux[0]+"j"
    return complex(s)
X=X.applymap(strToTuple)

In [32]:
# Visualizing the shape of the data 

X.shape

(1250, 2048)

In [33]:
def L2dist(a,b):
    return math.sqrt(math.pow(a[0]-b[0],2)+math.pow(a[1]-b[1],2))

In [34]:
os = list(X.iloc[901:921,].values)
x = []
y = []
for obs in os:
  for symbol in obs:
      if symbol.real > -2 and symbol.real < 0 and symbol.imag > 2 and symbol.imag < 4:
          x.append(symbol.real)
          y.append(symbol.imag)

x.append(20)
y.append(-20)
x.append(-20)
y.append(20)


In [35]:
# Doubt 1
'''
x = []
y = []
for obs in b:
    x.append(obs[0])
    y.append(obs[1])
'''    

'\nx = []\ny = []\nfor obs in b:\n    x.append(obs[0])\n    y.append(obs[1])\n'

In [36]:
# Train/test partition

train_idxs = []
test_idxs = []
for i in range(25):
    for j in range(25):
        train_idxs.append(50*i + j)
        test_idxs.append(50*(i+1)-1-j)

In [37]:
# Training/test feature extraction

F=[]
for i in range(X.shape[0]):
    data=list(X.iloc[i,:])
    data=[[float(d.real), float(d.imag)] for d in data]
    gmm = GaussianMixture(n_components=16, random_state=0, means_init=my_centers).fit(data)
    mus=gmm.means_
    sigmas=gmm.covariances_
    features=[Y[i]]
    for j in selCP_pos:
        mindist=None
        k_inc=None
        for k in range(16):
            d=L2dist(mus[k],j)
            if mindist is None or mindist>d:
                mindist=d
                k_inc=k
        #print(k_inc)
        #print(mus[k_inc])
        covmat=np.concatenate(list(sigmas[k_inc])).ravel().tolist()
        #features = [*features, *mus[k_inc], *[covmat[0], covmat[3]]]
        features = [*features, *mus[k_inc], *covmat]
    F.append(features)

F_training=[F[f] for f in train_idxs]
F_testing=[F[f] for f in test_idxs]

In [38]:
# CSV creation
 
header=['dist']
for j in selCP:
    header=[*header,*['mu_r_'+str(j),'mu_i_'+str(j),'sigma_rr_'+str(j),'sigma_ri_'+str(j),'sigma_ir_'+str(j),'sigma_ii_'+str(j)]]
    #header=[*header,*['mu_r_'+str(j),'mu_i_'+str(j),'sigma_rr_'+str(j),'sigma_ii_'+str(j)]]
    
with open(output_data_path+'/training_data.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    # write the header
    writer.writerow(header)
    # write multiple rows
    writer.writerows(F_training)
with open(output_data_path+'/testing_data.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    # write the header
    writer.writerow(header)
    # write multiple rows
    writer.writerows(F_testing)

---
## Extra

In [39]:
# Chunk to plot distributions
import plotly.express as px

fig = px.density_heatmap(x=x, y=y, histnorm = 'probability', nbinsx=600, nbinsy=600)

fig.update_yaxes(range=[2, 4], showticklabels=False)
fig.update_xaxes(range=[-4, -2], showticklabels=False)

#fig.update_traces(marker=dict(
#        color='#007bbf', size=18))

fig.update_layout(width=640, height=600)
fig.show()

In [40]:
# Chunk to visualize distributions with Gaussian filtering

# ImageFilter for using filter() function
from PIL import Image, ImageFilter
  
# Opening the image 
# (R prefixed to string in order to deal with '\' in paths)
image = Image.open("image.png")
  
# Blurring image by sending the ImageFilter.
# GaussianBlur predefined kernel argument
image = image.filter(ImageFilter.GaussianBlur(radius=5))
  
# Displaying the image
image.show()

FileNotFoundError: ignored

In [41]:
# Visualizing the previously created image

image

NameError: ignored