In [1]:
import os, time, csv, math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture


In [2]:
# --- Input: raw per-distance CSV files ---------------------------------------
input_data_path = f"{os.getcwd()}/rawData"
distances = [*range(1, 26)]   # file index d; link length is d * span_length
nsamples = 50
span_length = 80
nsymbols = 2048               # number of symbol columns per sample row

# --- Output: feature tables and selection settings ---------------------------
output_data_path = f"{os.getcwd()}/trainingData"
min_dist = 0                  # files with d * span_length outside
max_dist = 3000               # [min_dist, max_dist] are skipped when loading
selCP = [1, 7, 10, 15]        # labels used as column-name suffixes in the output header
selCP_pos = [(-3, 3), (1, 1), (-1, -1), (1, -3)]   # (real, imag) targets, index-aligned with selCP
trainingProp = 0.8            # fraction of rows written to the training file
# ------------------------------------------------------------------------------

In [3]:
# Load every 'consts_<d>span.csv' whose distance (d * span_length) lies within
# [min_dist, max_dist]. Each file is transposed so that one row holds the
# nsymbols values of one sample; Y receives one distance label per row, in the
# same order as the rows of X.
X=None
Y=[]
colnames=['i'+str(i) for i in range(nsymbols)]

# Collect the per-file frames and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies the accumulated frame on every iteration.
frames=[]
for d in distances:
    dist=d*span_length
    if dist<min_dist or dist>max_dist: continue
    filename='consts_'+str(d)+'span.csv'
    df_aux=pd.read_csv(input_data_path+'/'+filename, sep=",", header=None).T
    df_aux.columns=colnames
    Y.extend([dist]*df_aux.shape[0])
    frames.append(df_aux)

# Each frame keeps its own row index (as the former append-based code did).
# X stays None when no file passed the distance filter.
if frames: X=pd.concat(frames)

In [4]:
def strToTuple(s):
    """Convert one raw cell into a Python ``complex`` (despite the name, no tuple is returned).

    Parameters
    ----------
    s : str or number
        Either text using a trailing ``i`` as the imaginary unit
        (e.g. ``"1.5-2i"``), text holding a plain real number (e.g. ``"3"``),
        or a value that is already numeric/complex.

    Returns
    -------
    complex

    Raises
    ------
    ValueError
        If the text cannot be parsed by ``complex()`` after the ``i`` -> ``j``
        substitution.
    """
    # Already-numeric cells are passed through instead of failing on .split();
    # this also makes converting an already-converted value a no-op.
    if not isinstance(s, str):
        return complex(s)
    # complex() rejects inner whitespace such as "1 + 2i", so strip all of it.
    text="".join(s.split())
    # Only a *trailing* 'i' is the imaginary unit. Splitting on the first 'i'
    # would mangle "inf"/"nan"-style tokens, and unconditionally appending 'j'
    # would turn a purely real "3" into 3j.
    if text.endswith("i"):
        text=text[:-1]+"j"
    return complex(text)
# Apply strToTuple to every cell of X and rebind X to the converted frame.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map; switch when the pinned pandas version allows it.
X=X.applymap(strToTuple)

In [5]:
# Display (n_rows, n_columns) of X as a quick check of the loaded data.
X.shape

(1250, 2048)

In [6]:
def L2dist(a,b):
    """Return the Euclidean distance between two 2-D points.

    Only the elements at index 0 and 1 of ``a`` and ``b`` are read; any
    indexable pair (tuple, list, array row) works.
    """
    delta_first=a[0]-b[0]
    delta_second=a[1]-b[1]
    squared_sum=math.pow(delta_first,2)+math.pow(delta_second,2)
    return math.sqrt(squared_sum)

In [7]:
# Build one feature row per row of X: fit a Gaussian mixture to the
# (real, imag) points of that row, then, for every target position in
# selCP_pos, keep the mean (2 values) and the flattened 2x2 covariance
# (4 values) of the mixture component whose mean is closest to the target.
# Each row of F is: [distance label, then 6 values per target, in selCP_pos order].
n_components=16   # single source of truth for the mixture size (was repeated as a literal)
split_seed=0      # fixed seed so the train/test split is the same on every run

F=[]
for i in range(X.shape[0]):
    points=[[float(z.real), float(z.imag)] for z in X.iloc[i,:]]
    gmm = GaussianMixture(n_components=n_components, random_state=0, init_params='kmeans').fit(points)
    mus=gmm.means_
    sigmas=gmm.covariances_
    features=[Y[i]]
    for target in selCP_pos:
        # min() returns the first smallest element, i.e. the same tie-breaking
        # as a left-to-right scan with a strict '<' comparison.
        k_inc=min(range(n_components), key=lambda k: L2dist(mus[k],target))
        # Row-major flattening of the component's covariance matrix:
        # [rr, ri, ir, ii], matching the header written with these rows.
        covmat=sigmas[k_inc].ravel().tolist()
        features = [*features, *mus[k_inc], *covmat]
    F.append(features)

# Shuffle the row indices with a dedicated, seeded generator: the GMM fits are
# already seeded, and an unseeded permutation would make the exported split
# differ on every execution.
R=np.random.RandomState(split_seed).permutation(len(F))
nt=math.ceil(len(F)*trainingProp)

rows_training=R[:nt]
rows_testing=R[nt:]

In [8]:
# Column names for one row of F: the distance label followed by, for each
# entry j of selCP, the component mean (real, imag) and the four covariance
# entries (rr, ri, ir, ii). The feature rows are built in selCP_pos order, so
# selCP and selCP_pos must stay index-aligned.
header=['dist']
for j in selCP:
    header.extend(prefix+str(j) for prefix in ('mu_r_','mu_i_','sigma_rr_','sigma_ri_','sigma_ir_','sigma_ii_'))

# A width mismatch would otherwise be written out silently and produce a CSV
# whose columns do not line up with the header.
if F and len(F[0])!=len(header):
    raise ValueError('feature rows have '+str(len(F[0]))+' values but the header has '+str(len(header))+' columns; check that selCP and selCP_pos have the same length')

F_training=[F[f] for f in rows_training]
F_testing=[F[f] for f in rows_testing]

# open(..., 'w') does not create missing directories.
os.makedirs(output_data_path, exist_ok=True)

# Same layout for both files: one header line, then the feature rows.
for out_name, out_rows in (('training_data.csv', F_training), ('testing_data.csv', F_testing)):
    with open(output_data_path+'/'+out_name, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(out_rows)
