<a href="https://colab.research.google.com/github/R0N3ldrt/Thesis/blob/main/Symbol-to-Symbol_Attack_Clean-Synthetic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Symbol-to_Symbol Attack

# Loading Necesary Libraries

In [1]:
# Importing necesary libraries
# Libraries for correct code execution 

import os, time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import pickle
import random
import csv
import re
import plotly.graph_objects as go
import plotly.express as px

from tqdm import tqdm
from math import modf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from plotly.subplots import make_subplots
from plotly.subplots import make_subplots

from keras.models import Sequential
from keras.layers import Dense,Conv2D, Flatten
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping
from sklearn.mixture import GaussianMixture

import warnings
warnings.filterwarnings("ignore")

# Generate Train/Test data

In [2]:
def create_df(input_data_path, distances, nsymbols = 2048, min_dist = 0, max_dist = 3000):
  X=None
  Y=[]
  colnames=['i'+str(i) for i in range(nsymbols)]

  for d in distances:
    dist=d*span_length
    if dist<min_dist or dist>max_dist: continue
    filename='consts_'+str(d)+'span.csv'
    df_aux=pd.read_csv(input_data_path+'/'+filename, sep=",", header=None)
    df_aux = df_aux.T
    df_aux.columns=colnames
    Y=Y+[dist]*df_aux.shape[0]
    if X is None: X=df_aux
    else: X=X.append(df_aux)
  return X, Y

In [3]:
def strToTuple(s):
    s_aux=s.split("i")
    s=s_aux[0]+"j"
    return complex(s)

def strToTuple_v2(s):
    return complex(s)

In [4]:
def test_train_indexes_v1():
  train_idxs = []
  test_idxs = []
  for i in range(25):
      for j in range(25):
          train_idxs.append(50*i + j)
          test_idxs.append(50*(i+1)-1-j)
  return train_idxs, test_idxs

In [5]:
def test_train_indexes_v2(num_files=25):
  train_idxs2 = []
  test_idxs2 = []
  for i in range(num_files):
      for j in range(num_files):
          test_idxs2.append(50*(i+1)-1-j)

  for i in range(num_files):
    for j in range(2):
      train_idxs2.append(50*i + j)
  return train_idxs2, test_idxs2

# Utils

## Gradient Descent Function

In [6]:
def grad_b (M,m_og,m_tg,b,beta):
  return 2*beta*(M @ m_og + b - m_tg)

In [7]:
def grad_M(M,cov_og,cov_tg,alpha):
    error = M @ cov_og @ np.transpose(M) - cov_tg  

    m1 = (2*error[0][0] * (2*cov_og[0][0]*M[0][0] + 2*cov_og[0][1]*M[0][1]) +
          2*(error[0][1] + error[1][0]) * (cov_og[0][0]*M[1][0] + cov_og[0][1]*M[1][1]))
    
    m2 = (2*error[0][0] * (2*cov_og[1][1]*M[0][1] + 2*cov_og[0][1]*M[0][0]) +
          2*(error[0][1] + error[1][0]) * (cov_og[0][1]*M[1][0] + cov_og[1][1]*M[1][1]))
    
    m3 = (2*error[1][1] * (2*cov_og[0][0]*M[1][0] + 2*cov_og[0][1]*M[1][1]) +
          2*(error[0][1] + error[1][0]) * (cov_og[0][0]*M[0][0] + cov_og[0][1]*M[0][1]))

    m4 = (2*error[1][1] * (2*cov_og[0][0]*M[1][1] + 2*cov_og[0][1]*M[1][0]) +
          2*(error[0][1] + error[1][0]) * (cov_og[0][1]*M[0][0] + cov_og[1][1]*M[0][1]))
    
    return alpha*np.array([[m1, m2], [m3, m4]])

In [8]:
def gradient_descent(alpha,beta,m_tg,m_og,cov_tg,cov_og,nu,log):
  Ms = []
  bs = []
  M = np.random.rand(2,2)
  #M = np.array([[1, 0], [0, 1]])
  b = np.random.rand(2,1)
  #b = np.array([[0], [0]])
  #for i in range(100000):
  i = 0
  while True:
    # print('From:', M @ cov_source @ np.transpose(M))
    # print('To:', cov_target)
    L = loss(alpha,beta,m_tg,m_og,cov_tg,cov_og,M,b) 
    if  L < 1e-20:
        Ms.append(M)
        bs.append(b)
        break
    b = b - nu*grad_b(M,m_og,m_tg,b,beta)
    M = M - nu*grad_M(M,cov_og,cov_tg,alpha)
    i+= 1
    if (i>= 5000 and i <= 5025) or (i>= 5975 and i <= 6000):
      Ms.append(M)
      bs.append(b)
    if not i%5000 and log: print(L)
  if log: print("-"*25)
  return Ms,bs

## Compute parameters and mean/covariance

In [9]:
def compute_mean_and_cov(data):
  aux_x = [] # Reales
  aux_y = [] # Imag
  for obs in data:
    aux_x.append(obs[0])
    aux_y.append(obs[1])
  return np.array([[np.mean(aux_x)],[np.mean(aux_y)]]), np.cov(aux_x,aux_y)


def total_loss(m_mod,m_tg,cov_mod,cov_tg):
  return (sum(sum(np.power(cov_mod-cov_tg,2))) + sum(np.power(m_mod - m_tg,2)))[0]


def loss(alpha,beta,m_tg,m_og,cov_tg,cov_og,M,b):
  first = alpha*sum(sum(np.power(M @ cov_og @ np.transpose(M) - cov_tg, 2)))
  second = beta*sum(np.power(M @ m_og + b - m_tg, 2))
  a = first+second
  return a

In [10]:
def compute_parameters (const,method,source,target, params = None, log = False):
  if const  == 1: limits = [[-2,-4],[4,2]]
  elif const == 7: limits = [[2,0],[2,0]]
  elif const == 10: limits = [[0,-2],[0,-2]]
  elif const == 15: limits = [[2,0],[-2,-4]]

  q_source = []
  q_target = []

  for i in range(len(source)):

    q_source += [[x.real,x.imag] for x in source.values.tolist()[i] if x.real < limits[0][0] and x.real > limits[0][1] and x.imag < limits[1][0] and x.imag > limits[1][1]]
    q_target += [[x.real,x.imag] for x in target.values.tolist()[i]if x.real < limits[0][0] and x.real > limits[0][1] and x.imag < limits[1][0] and x.imag > limits[1][1]]

  unbiased = (len(q_source)/(len(q_source)-1) * 125/126)

  mean_source,cov_source = compute_mean_and_cov(q_source)
  mean_target, cov_target= compute_mean_and_cov(q_target)

  cov_source *= unbiased
  cov_target *= unbiased

  if method == "GD":
    if (params is None):
      alpha = 3/4
      beta = 1/4
    else:
      alpha = params["alpha"] if params["alpha"]>0 and params["alpha"]<=1 and params["alpha"]>params["beta"]  else 3/4
      beta = params["beta"] if params["beta"]>=0 and params["beta"]<1 and params["beta"]<params["alpha"]  else 1/4

    M, b = gradient_descent(alpha,beta,mean_target,mean_source,cov_target,cov_source, 0.5,log)
    return M,b

  elif method == "Z":
    return mean_source,cov_source,mean_target,cov_target

## Modify GD/Z

In [11]:
def modify_const_GD(const,source,M,b, target = None, return_plots=False):

  if const  == 1: limits = [[-2,-4],[4,2]]
  elif const == 7: limits = [[2,0],[2,0]]
  elif const == 10: limits = [[0,-2],[0,-2]]
  elif const == 15: limits = [[2,0],[-2,-4]]

  q_source = []
  indexes = []

  for i in range(len(source)):
    row_source = source.values.tolist()[i]
    for j in range(len(row_source)):
      point_source = row_source[j]
      if point_source.real < limits[0][0] and point_source.real > limits[0][1] and point_source.imag < limits[1][0] and point_source.imag > limits[1][1]:
        q_source += [[point_source.real,point_source.imag]]
        indexes.append([i,j])

  if target is not None:
    q_target = []

    for i in range(len(source)):
      row_target = target.values.tolist()[i]
      for j in range(len(row_source)):
        point_target = row_target[j]
        if point_target.real < limits[0][0] and point_target.real > limits[0][1] and point_target.imag < limits[1][0] and point_target.imag > limits[1][1]:
          q_target += [[point_target.real,point_target.imag]]
      
  mults = [np.array([[x[0]],[x[1]]]) for x in q_source]
  new_points = []

  for mult in mults:
    res = (M @ mult + b).tolist()

    # --------- Old line ---------
    #new_points.append([res[0][0],res[1][0]])
    nested_check = any(isinstance(i, list) for i in res[0])
    if nested_check:
      new_points.append([res[0][0][0],res[0][1][0]])
    else:
      new_points.append([res[0][0],res[1][0]])

  if target is not None:
    if return_plots:
      generate_comparison_plot(new_points,q_target)
      return new_points, q_target

  return new_points, indexes

In [12]:
def modify_const_Z(const,source,mean_source,cov_source,mean_target,cov_target,target = None, return_plots=False):
  if const  == 1: limits = [[-2,-4],[4,2]]
  elif const == 7: limits = [[2,0],[2,0]]
  elif const == 10: limits = [[0,-2],[0,-2]]
  elif const == 15: limits = [[2,0],[-2,-4]]

  q_source = []
  indexes = []

  for i in range(len(source)):
    row_source = source.values.tolist()[i]
    for j in range(len(row_source)):
      point_source = row_source[j]
      if point_source.real < limits[0][0] and point_source.real > limits[0][1] and point_source.imag < limits[1][0] and point_source.imag > limits[1][1]:
        q_source += [[point_source.real,point_source.imag]]
        indexes.append([i,j])

  if target is not None:
    q_target = []

    for i in range(len(source)):
      row_target = target.values.tolist()[i]
      for j in range(len(row_source)):
        point_target = row_target[j]
        if point_target.real < limits[0][0] and point_target.real > limits[0][1] and point_target.imag < limits[1][0] and point_target.imag > limits[1][1]:
          q_target += [[point_target.real,point_target.imag]]

  mults = [np.array([[x[0]],[x[1]]]) for x in q_source]
  new_points = []

  w, v = np.linalg.eig(cov_source)
  S1 = v @ np.diag(np.sqrt(w)) @ np.linalg.inv(v)
  w, v = np.linalg.eig(cov_target)
  S2 = v @ np.diag(np.sqrt(w)) @ np.linalg.inv(v)

  for mult in mults:
    if np.linalg.det(S1)==0:
      print('Error Singular Matrix')
    else:
      normalized = np.linalg.inv(S1) @ (mult - mean_source)
      denormalized = S2 @ normalized + mean_target

      new_points.append([denormalized[0][0], denormalized[1][0]])

  if target is not None:
    if return_plots:
      generate_comparison_plot(new_points,q_target)
      

  return new_points, indexes

## Compare plots

In [13]:
def generate_comparison_plot(mod_points,ptarget, return_plots=False):
  x1 = [x[0] for  x in mod_points]
  y1 = [x[1] for  x in mod_points]
  x2 = [x[0] for  x in ptarget]
  y2 = [x[1] for  x in ptarget]

  if return_plots:
      return go.Scatter(x = x1, y = y1, mode='markers'), go.Scatter(x = x2, y = y2,mode='markers') 

  fig = make_subplots(rows=1, cols=2)
  fig.add_trace(
    go.Scatter(x = x1, y = y1,mode='markers'),
    
    row=1, col=1
  )
  fig.add_trace(
    go.Scatter(x = x2, y = y2,mode='markers'),
    row=1, col=2
  )
  fig.update_layout(height=500, width=1000, title_text="Point comparison",autosize = False)
  fig.show()

## Distance Calc

In [14]:
def L2dist(a,b):
    return math.sqrt(math.pow(a[0]-b[0],2)+math.pow(a[1]-b[1],2))

## Mod files generator

In [15]:
def mod_df_generator(df, output_filepath, raw_data = True, mod_i = None, method = None):
  sample_arr = []
  symbol_arr = []
  real_arr = []
  imag_arr = []

  # For alraeady modify data
  if raw_data != True:

    source_arr = []
    target_arr = []

    vals_df = df.iloc[:, 2:df.shape[1]]
    vals_df = vals_df.applymap(strToTuple_v2)

    distances_df = df.iloc[:, [0, 1]]

    sample_cnt = 1
    for i in range(len(vals_df)):
      symbol_cnt = 1
      for x in vals_df.values.tolist()[i]:

        real_arr.append(x.real)
        imag_arr.append(x.imag)

        sample_arr.append(sample_cnt)
        symbol_arr.append(symbol_cnt)    
        symbol_cnt += 1

        source_arr.append(distances_df['source_distance'].loc[distances_df.index[i]])
        target_arr.append(distances_df['target_distance'].loc[distances_df.index[i]])
        
      sample_cnt += 1

    data = {'Source_Distance':source_arr, 'Target_Distance':target_arr, 'Sample_Id':sample_arr, 'Symbol_Id':symbol_arr, 'Real':real_arr, 'Imag':imag_arr}  
    
    mod_df = pd.DataFrame(data)

    filename = ('/{}_mod/consts_modified_source_distance_{}_alpha_75_beta_25.csv'.format(str(method), str(mod_i)))

    mod_df.to_csv(str(output_filepath)+filename, index=False, encoding='utf-8-sig')
    
  else:
    vals_df = df

    sample_cnt = 1
    file_cnt = 1
    for i in range(len(vals_df)):
      symbol_cnt = 1
      for x in vals_df.values.tolist()[i]:
        real_arr.append(x.real)
        imag_arr.append(x.imag)

        sample_arr.append(sample_cnt)
        symbol_arr.append(symbol_cnt)
        symbol_cnt += 1

      sample_cnt += 1
      if (i+1) % 50 == 0:
        data = {'Sample_Id':sample_arr, 'Symbol_Id':symbol_arr, 'Real':real_arr, 'Imag':imag_arr}  
        
        mod_df = pd.DataFrame(data)
        
        filename = ('/rawData_mod/consts_{}span_mod.csv'.format(file_cnt))

        file_cnt += 1
        mod_df.to_csv(str(output_filepath)+filename, index=False, encoding='utf-8-sig') 
        
        # Reset values
        sample_cnt = 1
        sample_arr = []
        symbol_arr = []
        real_arr = []
        imag_arr = [] 

# Main

## Main v1

In [16]:
def symbol2symbol_main(method, quadrants, input_data_path, distances, output_path, params = None, mod_csv = False, mod_original = False, nsymbols = 2048, min_dist = 0, max_dist = 3000):
  
  if params is None and method == 'GD':
    output_features = "/{}_alpha_p75_beta_p25_features.csv".format(str(method))
  elif method == 'Z':
    output_features = "/{}_features.csv".format(str(method))
  elif params is not None and method == 'GD':
    decimal_alpha = modf(params['alpha'])
    decimal_alpha  = round(decimal_alpha[0], 2)
    decimal_alpha = re.findall('..(.*)', str(decimal_alpha))[0]

    decimal_beta = modf(params['beta'])
    decimal_beta  = round(decimal_beta[0], 2)
    decimal_beta = re.findall('..(.*)', str(decimal_beta))[0]

    output_features = "/{}_alpha_p{}_beta_p{}_features.csv".format(str(method), str(decimal_alpha), str(decimal_beta))

  # Get input data
  X, Y = create_df(input_data_path, distances, nsymbols, min_dist, max_dist)
  X = X.applymap(strToTuple)

  # Generate mod of the original file
  if mod_original == True:
    print('Generating the modified file for the original data')
    mod_df_generator(X, input_data_path)
  
  train_idxs2, test_idxs2 = test_train_indexes_v2()

  X_train2 = X.iloc[train_idxs2].reset_index(drop = "True")
  X_test2 = X.iloc[test_idxs2].reset_index(drop = "True") 

  data_new = {}

  for i in range(0,25):
    data_new[str(80*(i+1))] = {"Train":X_train2.iloc[i*2:(i*2+2)].reset_index(drop = "True"),"Test":X_test2.iloc[i*25:(i*25+25)].reset_index(drop = "True")}  

  first = True
  modify_all_data_df = pd.DataFrame()
  for i in tqdm(range(25)):
      print('\n')
      print("i = ",i)
      if mod_csv == True:
        modify_data_df = pd.DataFrame()
      for j in range(i+1,25):
          print(" j = ",j)
          source = data_new[str(distances[i]*80)]['Train']
          target = data_new[str(distances[j]*80)]['Train']
          source_test = data_new[str(distances[i]*80)]['Test']

          source_test2 = source_test.copy()

          for quadrant in quadrants:
              print("   quadrant = ", quadrant)
              if method == "GD":
                M, b = compute_parameters(quadrant, 'GD', source, target, params=params)
                new_points, indexes = modify_const_GD(quadrant, source_test, M, b)
              elif method == "Z":
                mean_source,cov_source,mean_target,cov_target = compute_parameters(quadrant, 'Z', source, target, params=None)
                new_points, indexes = modify_const_Z(quadrant,source_test,mean_source,cov_source,mean_target,cov_target)

              for k in range(len(indexes)):
                  source_test2.iloc[indexes[k][0],indexes[k][1]] = complex(new_points[k][0], new_points[k][1])

          if mod_csv == True:
            old_df = source_test2.copy()
            old_df.insert(loc = 0,column = 'source_distance',value = str(distances[i]*80))
            old_df.insert(loc = 1,column = 'target_distance',value = str(distances[j]*80))

            modify_data_df  = pd.concat([modify_data_df, old_df], ignore_index=True)
       
          F=[]
          for k in range(source_test2.shape[0]):
              data2=list(source_test2.iloc[k,:])
              data2=[[float(d.real), float(d.imag)] for d in data2]
              gmm = GaussianMixture(n_components=16, random_state=0, means_init=my_centers).fit(data2)
              mus=gmm.means_
              sigmas=gmm.covariances_

              features=[distances[i]*80, distances[j]*80]

              for z in selCP_pos:
                  mindist=None
                  k_inc=None
                  for w in range(16):
                      d=L2dist(mus[w],z)
                      if mindist is None or mindist>d:
                          mindist=d
                          k_inc=w

                  covmat=np.concatenate(list(sigmas[k_inc])).ravel().tolist()
                  features = [*features, *mus[k_inc], *covmat]
              F.append(features)
          
          header=['original_dist', 'target_dist']
          
          for j in selCP:
              header=[*header,*['mu_r_'+str(j),'mu_i_'+str(j),'sigma_rr_'+str(j),'sigma_ri_'+str(j),'sigma_ir_'+str(j),'sigma_ii_'+str(j)]]
   
          with open(output_path + output_features, 'a', encoding='UTF8', newline='') as f:
              writer = csv.writer(f)
              # write the header
              if first:
                writer.writerow(header)
                first = False
              # write multiple rows
              writer.writerows(F)  

      if mod_csv == True and i<24:
        # Generate mod for modified file
        print('Generating the modified file for the modified data')
        mod_df_generator(modify_data_df, output_path, raw_data = False, mod_i = str(distances[i]*80), method = method)

        # Get modified data points
        modify_all_data_df  = pd.concat([modify_all_data_df, modify_data_df], ignore_index=True)

  if mod_csv == True:
    # Save the modified data
    print('Saving the modified data')
    filename = ('/{}_consts_modified_data.csv'.format(str(method)))
    # Removing parenthesis of complex numbers
    headers_names = list(modify_all_data_df.columns)
    for h in headers_names:
      if h != 'source_distance' or h != 'target_distance':
        modify_all_data_df[h] = modify_all_data_df[h].apply(str).str.replace('\(|\)','')
    modify_all_data_df.to_csv(str(output_path)+filename, index=False, encoding='utf-8-sig')        

## Main v2

In [28]:
def symbol2symbol_main_v2(method, quadrants, input_data_path, distances, output_path, source_params=None, params = None, mod_csv = False, mod_original = False, nsymbols = 2048, min_dist = 0, max_dist = 3000):
  
  if params is None and method == 'GD':
    output_features = "/{}_alpha_p75_beta_p25_radius_{}_funct_{}_features.csv".format(str(method), str(source_params[1]), str(source_params[2]))
  elif method == 'Z':
    output_features = "/{}_radius_{}_funct_{}_features.csv".format(str(method), str(source_params[1]), str(source_params[2]))
  elif params is not None and method == 'GD':
    decimal_alpha = modf(params['alpha'])
    decimal_alpha  = round(decimal_alpha[0], 2)
    decimal_alpha = re.findall('..(.*)', str(decimal_alpha))[0]

    decimal_beta = modf(params['beta'])
    decimal_beta  = round(decimal_beta[0], 2)
    decimal_beta = re.findall('..(.*)', str(decimal_beta))[0]

    output_features = "/{}_alpha_p{}_beta_p{}_radius_{}_funct_{}_features.csv".format(str(method), str(decimal_alpha), str(decimal_beta), str(source_params[1]), str(source_params[2]))
 
  # Get input data
  X, Y = create_df(input_data_path, distances, nsymbols, min_dist, max_dist)
  X = X.applymap(strToTuple)
  
  source_df = read_synthetic_data_v2(input_path=source_params[0], radius=source_params[1], function=source_params[2])
  source_df = source_df.applymap(strToTuple_v2)

  # Generate mod of the original file
  if mod_original == True:
    print('Generating the modified file for the original data')
    mod_df_generator(X, input_data_path)
  
  train_idxs2, test_idxs2 = test_train_indexes_v2()
  X_train2 = X.iloc[train_idxs2].reset_index(drop = "True")
  X_test2 = X.iloc[test_idxs2].reset_index(drop = "True") 

  source_train_idxs2, source_test_idxs2 = test_train_indexes_v2(num_files=1)
  source_X_train2 = source_df.iloc[source_train_idxs2].reset_index(drop = "True")
  source_X_test2 = source_df.iloc[source_test_idxs2].reset_index(drop = "True") 

  data_new = {}

  for i in range(0,25):
    if i == 0:
      data_new[str(80*(i+1))] = {"Train":source_X_train2.iloc[i*2:(i*2+2)].reset_index(drop = "True"),"Test":source_X_test2.iloc[i*25:(i*25+25)].reset_index(drop = "True")}
    else:
      data_new[str(80*(i+1))] = {"Train":X_train2.iloc[i*2:(i*2+2)].reset_index(drop = "True"),"Test":X_test2.iloc[i*25:(i*25+25)].reset_index(drop = "True")}  

  first = True
  modify_all_data_df = pd.DataFrame()

  # modifying algorithm to only calculate for one source distance
  i_loop = 25
  if source_params == None:
    i_loop = 1

  for i in tqdm(range(i_loop)):
      print('\n')
      print("i = ",i)

      if mod_csv == True:
        modify_data_df = pd.DataFrame()

      for j in range(i+1,25):
          print(" j = ",j)        
          source = data_new[str(distances[i]*80)]['Train']
          target = data_new[str(distances[j]*80)]['Train']

          source_test = data_new[str(distances[i]*80)]['Test']
          source_test2 = source_test.copy()

          for quadrant in quadrants:
              print("   quadrant = ", quadrant)
              if method == "GD":
                M, b = compute_parameters(quadrant, 'GD', source, target, params=params)
                new_points, indexes = modify_const_GD(quadrant, source_test, M, b)
              elif method == "Z":
                mean_source,cov_source,mean_target,cov_target = compute_parameters(quadrant, 'Z', source, target, params=None)
                new_points, indexes = modify_const_Z(quadrant,source_test,mean_source,cov_source,mean_target,cov_target)

              for k in range(len(indexes)):
                  source_test2.iloc[indexes[k][0],indexes[k][1]] = complex(new_points[k][0], new_points[k][1])

          if mod_csv == True:
            old_df = source_test2.copy()
            old_df.insert(loc = 0,column = 'source_distance',value = str(distances[i]*80))
            old_df.insert(loc = 1,column = 'target_distance',value = str(distances[j]*80))

            modify_data_df  = pd.concat([modify_data_df, old_df], ignore_index=True)
       
          F=[]
          for k in range(source_test2.shape[0]):
              data2=list(source_test2.iloc[k,:])
              data2=[[float(d.real), float(d.imag)] for d in data2]
              gmm = GaussianMixture(n_components=16, random_state=0, means_init=my_centers).fit(data2)
              mus=gmm.means_
              sigmas=gmm.covariances_

              features=[distances[i]*80, distances[j]*80]

              for z in selCP_pos:
                  mindist=None
                  k_inc=None
                  for w in range(16):
                      d=L2dist(mus[w],z)
                      if mindist is None or mindist>d:
                          mindist=d
                          k_inc=w

                  covmat=np.concatenate(list(sigmas[k_inc])).ravel().tolist()
                  features = [*features, *mus[k_inc], *covmat]
              F.append(features)
          
          header=['original_dist', 'target_dist']
          
          for j in selCP:
              header=[*header,*['mu_r_'+str(j),'mu_i_'+str(j),'sigma_rr_'+str(j),'sigma_ri_'+str(j),'sigma_ir_'+str(j),'sigma_ii_'+str(j)]]
   
          with open(output_path + output_features, 'a', encoding='UTF8', newline='') as f:
              writer = csv.writer(f)
              # write the header
              if first:
                writer.writerow(header)
                first = False
              # write multiple rows
              writer.writerows(F)  

      if mod_csv == True and i<24:
        # Generate mod for modified file
        print('Generating the modified file for the modified data')
        mod_df_generator(modify_data_df, output_path, raw_data = False, mod_i = str(distances[i]*80), method = method)

        # Get modified data points
        modify_all_data_df  = pd.concat([modify_all_data_df, modify_data_df], ignore_index=True)

  if mod_csv == True:
    # Save the modified data
    print('Saving the modified data')
    filename = ('/{}_consts_modified_data.csv'.format(str(method)))
    # Removing parenthesis of complex numbers
    headers_names = list(modify_all_data_df.columns)
    for h in headers_names:
      if h != 'source_distance' or h != 'target_distance':
        modify_all_data_df[h] = modify_all_data_df[h].apply(str).str.replace('\(|\)','')
    modify_all_data_df.to_csv(str(output_path)+filename, index=False, encoding='utf-8-sig')        

#Run Main 

## Load Enviroment

In [18]:
# BLOQUE PARA USAR DESDE COLAB

# Google drive loading as work station for local-usage of the files.
from google.colab import drive
drive.mount('/content/gdrive',force_remount= True)

#-----------------------------------------------------------------------------

Mounted at /content/gdrive


In [19]:
# Lista para cambiar los paths rapido.
workers = ["Ronald", "Local"]

# Change the number to change the paths.
worker = workers[0]

if worker == "Ronald":
  path= "/content/gdrive/MyDrive/Symbol_to_Symbol/ANN_dataset"
  synthetic_path= "/content/gdrive/MyDrive/Thesis_Workstation/ANN_dataset/synthetic_constellations"
  out_path = "/content/gdrive/MyDrive/Thesis_Workstation/ANN_dataset"
else: path = os.getcwd()

## Read Synthetic data

In [20]:
def read_synthetic_data(input_path, radius, functions):
  for funct in functions:
    for r in radius:
      filename = input_path+"/synthetic_cosntelation_radius_"+str(r)+"_funct_"+str(funct)+".csv"
      df = pd.read_csv(filename)  
      df = df.iloc[:, 2:df.shape[1]]
      break
  return df

In [21]:
def read_synthetic_data_v2(input_path, radius, function):

  filename = input_path+"/synthetic_cosntelation_radius_"+str(radius)+"_funct_"+str(function)+".csv"
  df = pd.read_csv(filename)  
  df = df.iloc[:, 2:df.shape[1]]
  return df

In [22]:
functions = ['uniform', 'non_uniform', 'gaussian']
radius = [0, 0.01, 0.02, 0.03]
read_synthetic_data_v2(synthetic_path, radius[0], functions[0])

Unnamed: 0,o0,o1,o2,o3,o4,o5,o6,o7,o8,o9,...,o2038,o2039,o2040,o2041,o2042,o2043,o2044,o2045,o2046,o2047
0,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
1,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
2,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
3,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
4,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
5,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
6,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
7,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
8,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j
9,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,-3+3j,...,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j,3-3j


## Global Variables

In [23]:
# Global variables

distances=[i for i in range(1,26)]
nsamples=50
span_length=80
nsymbols=2048

######
min_dist=0
max_dist=3000
selCP=[1,7,10,15]
selCP_pos=[(-3,3),(1,1),(-1,-1),(1,-3)]
my_centers=[[-3,3],[-1,3],[1,3],[3,3],[-3,1],[-1,1],[1,1],[3,1],[-3,-1],[-1,-1],[1,-1],[3,-1], [-3,-3],[-1,-3],[1,-3],[3,-3]]
######

## Execution Z Score

### Lauch

## Standard run

In [24]:
# PLEASE SELCET THE METHOD AND QUADRANTS THAT ARE DESIRED TO BE TESTED
method = "Z" # or method = "GD"
quadrants = [1, 7, 10, 15]

#Output mod files
output_path = path + '/modifiedData/'+ method

# PLEASE review that the path below is correct
input_data_path=path+"/rawData"
print('filepath selected: {}'.format(input_data_path))

filepath selected: /content/gdrive/MyDrive/Symbol_to_Symbol/ANN_dataset/rawData


In [None]:
symbol2symbol_main(method, quadrants, input_data_path, distances, output_path, params = None, mod_csv = True, mod_original = True, nsymbols = 2048, min_dist = 0, max_dist = 3000)

## Synthetic Run

In [26]:
# Synthetic Run Params

method = "Z" # or method = "GD"
quadrants = [1, 7, 10, 15]

# PLEASE review that the path below is correct
input_data_path=path+"/rawData"
print('filepath selected: {}'.format(input_data_path))

output_path = out_path + '/modifiedData/synthetic_data/'+ method

#source_params = [source_path, source_radius, source_function]

filepath selected: /content/gdrive/MyDrive/Symbol_to_Symbol/ANN_dataset/rawData


In [29]:
fun = ['uniform', 'non_uniform', 'gaussian']
rad = [0.00000001, 0.01, 0.02, 0.03]

for f in fun:
  for rd in rad:
    print('---------------')
    print('Evaluating {} function with a dispersion radius of {}.'.format(f, rd))
    print('\n')
    source_params = [synthetic_path, rd, f]
    symbol2symbol_main_v2(method, quadrants, input_data_path, distances, output_path, source_params=source_params, params = None, mod_csv = False, mod_original = False, nsymbols = 2048, min_dist = 0, max_dist = 3000)
    print('---------------')
    print('\n')

Evaluating uniform function with a dispersion radius of 1e-08.




  0%|          | 0/25 [00:00<?, ?it/s]



i =  0
 j =  1
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  2
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  3
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  4
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  5
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  6
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  7
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  8
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  9
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  10
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  11
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  12
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  13
   quadrant =  1
   quadrant =  7
   quadra

  4%|▍         | 1/25 [00:07<02:48,  7.01s/it]

   quadrant =  7
   quadrant =  10
   quadrant =  15


i =  1
 j =  2
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  3
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  4
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  5
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15
 j =  6
   quadrant =  1
   quadrant =  7
   quadrant =  10
   quadrant =  15


  4%|▍         | 1/25 [00:21<08:36, 21.50s/it]


KeyboardInterrupt: ignored

## Execution Gradient Descent

### Lauch

## Standard Run

In [None]:
# PLEASE SELCET THE METHOD AND QUADRANTS THAT ARE DESIRED TO BE TESTED
method = "GD" # or method = "Z"
quadrants = [1, 7, 10, 15]

# PLEASE review that the path below is correct
input_data_path=path+"/rawData"
print('filepath selected: {}'.format(input_data_path))

#Output mod files
output_path = out_path + '/modifiedData/'+ method

# Params for gradient descent alpha and beta
a = 0.7
b = 0.2
params = {"alpha": float(a), "beta": float(b)}

In [None]:
symbol2symbol_main(method, quadrants, input_data_path, distances, output_path, mod_csv = True, mod_original = False,  params = None, nsymbols = 2048, min_dist = 0, max_dist = 3000)

## Synthetic Run

In [None]:
# Synthetic Run Params

method = "GD" # or method = "Z"
quadrants = [1, 7, 10, 15]

# PLEASE review that the path below is correct
input_data_path=path+"/rawData"
print('filepath selected: {}'.format(input_data_path))

output_path = out_path + '/modifiedData/synthetic_data/'+ method

#source_params = [source_path, source_radius, source_function]

In [None]:
fun = ['uniform', 'non_uniform', 'gaussian']
rad = [0.00000001, 0.01, 0.02, 0.03]

for f in fun:
  for rd in rad:
    print('---------------')
    print('Evaluating {} function with a dispersion radius of {}.'.format(f, rd))
    print('\n')
    source_params = [synthetic_path, rd, f]
    symbol2symbol_main_v2(method, quadrants, input_data_path, distances, output_path, source_params=source_params, params = None, mod_csv = False, mod_original = False, nsymbols = 2048, min_dist = 0, max_dist = 3000)
    print('\n')
    print('---------------')