# Replace i with j in raw dataset
The raw RADAR data is made up of complex numbers written using the mathematical convention, in which `i` denotes the imaginary component. Python cannot parse this form because it uses the engineering convention of `j`, so every `i` must be replaced with `j` before the values can be converted to Python complex numbers.

## Notebook Setup


Allow editing of modules using editor (auto reloading)

In [1]:
import os

# Pick the project root according to the directory the kernel was started in.
_cwd = os.getcwd()
_LAPTOP_DIRS = (
    'D:\\Google Drive\\Level-4-Project\\notebooks',
    'D:\\Google Drive\\Level-4-Project\\src\\features',
)

if _cwd == '/content':
    # CoLab: mount Google Drive and work from the project folder inside it.
    from google.colab import drive
    drive.mount('/content/gdrive')
    BASE_PATH = '/content/gdrive/My Drive/Level-4-Project/'
    os.chdir('gdrive/My Drive/Level-4-Project/')
elif _cwd in _LAPTOP_DIRS:
    # Laptop: notebook launched from either the notebooks or the features folder.
    BASE_PATH = "D:/Google Drive/Level-4-Project/"
else:
    # GPU cluster: fallback for any other working directory.
    BASE_PATH = "/export/home/2192793m/Level-4-Project/"

# Data folders, both relative to the project root chosen above.
RAW_PATH = BASE_PATH + 'data/raw/'
INTERIM_PATH = BASE_PATH + 'data/interim/'

In [2]:
import pandas as pd
from shutil import copy

In [3]:
# Load the label table and rename the ID column to a valid Python identifier,
# so it can be read as `row.dataset_id` when iterating with itertuples().
df_labels = pd.read_csv(RAW_PATH + 'Labels.csv').rename(
    columns={'dataset ID': 'dataset_id'}
)

In [4]:
def convert_to_complex(complex_string):
    """
    Converts a string of format "a+bi" to a python complex number.
    To do this, i is replaced with j before conversion.

    The value can be given either as the string itself or wrapped in a
    row-like object (for example the pandas Series that
    ``DataFrame.apply(..., axis=1)`` passes in), in which case the string
    is read from element 0 of that object.

    :param complex_string: complex number in format "a+bi", or a row-like
        object whose element 0 is such a string
    :type complex_string: str or row-like (e.g. pandas.Series, list, tuple)
    :return: data point in complex form
    :rtype: complex
    :raises ValueError: if the text is not a valid complex literal after the
        replacement (python's complex() does not accept whitespace around
        the sign between the real and imaginary parts)
    """
    # A bare string must not be indexed: "1+2i"[0] would silently keep only
    # the first character and drop the rest of the number.
    if isinstance(complex_string, str):
        text = complex_string
    else:
        text = complex_string[0]
    return complex(text.replace('i', 'j'))

In [5]:
# Set to the dataset ID to start processing from (used to resume from program crash).
# The processing loop compares this value against each row's dataset_id, not
# against the row position: rows with a smaller dataset_id are skipped.
resume_from_row = 0

In [6]:
# Convert every raw dataset listed in the labels table and write the result
# to the interim folder under the same file name.
number_of_rows = df_labels.shape[0]
# enumerate() numbers every label row from 1, including rows that are skipped,
# so the progress message always reflects the position in the labels table.
for current_row, row in enumerate(df_labels.itertuples(), start=1):
    # NOTE: resume_from_row is compared against the dataset ID, not the row position.
    if row.dataset_id >= resume_from_row:
        print("Processing row", current_row, "of", number_of_rows)
        print(str(row.dataset_id))
        file_name = RAW_PATH + "Dataset_" + str(row.dataset_id) + ".dat"

        # Each line of the raw file holds one value. The lines are read directly
        # rather than through pd.read_table(sep="\n"), because recent pandas
        # releases refuse a line terminator as the separator. Empty lines are
        # dropped, as the pandas reader does by default.
        with open(file_name, encoding="utf-8") as raw_file:
            lines = [line.rstrip("\n") for line in raw_file]
        # dtype=object keeps the column able to hold both the untouched text
        # lines and the converted complex values.
        radar_df = pd.DataFrame([line for line in lines if line], dtype=object)

        # The first four lines are kept as read (presumably header/metadata
        # values - confirm against the dataset description); every later line
        # is converted from "a+bi" text to a python complex number.
        converted = radar_df[4:].apply(convert_to_complex, axis=1)
        reshaped = converted.values.reshape(converted.shape[0], 1)
        radar_df[4:] = reshaped

        radar_df.to_csv(INTERIM_PATH + "Dataset_" + str(row.dataset_id) + ".dat")

In [7]:
# Copy Labels.csv file from raw folder to interim folder.
# The file itself is copied, so the interim copy keeps the raw 'dataset ID'
# column name; the rename to 'dataset_id' only affected the in-memory DataFrame.
copy(RAW_PATH + 'Labels.csv', INTERIM_PATH)