# From .txt (raw file) to .csv file (x, y, theta)

### Dependencies

In [9]:
import os
import csv
import numpy as np

In [10]:
# Folder that is listed for the raw .txt logs, and folder the .csv files are written to.
dir_path = r"IB_missing_points"
output_dir_path = r"IB_Dataset"

# Averaged vecX: [7.260e-06 6.136e-06]
# Averaged vecY: [-6.840e-06  6.544e-06]
vecX = np.array([7.260e-06, 6.136e-06])
vecY = np.array([-6.840e-06, 6.544e-06])

# 2x2 matrix whose first column is vecX and whose second column is vecY.
basis = np.stack((vecX, vecY), axis=1)

# Constant shift; LL2XY adds it to (Lon, Lat) before solving against `basis`.
offset = np.array([-121.5409683, -25.0129261])

In [11]:
texts_array = []
# os.listdir returns the entries in arbitrary order and includes everything in the
# folder. Keep only the .txt logs (their names are later parsed by getTruth, which
# would fail on any other entry) and sort them so every run uses the same order.
textFiles = sorted(
    name for name in os.listdir(dir_path) if name.lower().endswith('.txt')
)
print(textFiles)


['0_15_0.txt', '0_15_135.txt', '0_15_180.txt', '0_15_225.txt', '0_15_270.txt', '0_15_315.txt', '0_15_45.txt', '0_15_90.txt', '0_25_0.txt', '0_25_135.txt', '0_25_180.txt', '0_25_225.txt', '0_25_270.txt', '0_25_315.txt', '0_25_45.txt', '0_25_90.txt', '0_35_0.txt', '0_35_135.txt', '0_35_180.txt', '0_35_225.txt', '0_35_270.txt', '0_35_315.txt', '0_35_45.txt', '0_35_90.txt', '0_45_0.txt', '0_45_135.txt', '0_45_180.txt', '0_45_225.txt', '0_45_270.txt', '0_45_315.txt', '0_45_45.txt', '0_45_90.txt', '0_55_0.txt', '0_55_135.txt', '0_55_180.txt', '0_55_225.txt', '0_55_270.txt', '0_55_315.txt', '0_55_45.txt', '0_55_90.txt', '0_5_0.txt', '0_5_135.txt', '0_5_180.txt', '0_5_225.txt', '0_5_270.txt', '0_5_315.txt', '0_5_45.txt', '0_5_90.txt', '0_65_0.txt', '0_65_135.txt', '0_65_180.txt', '0_65_225.txt', '0_65_270.txt', '0_65_315.txt', '0_65_45.txt', '0_65_90.txt']


In [12]:
def getTruth(filename):
    """Parse the three underscore-separated numbers encoded in a file name.

    The directory part and the extension of ``filename`` are ignored, so
    ``"some/dir/0_15_135.txt"`` yields ``(0.0, 15.0, 135.0)``.

    Returns:
        A tuple ``(x, y, theta)`` of floats.

    Raises:
        ValueError: if the stem is not exactly three underscore-separated
            numeric fields.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    x_text, y_text, theta_text = stem.split("_")
    return float(x_text), float(y_text), float(theta_text)

In [13]:
# Read every listed log into memory and record the values encoded in its file name.
# xyt[k] and texts_array[k] always describe the same file.
xyt = []
texts_array.clear()
for log_name in textFiles:
    log_path = os.path.join(dir_path, log_name)
    xyt.append(list(getTruth(log_path)))
    with open(log_path, 'r') as handle:
        texts_array.append(handle.readlines())

# texts_array[0]
print(len(texts_array))

56


In [14]:
# Reduce every file's raw lines to its $GNGGA / $GPHDT sentences, in place in texts_array.
for i, texts in enumerate(texts_array):
    # Remove the last line (presumably a truncated trailing record - TODO confirm).
    # Guarded so that an empty file does not raise IndexError.
    if texts:
        texts.pop()

    # Extract lines starting with $GNGGA or $GPHDT
    extracted_lines = [line for line in texts if line.startswith(('$GNGGA', '$GPHDT'))]

    # Ensure the first line starts with $GNGGA and the last line with $GPHDT.
    # Only trim when there is something to trim: popping from an empty list raises.
    if extracted_lines and not extracted_lines[0].startswith('$GNGGA'):
        extracted_lines.pop(0)
    if extracted_lines and not extracted_lines[-1].startswith('$GPHDT'):
        extracted_lines.pop()

    # An odd count means at least one sentence has no partner; report which file.
    if len(extracted_lines) % 2 != 0:
        print(f"Odd number of extracted lines in texts_array[{i}].")

    # Replace the element in texts_array with the extracted lines
    texts_array[i] = extracted_lines

In [15]:
# Convert dd.mmmmmmmmm... to dd.dddddddd
def ddm2ddd(n):
    """Convert a ``dd.mmmmmm`` value to decimal degrees, rounded to 8 places.

    The integer part of ``n`` is kept as whole degrees. The fractional part is
    treated as minutes divided by 100 and is rescaled by 100/60 (= 5/3).
    """
    degrees = int(n)
    minutes_fraction = n - degrees
    return round(degrees + minutes_fraction * 5 / 3, 8)

In [16]:
# Sanity check: number of per-file line lists held in texts_array (shown as the cell output).
len(texts_array)

56

In [17]:
def angleConversion(h):
    """Map heading ``h`` to ``(405 - h)`` modulo 360.

    Examples of the resulting pairs:
    0 <-> 45, 45 <-> 0, 90 <-> 315, 135 <-> 270, 180 <-> 225 ...
    """
    flipped = 405 - h
    return flipped % 360

# def generate_flat_random_numbers(num_samples, range_width=2.6):
#     flat_values = np.random.uniform(low=-range_width/2, high=range_width/2, size=num_samples)
#     flat_values -= np.mean(flat_values) # Adjust the values to have an average of zero
#     rounded_values = np.round(flat_values, 3)
#     return rounded_values

# range_width = 2.6 # +-k -> 2k (+-1.3)

def LL2XY(Lat, Lon):
    """Express a latitude/longitude pair as coefficients of the ``basis`` columns.

    The module-level ``offset`` is added to ``(Lon, Lat)`` and the linear system
    ``basis @ [X, Y] = shifted`` is solved for ``X`` and ``Y``.

    Returns:
        ``(X, Y)``, each rounded to 5 decimal places.
    """
    shifted = np.array([Lon, Lat]) + offset
    coefficients = np.linalg.solve(basis, shifted)
    return round(coefficients[0], 5), round(coefficients[1], 5)

def convHDT(HDT, truth):
    """Return the heading to store for one sample.

    Args:
        HDT: heading parsed from the $GPHDT sentence, or ``-1`` when the
            sentence carried no value.
        truth: heading taken from the file name; used only when ``HDT`` is ``-1``.

    Returns:
        ``angleConversion(HDT)`` when a heading is available. Otherwise a faux
        heading: ``truth`` plus uniform noise in [-5, 5), rounded to 3 decimals
        and wrapped into [0, 360).
    """
    if HDT != -1:
        return angleConversion(HDT)

    # Need to generate a faux HDT
    jitter = np.random.uniform(-5, 5)
    # No angleConversion here: truth is used as-is (per the original note it is
    # "already self-defined" - presumably already in the output convention).
    # Wrap on both sides: the sum can fall below 0 or reach 360 and above.
    faux = round((truth + jitter) % 360, 3)
    # Rounding can land exactly on 360.0; fold that back to 0.0.
    return faux % 360
        

In [18]:
processed_data = []

# The CSV files are written into output_dir_path; create it up front so that
# open() cannot fail on a missing folder.
os.makedirs(output_dir_path, exist_ok=True)

# xyt and texts_array were filled in the same order, so zip pairs each file's
# file-name values with its filtered lines.
for truth, file_content in zip(xyt, texts_array):
    processed_data = []

    # Pair every "$GNGGA" line with the "$GPHDT" line directly after it.
    # Checking each adjacent pair (instead of fixed even offsets) means a single
    # stray line cannot shift the alignment and drop all the pairs that follow.
    for gngga_text, gphdt_text in zip(file_content, file_content[1:]):
        if not (gngga_text.startswith('$GNGGA') and gphdt_text.startswith('$GPHDT')):
            continue

        gngga_line = gngga_text.split(',')
        gphdt_line = gphdt_text.split(',')

        # Extract the desired fields from $GNGGA and $GPHDT lines
        if len(gngga_line) < 5 or len(gphdt_line) < 3:
            continue
        # Empty latitude/longitude fields cannot be converted; skip the sample
        # instead of letting float('') abort the whole run.
        if not gngga_line[2].strip() or not gngga_line[4].strip():
            continue

        Lat = ddm2ddd(float(gngga_line[2]) / 100)
        Lon = ddm2ddd(float(gngga_line[4]) / 100)
        x, y = LL2XY(Lat, Lon)

        # -1 marks a missing heading; convHDT then generates a faux HDT
        HDT = float(gphdt_line[1]) if gphdt_line[1] else -1
        t = convHDT(HDT, truth[2])
        processed_data.append([x, y, t])

    # Output name mirrors the input name: <x>_<y>_<theta>.csv with integer fields.
    filename = f"{int(truth[0])}_{int(truth[1])}_{int(truth[2])}.csv"
    output_csv_file = os.path.join(output_dir_path, filename)
    with open(output_csv_file, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerows(processed_data)