This notebook imports the motion data collected on board the R/V Akademik Tryoshnikov during the Antarctic Circumnavigation Expedition (ACE). 

Import required packages

In [113]:
import csv
import os
import pandas

Set up pandas display

In [131]:
pandas.set_option('display.max_columns', 100)

Set up the hard-coded variables

In [114]:
input_data_folder = "/media/jen/SAMSUNG/motion_data/"
output_data_folder = "/home/jen/projects/ace_data_management/wip/motion_data/"

Get the set of motion data files in a list.

In [115]:
def get_input_txt_files(input_data_folder):
    
    list_of_files = []
    
    os.chdir(input_data_folder)
    directory_path = os.getcwd()
    
    for filename in os.listdir(input_data_folder):
        if filename.endswith(".txt"):
            fullpath = directory_path + "/" + filename
            list_of_files.append(fullpath)
    
    return list_of_files

In [116]:
list_motion_data_files = get_input_txt_files(input_data_folder)
print(list_motion_data_files)
print(len(list_motion_data_files))

['/media/jen/SAMSUNG/motion_data/ACE-1_0.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_0.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_1.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_10.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_11.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_12.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_13.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_14.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_15.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_16.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_17.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_18.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_19.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_2.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_20.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_21.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_22.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_23.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_25.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_26.txt', '/media/jen/SAMSUNG/motion_data/ACE-2_27.txt', '/media/jen/SAMS

TODO Open the files. Check they all have the same header. 

In [None]:
for file in list_motion_data_files:
    with open(file, 'r') as csvfile:
        contents = csv.reader(csvfile, delimiter='\t')
        for row in contents:
            print(row)
        #test_contents = contents.decode("utf-16")
        #for row in contents:
         #   print(file, "\t", row[1])

Get the header for the files (this is listed in a csv file currently). 

In [117]:
header_file = "/home/jen/projects/ace_data_management/wip/motion_data/file_header.csv"

header = []
with open(header_file) as headerfile:
    contents = csv.reader(headerfile)
    header_list = list(contents)
    
    for item in header_list: 
        header.append(item[0])

In [118]:
header

['pc_time_hhmmsssss',
 'hydrins_time_hhmmsssss',
 'heading_degrees',
 'roll_degrees',
 'pitch_degrees',
 'heading_std_dev_degrees',
 'roll_std_dev_degrees',
 'pitch_std_dev_degrees',
 'north_speed_ms-1',
 'east_speed_ms-1',
 'vertical_speed_ms-1',
 'speed_norm_knots',
 'north_speed_std_dev_ms-1',
 'east_speed_std_dev_ms-1',
 'vertical_speed_std_dev_ms-1',
 'latitude_degrees',
 'longitude_degrees',
 'altitude_m',
 'latitude std dev (m)',
 'longitude std dev (m)',
 'altitude std dev (m)',
 'zone_i',
 'zone_c',
 'utm_north_m',
 'utm_east_m',
 'utm_latitude_m',
 'high_level_status',
 'system_status_1',
 'system_status_2',
 'algo_status_1',
 'algo_status_2',
 'gps_latitude_degrees',
 'gps_longitude_degrees',
 'gps_latitude_m',
 'gps_mode',
 'gps_time',
 'manual_gps_latitude_degrees',
 'manual_gps_longitude_degrees',
 'manual_gps_altitude_m',
 'manual_gps_latitude_std_dev_degrees',
 'manual_gps_longitude_std_dev_degrees',
 'manual_gps_altitude_std_dev_m',
 'unknown1',
 'unknown2',
 'unknown3

Note that the manual GPS column headers do not look to be correct. 

Put all data from the files into a pandas data frame. Make sure just one data frame is used. 

In [133]:
test_file = list_motion_data_files[1000]
print(test_file)

testdf = pandas.read_csv(test_file, sep='\t', skiprows=5, header=None)
testdf.columns = header

/media/jen/SAMSUNG/motion_data/ACE_Arenas-5_4.txt


Do the above now for all of the motion data files to bring the data together into one data frame. 

In [None]:
motion_df_each_file = (pandas.read_csv(test_file, sep='\t', skiprows=5, header=None) for file in list_motion_data_files)
concatenated_motion_df = pandas.concat(motion_df_each_file, ignore_index=True)

Alter the header of the data frame to the header previously imported.

In [140]:
concatenated_motion_df.columns = header

Find the number of rows in the dataframe.

In [141]:
len(concatenated_motion_df)

10152000

Preview the dataframe.

In [142]:
concatenated_motion_df.iloc[:5]

Unnamed: 0,pc_time_hhmmsssss,hydrins_time_hhmmsssss,heading_degrees,roll_degrees,pitch_degrees,heading_std_dev_degrees,roll_std_dev_degrees,pitch_std_dev_degrees,north_speed_ms-1,east_speed_ms-1,vertical_speed_ms-1,speed_norm_knots,north_speed_std_dev_ms-1,east_speed_std_dev_ms-1,vertical_speed_std_dev_ms-1,latitude_degrees,longitude_degrees,altitude_m,latitude std dev (m),longitude std dev (m),altitude std dev (m),zone_i,zone_c,utm_north_m,utm_east_m,utm_latitude_m,high_level_status,system_status_1,system_status_2,algo_status_1,algo_status_2,gps_latitude_degrees,gps_longitude_degrees,gps_latitude_m,gps_mode,gps_time,manual_gps_latitude_degrees,manual_gps_longitude_degrees,manual_gps_altitude_m,manual_gps_latitude_std_dev_degrees,manual_gps_longitude_std_dev_degrees,manual_gps_altitude_std_dev_m,unknown1,unknown2,unknown3
0,23:08:18.517,23:08:18.359,106.237,1.208,-1.391,0.123,0.003,0.003,-1.117,2.911,0.098,6.06,0.015,0.015,0.015,-65.891755,146.643413,-2.325,0.09,0.09,0.09,55,D,2692109.004,483747.238,-2.325,E7F95551,800,604,3015,3000,-65.891752,146.643275,19.552,4,23:08:18.200,0.000153,2.1e-05,-10.83,0.0,0.0,0.0,0.0,00:00:00.000,
1,23:08:19.517,23:08:19.359,106.041,1.141,-1.504,0.123,0.003,0.003,-1.151,2.913,0.042,6.09,0.015,0.015,0.015,-65.891765,146.643477,-2.236,0.09,0.09,0.09,55,D,2692107.882,483750.153,-2.236,E7F95551,800,604,3015,3000,-65.891762,146.643338,19.615,4,23:08:19.200,0.000153,2.1e-05,-10.83,0.0,0.0,0.0,0.0,00:00:00.000,
2,23:08:20.517,23:08:20.359,105.886,1.001,-1.592,0.123,0.003,0.003,-1.177,2.897,-0.025,6.08,0.015,0.015,0.015,-65.891776,146.64354,-2.215,0.09,0.09,0.09,55,D,2692106.727,483753.059,-2.215,E7F95551,800,604,3015,3000,-65.891772,146.643402,19.639,4,23:08:20.200,0.000153,2.1e-05,-10.83,0.0,0.0,0.0,0.0,00:00:00.000,
3,23:08:21.517,23:08:21.359,105.746,0.805,-1.661,0.123,0.003,0.003,-1.201,2.865,-0.081,6.04,0.015,0.015,0.015,-65.891786,146.643603,-2.26,0.09,0.09,0.09,55,D,2692105.546,483755.94,-2.26,E7F95551,800,604,3015,3000,-65.891782,146.643465,19.606,4,23:08:21.200,0.000153,2.1e-05,-10.83,0.0,0.0,0.0,0.0,00:00:00.000,
4,23:08:22.517,23:08:22.359,105.598,0.566,-1.718,0.123,0.003,0.003,-1.199,2.835,-0.124,5.98,0.015,0.015,0.015,-65.891797,146.643666,-2.356,0.09,0.09,0.09,55,D,2692104.359,483758.791,-2.356,E7F95551,800,604,3015,3000,-65.891792,146.643527,19.472,4,23:08:22.200,0.000153,2.1e-05,-10.83,0.0,0.0,0.0,0.0,00:00:00.000,


I've just realised that the data does not contain the date. This needs to be extracted from the header and then included in the data frame manually.

TODO We should check that the date is the same for the entirety of the file that is imported into the data frame: do any files go over more than one day?

TODO Then sort the data frame by the date and time.

In [144]:
motion_df = concatenated_motion_df.sort_values(by = ['pc_time_hhmmsssss']) # This currently does not work because of memory issues

MemoryError: 

TODO Output the entire data frame into a csv file so that is is easy to deal with for others.

In [146]:
concatenated_motion_df.to_csv(output_data_folder + "ace_motion_data.csv")