It's a template notebook for reading, stack and clean .csv files

In [2]:
import pandas as pd
import os

Define the folder path containing the CSV files and list all CSV files in the directory.

In [3]:
# Set the folder path containing the CSV files
folder_path = './data/csv'

# List all CSV files in the directory
csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
print('CSV files found:', csv_files)

CSV files found: ['coordinates_ins_gnss_1_4_plots.csv', 'coordinates_ins_gnss_5_6_plots.csv', 'orientation_angles_plot_plots.csv', 'Velocities_dINS_dGNSS_plots.csv']


Iterate through each CSV file, read it into a DataFrame, and add a column for the file name as 'category'.

In [4]:
# Read each CSV, add 'category' column, and stack into a list
df_list = []
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path, sep=';')
    df['category'] = os.path.splitext(file)[0]  # Use file name without extension
    df_list.append(df)

Explore random file

In [5]:
df_list[1].head()  # Display the first few rows of the first DataFrame

Unnamed: 0,variable,head,category
0,t,Difference between INS and GNSS coordinates,coordinates_ins_gnss_5_6_plots
1,BESTPOS_log_index,Difference between INS and GNSS coordinates,coordinates_ins_gnss_5_6_plots
2,dFi_od_m_mass,Difference between INS and GNSS coordinates,coordinates_ins_gnss_5_6_plots
3,dFi_gps_m,Difference between INS and GNSS coordinates,coordinates_ins_gnss_5_6_plots
4,dFi_ext_m,Difference between INS and GNSS coordinates,coordinates_ins_gnss_5_6_plots


Rename the columns: 'variable' to 'name', 'head' to 'subcategory', and ensure 'category' is set from the file name. Concatenate all DataFrames into one.

In [7]:
# Concatenate all DataFrames and rename columns
if df_list:
    df = pd.concat(df_list, ignore_index=True)
    df = df.rename(columns={'variable': 'name', 'head': 'subcategory'})
    df = df[['name', 'subcategory', 'category']]
else:
    df = pd.DataFrame(columns=['name', 'subcategory', 'category'])

df.head(10)

Unnamed: 0,name,subcategory,category
0,t,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
1,hor_bit,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
2,Latitude_ext2imu,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
3,DAGR_new_data,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
4,DAGR_Latitude,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
5,tt,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
6,Fi_od2imu_mass,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
7,r2dt,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
8,RANGE_log_index,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots
9,Fi_r2pv,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss_1_4_plots


In [8]:
df.shape

(298, 3)

In [9]:
# Remove duplicates based on 'name' column
df.drop_duplicates(subset='name', inplace=True)

In [10]:
df.shape

(194, 3)

Clean subcategory and category names

In [11]:
# Replace _plot part from category column
df['category'] = df['category'].str.replace('_plot', '', regex=False)

In [12]:
# Replace _1_4s and _5_6s parts from category column
df.loc[df['category'].isin(['coordinates_ins_gnss_1_4s', 'coordinates_ins_gnss_5_6s']), 'category'] = 'coordinates_ins_gnss'

In [13]:
df.query('category == "coordinates_ins_gnss"')

Unnamed: 0,name,subcategory,category
0,t,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
1,hor_bit,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
2,Latitude_ext2imu,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
3,DAGR_new_data,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
4,DAGR_Latitude,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
...,...,...,...
166,Vh_S_pv,Difference between INS and GNSS coordinates,coordinates_ins_gnss
167,Vh_S_mass,Difference between INS and GNSS coordinates,coordinates_ins_gnss
168,pos_source_mass,Difference between INS and GNSS coordinates,coordinates_ins_gnss
169,h_source_mass,Difference between INS and GNSS coordinates,coordinates_ins_gnss


In [15]:
df['category'].unique()

array(['coordinates_ins_gnss', 'orientation_angless',
       'Velocities_dINS_dGNSSs'], dtype=object)

In [16]:
df['subcategory'].unique()

array(['Plots of latitude, longitude (in degrees), altitude',
       'Plots of change of latitude, longitude (in meters), altitude',
       'Plots of latitude, longitude, altitude inaccuracies',
       'Difference between INS and GNSS coordinates',
       'Orientation angle plots', 'Difference in INS orientation tab',
       'Difference in AHRS orientation tab',
       'North, east and vertical velocity components plots',
       'Plots of inaccuracy of north, east and vertical velocity components',
       'Plots of differences between INS velocities and GNSS velocities',
       'Velocity source for INS aiding'], dtype=object)

In [17]:
df

Unnamed: 0,name,subcategory,category
0,t,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
1,hor_bit,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
2,Latitude_ext2imu,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
3,DAGR_new_data,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
4,DAGR_Latitude,"Plots of latitude, longitude (in degrees), alt...",coordinates_ins_gnss
...,...,...,...
271,V_gps2pv_SPAN,Plots of differences between INS velocities an...,Velocities_dINS_dGNSSs
272,V_gps2imu_SPAN,Plots of differences between INS velocities an...,Velocities_dINS_dGNSSs
295,vel_source_mass,Velocity source for INS aiding,Velocities_dINS_dGNSSs
296,xl,Velocity source for INS aiding,Velocities_dINS_dGNSSs


Save result file

In [18]:
df.to_csv('data/stack_csv.csv', index=False, sep=';')