# Remove the headers from all files in a directory

In [None]:
import os
import csv

# Define the input and output folders
# CHANGE THIS ----↓
input_folder = "..."
output_folder = "..."
# NOTE: the two folders must differ. Output files are opened in 'w' mode under
# the same file name, which would truncate an input file while it is read.

# Create the output folder; exist_ok makes this a no-op if it is already there
os.makedirs(output_folder, exist_ok=True)

# Collect the names of all CSV files in the input folder
csv_files = [file for file in os.listdir(input_folder) if file.endswith(".csv")]

# Copy each CSV file into the output folder without its first (header) row
for file in csv_files:
    input_file = os.path.join(input_folder, file)
    output_file = os.path.join(output_folder, file)

    # newline='' on both handles lets the csv module do its own line-ending
    # handling (needed for quoted fields that contain line breaks).
    with open(input_file, 'r', newline='') as f_input, \
            open(output_file, 'w', newline='') as f_output:
        reader = csv.reader(f_input)
        writer = csv.writer(f_output)

        # Skip the header row. The None default keeps an empty file from
        # raising StopIteration; it simply yields an empty output file.
        next(reader, None)

        # Write the remaining rows to the output file
        writer.writerows(reader)

print("CSV files processed and headers removed.")


# Remove the header from one file


In [None]:
import pandas as pd

def remove_first_row(input_path, output_path):
    """Copy a CSV file to ``output_path`` without its first (header) row.

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path the resulting CSV (all rows but the first) is
            written to.
    """
    # header=None keeps the first line as an ordinary row so it can be dropped
    # explicitly. dtype=str / keep_default_na=False pass the cell text through
    # unchanged instead of re-formatting numbers or turning empty cells into NaN.
    df = pd.read_csv(input_path, header=None, dtype=str, keep_default_na=False)
    df = df.iloc[1:]  # Remove the first row
    # header=False: do not write pandas' generated column labels back out.
    df.to_csv(output_path, index=False, header=False)

# Source file and destination file for the single-file conversion.
# CHANGE THIS --↓
input_path, output_path = "...", "..."
remove_first_row(input_path=input_path, output_path=output_path)

# Combine multiple csv files from one folder into a single CSV file
This was used when the CenterSpeed dataset expected a single file.

In [2]:
import csv
import os
# CHANGE THIS --↓
output_file = '...'
input_dir = "..."

# Every regular file in input_dir is an input. The output file itself is left
# out so that it is never read while it is being appended to.
# NOTE: os.listdir returns entries in arbitrary order, so the file number that
# is prepended to each row below depends on that order.
output_abs = os.path.abspath(output_file)
input_files = [
    path
    for path in (os.path.join(input_dir, f) for f in os.listdir(input_dir))
    if os.path.isfile(path) and os.path.abspath(path) != output_abs
]
counter = 0

# Report the number of lines in each input file
for file in input_files:
    with open(file, 'r') as f:
        print("Total Entries in ", file, " :", sum(1 for line in f))

# Append mode: rows are added to whatever output_file already contains.
with open(output_file, 'a', newline='') as f:
    writer = csv.writer(f)
    for file_nr, file in enumerate(input_files):
        # newline='' lets the csv module do its own line-ending handling
        with open(file, 'r', newline='') as f_input:
            for row in csv.reader(f_input):
                if not row:
                    continue  # Blank line: nothing to copy (row[0] would fail)
                if 'lidar' in row[0]:
                    continue  # Skip the header
                # Prepend the index of the source file as the first column
                writer.writerow([str(file_nr)] + row)
                counter += 1

# Report the number of lines now in the combined file
with open(output_file, 'r') as f:
    print("Total Entries:", sum(1 for line in f))

print('Done!')
print('Total rows written:', counter)

Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/25_04_Dataset04_FTG_10s.csv  : 339
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/25_04_Dataset04_Fast_10s.csv  : 352
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/25_04_Dataset04_Manual_10s.csv  : 350
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/06_04_Dataset04_Front.csv  : 1281
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/25_04_Dataset03_Manual_10s.csv  : 353
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/25_04_Dataset03_10s.csv  : 353
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/big_dataset/25_04_Dataset02_Manual_10s.csv  : 353
Total Entries in  /home/f1tenth/catkin_ws/src/race_stack/perception/dataset/output/bi

# Data-augmentation on CSV level
This was used before the CenterSpeed dataset applied transforms on an image level.

In [1]:
import pandas as pd
import numpy as np
import csv

def augment(entry):
    """Build a flipped copy of one dataset row.

    The ``lidar`` and ``intensities`` value lists are reversed and the sign of
    ``y``, ``vy`` and ``yaw`` is inverted (presumably a left/right mirror of
    the scan -- confirm against the dataset convention).

    Args:
        entry: pandas Series with the labels ``setid``, ``lidar`` and
            ``intensities``, followed from position 3 on by x, y, vx, vy and
            yaw. ``lidar`` and ``intensities`` are comma-separated number
            strings, optionally wrapped in parentheses.
            NOTE: these two fields of ``entry`` are overwritten in place with
            the parentheses stripped.

    Returns:
        dict with the keys setid, lidar, intensities, x, y, vx, vy and yaw.
        The two list fields are formatted as ``"(v0, v1, ...)"`` strings.
    """

    def _as_paren_string(values):
        # tolist() yields plain Python floats. str(list(ndarray)) would render
        # each item as "np.float64(...)" on NumPy >= 2 and corrupt the output.
        return str(values.tolist()).replace('[', '(').replace(']', ')')

    entry['lidar'] = entry['lidar'].replace('(', '').replace(')', '')
    entry['intensities'] = entry['intensities'].replace('(', '').replace(')', '')
    lidar_data = np.fromstring(entry['lidar'], dtype=float, sep=', ')
    intensities = np.fromstring(entry['intensities'], dtype=float, sep=',')
    lidar_data = np.flip(lidar_data)  # Reverse the lidar data
    intensities = np.flip(intensities)  # Reverse the intensities

    # np.array copies, so the sign flips below do not write back into entry.
    data = np.array(entry.iloc[3:])  # Remaining fields: x, y, vx, vy, yaw
    data[1] = -data[1]  # Reverse the y-coordinate
    data[3] = -data[3]  # Reverse the vy-coordinate
    data[4] = -data[4]  # Reverse the yaw

    return {
        'setid': entry['setid'],
        'lidar': _as_paren_string(lidar_data),
        'intensities': _as_paren_string(intensities),
        'x': data[0],
        'y': data[1],
        'vx': data[2],
        'vy': data[3],
        'yaw': data[4],
    }

# Column layout of the (header-less) combined CSV; also the output field order
fieldnames = ['setid', 'lidar', 'intensities', 'x', 'y', 'vx', 'vy', 'yaw']

# Change this --------↓
df = pd.read_csv('CSV_FILE_PATH', header=None, names=fieldnames)
# Highest set id in the data. Only the setid column is reduced, so the large
# string columns are not scanned.
setn = df['setid'].max()
# NOTE(review): this writes the original rows, with a header line, to a fixed
# file name, while the augmented rows below are appended to OUTPUT_FILE_PATH.
# Presumably both are meant to be the same file -- confirm.
df.to_csv('5_combined_aug.csv', index=False)
print("Initial length: ",len(df))

# All entries are augmented. To augment only a random subset, use e.g.
# df.sample(len(df)//2) here instead.
entries_to_augment = df
augmented_entries = []
for index, entry in entries_to_augment.iterrows():
    # Shift the set id past the existing maximum so that an augmented set
    # never shares an id with an original set.
    entry['setid'] = int(entry['setid']) + setn + 1
    augmented_entries.append(augment(entry))
    print(entry)

# Change this ↓
with open('OUTPUT_FILE_PATH', mode='a', newline='') as file:
    # DictWriter orders the values of each row dict by fieldnames
    writer = csv.DictWriter(file, fieldnames=fieldnames)

    # Append the augmented rows (no header line is written)
    writer.writerows(augmented_entries)

# Read the result back and report its length as a sanity check
df_test = pd.read_csv('OUTPUT_FILE_PATH')
print(len(df_test))

Initial length:  6348
setid                                                          5
lidar          0.06499999761581421, 0.057999998331069946, 0.0...
intensities    550.0, 526.0, 497.0, 455.0, 421.0, 540.0, 540....
x                                                       0.895224
y                                                       0.881535
vx                                                      0.745468
vy                                                     -1.698698
yaw                                                     1.157256
Name: 0, dtype: object
setid                                                          5
lidar          0.06400000303983688, 0.07400000095367432, 0.05...
intensities    546.0, 536.0, 507.0, 463.0, 446.0, 542.0, 595....
x                                                       0.911195
y                                                       0.840821
vx                                                      0.748953
vy                                           